Imported Upstream version 0.8.9
[platform/upstream/multipath-tools.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  */
24
#include <unistd.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <fcntl.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/user.h>
#include <sys/un.h>
#include <poll.h>
#include <linux/types.h>
#include <linux/netlink.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <libudev.h>
44
45 #include "debug.h"
46 #include "list.h"
47 #include "uevent.h"
48 #include "vector.h"
49 #include "structs.h"
50 #include "util.h"
51 #include "config.h"
52 #include "blacklist.h"
53 #include "devmapper.h"
54
/* Stop accumulating a burst after this many uevents. */
#define MAX_ACCUMULATION_COUNT 2048
/* Stop accumulating a burst after this many milliseconds (30 s). */
#define MAX_ACCUMULATION_TIME 30*1000
/* Minimum event rate (events/s) still considered a burst. */
#define MIN_BURST_SPEED 10

/* Callback invoked per serviced uevent; non-zero return indicates error. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/*
 * Global uevent queue: uevent_listen() produces, uevent_dispatch()
 * consumes.  Protected by uevq_lock; uev_cond is signaled when new
 * events are appended.
 */
static LIST_HEAD(uevq);
static pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t *uevq_lockp = &uevq_lock;
static pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
static pthread_cond_t *uev_condp = &uev_cond;
/* Trigger callback and opaque argument registered by uevent_dispatch(). */
static uev_trigger *my_uev_trigger;
static void *my_trigger_data;
/* Non-zero while the dispatcher is processing a batch (set under uevq_lock). */
static int servicing_uev;
69
70 int is_uevent_busy(void)
71 {
72         int empty;
73
74         pthread_mutex_lock(uevq_lockp);
75         empty = list_empty(&uevq);
76         pthread_mutex_unlock(uevq_lockp);
77         return (!empty || servicing_uev);
78 }
79
80 struct uevent * alloc_uevent (void)
81 {
82         struct uevent *uev = calloc(1, sizeof(struct uevent));
83
84         if (uev) {
85                 INIT_LIST_HEAD(&uev->node);
86                 INIT_LIST_HEAD(&uev->merge_node);
87         }
88
89         return uev;
90 }
91
92 static void uevq_cleanup(struct list_head *tmpq);
93
/*
 * Free a single uevent: release any uevents merged into it, drop the
 * udev_device reference it owns, and free the structure itself.
 * Suitable as a pthread cleanup handler (void * argument).
 */
static void cleanup_uev(void *arg)
{
	struct uevent *uev = arg;

	/* merged events live on merge_node; free them first */
	uevq_cleanup(&uev->merge_node);
	if (uev->udev)
		udev_device_unref(uev->udev);
	free(uev);
}
103
104 static void uevq_cleanup(struct list_head *tmpq)
105 {
106         struct uevent *uev, *tmp;
107
108         list_for_each_entry_safe(uev, tmp, tmpq, node) {
109                 list_del_init(&uev->node);
110                 cleanup_uev(uev);
111         }
112 }
113
114 static const char* uevent_get_env_var(const struct uevent *uev,
115                                       const char *attr)
116 {
117         int i;
118         size_t len;
119         const char *p = NULL;
120
121         if (attr == NULL)
122                 goto invalid;
123
124         len = strlen(attr);
125         if (len == 0)
126                 goto invalid;
127
128         for (i = 0; uev->envp[i] != NULL; i++) {
129                 const char *var = uev->envp[i];
130
131                 if (strlen(var) > len &&
132                     !memcmp(var, attr, len) && var[len] == '=') {
133                         p = var + len + 1;
134                         break;
135                 }
136         }
137
138         condlog(4, "%s: %s -> '%s'", __func__, attr, p ?: "(null)");
139         return p;
140
141 invalid:
142         condlog(2, "%s: empty variable name", __func__);
143         return NULL;
144 }
145
/*
 * Parse environment variable <attr> of <uev> as a non-negative decimal
 * integer.
 *
 * Returns the parsed value, or -1 if the variable is missing or empty,
 * contains trailing garbage, is negative, or does not fit in an int.
 * Uses strtol with errno/range checking; the original used strtoul,
 * which silently wraps negative and overflowing input before the
 * truncating assignment to int.
 */
int uevent_get_env_positive_int(const struct uevent *uev,
				       const char *attr)
{
	const char *p = uevent_get_env_var(uev, attr);
	char *q;
	long val;

	if (p == NULL || *p == '\0')
		return -1;

	errno = 0;
	val = strtol(p, &q, 10);
	if (*q != '\0' || val < 0 || val > INT_MAX || errno == ERANGE) {
		condlog(2, "%s: invalid %s: '%s'", __func__, attr, p);
		return -1;
	}
	return (int)val;
}
163
/*
 * Determine the WWID of the path this uevent refers to: look up the
 * configured uid attribute for uev->kernel and, if the uevent carries
 * that environment variable, record its value as uev->wwid.
 */
void
uevent_get_wwid(struct uevent *uev)
{
	char *uid_attribute;
	const char *val;
	struct config * conf;

	/* fetch the uid attribute name while holding the config */
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	uid_attribute = get_uid_attribute_by_attrs(conf, uev->kernel);
	pthread_cleanup_pop(1);

	/*
	 * NOTE(review): uid_attribute appears to point into conf, which
	 * was released just above -- confirm the config refcounting keeps
	 * it valid across this call.
	 */
	val = uevent_get_env_var(uev, uid_attribute);
	if (val)
		/* wwid points into the uevent's own envp buffer; no copy */
		uev->wwid = val;
}
180
181 static bool uevent_need_merge(void)
182 {
183         struct config * conf;
184         bool need_merge = false;
185
186         conf = get_multipath_config();
187         if (VECTOR_SIZE(&conf->uid_attrs) > 0)
188                 need_merge = true;
189         put_multipath_config(conf);
190
191         return need_merge;
192 }
193
194 static bool uevent_can_discard(struct uevent *uev)
195 {
196         int invalid = 0;
197         struct config * conf;
198
199         /*
200          * do not filter dm devices by devnode
201          */
202         if (!strncmp(uev->kernel, "dm-", 3))
203                 return false;
204         /*
205          * filter paths devices by devnode
206          */
207         conf = get_multipath_config();
208         pthread_cleanup_push(put_multipath_config, conf);
209         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
210                            uev->kernel) > 0)
211                 invalid = 1;
212         pthread_cleanup_pop(1);
213
214         if (invalid)
215                 return true;
216         return false;
217 }
218
219 static bool
220 uevent_can_filter(struct uevent *earlier, struct uevent *later)
221 {
222
223         /*
224          * filter earlier uvents if path has removed later. Eg:
225          * "add path1 |chang path1 |add path2 |remove path1"
226          * can filter as:
227          * "add path2 |remove path1"
228          * uevents "add path1" and "chang path1" are filtered out
229          */
230         if (!strcmp(earlier->kernel, later->kernel) &&
231                 !strcmp(later->action, "remove") &&
232                 strncmp(later->kernel, "dm-", 3)) {
233                 return true;
234         }
235
236         /*
237          * filter change uvents if add uevents exist. Eg:
238          * "change path1| add path1 |add path2"
239          * can filter as:
240          * "add path1 |add path2"
241          * uevent "chang path1" is filtered out
242          */
243         if (!strcmp(earlier->kernel, later->kernel) &&
244                 !strcmp(earlier->action, "change") &&
245                 !strcmp(later->action, "add") &&
246                 strncmp(later->kernel, "dm-", 3)) {
247                 return true;
248         }
249
250         return false;
251 }
252
253 static bool
254 merge_need_stop(struct uevent *earlier, struct uevent *later)
255 {
256         /*
257          * dm uevent do not try to merge with left uevents
258          */
259         if (!strncmp(later->kernel, "dm-", 3))
260                 return true;
261
262         /*
263          * we can not make a jugement without wwid,
264          * so it is sensible to stop merging
265          */
266         if (!earlier->wwid || !later->wwid)
267                 return true;
268         /*
269          * uevents merging stopped
270          * when we meet an opposite action uevent from the same LUN to AVOID
271          * "add path1 |remove path1 |add path2 |remove path2 |add path3"
272          * to merge as "remove path1, path2" and "add path1, path2, path3"
273          * OR
274          * "remove path1 |add path1 |remove path2 |add path2 |remove path3"
275          * to merge as "add path1, path2" and "remove path1, path2, path3"
276          * SO
277          * when we meet a non-change uevent from the same LUN
278          * with the same wwid and different action
279          * it would be better to stop merging.
280          */
281         if (!strcmp(earlier->wwid, later->wwid) &&
282             strcmp(earlier->action, later->action) &&
283             strcmp(earlier->action, "change") &&
284             strcmp(later->action, "change"))
285                 return true;
286
287         return false;
288 }
289
290 static bool
291 uevent_can_merge(struct uevent *earlier, struct uevent *later)
292 {
293         /* merge paths uevents
294          * whose wwids exist and are same
295          * and actions are same,
296          * and actions are addition or deletion
297          */
298         if (earlier->wwid && later->wwid &&
299             !strcmp(earlier->wwid, later->wwid) &&
300             !strcmp(earlier->action, later->action) &&
301             strncmp(earlier->action, "change", 6) &&
302             strncmp(earlier->kernel, "dm-", 3)) {
303                 return true;
304         }
305
306         return false;
307 }
308
309 static void
310 uevent_prepare(struct list_head *tmpq)
311 {
312         struct uevent *uev, *tmp;
313
314         list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) {
315                 if (uevent_can_discard(uev)) {
316                         list_del_init(&uev->node);
317                         if (uev->udev)
318                                 udev_device_unref(uev->udev);
319                         free(uev);
320                         continue;
321                 }
322
323                 if (strncmp(uev->kernel, "dm-", 3) &&
324                     uevent_need_merge())
325                         uevent_get_wwid(uev);
326         }
327 }
328
329 static void
330 uevent_filter(struct uevent *later, struct list_head *tmpq)
331 {
332         struct uevent *earlier, *tmp;
333
334         list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
335                 /*
336                  * filter unnessary earlier uevents
337                  * by the later uevent
338                  */
339                 if (uevent_can_filter(earlier, later)) {
340                         condlog(3, "uevent: %s-%s has filtered by uevent: %s-%s",
341                                 earlier->kernel, earlier->action,
342                                 later->kernel, later->action);
343
344                         list_del_init(&earlier->node);
345                         if (earlier->udev)
346                                 udev_device_unref(earlier->udev);
347                         free(earlier);
348                 }
349         }
350 }
351
352 static void
353 uevent_merge(struct uevent *later, struct list_head *tmpq)
354 {
355         struct uevent *earlier, *tmp;
356
357         list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
358                 if (merge_need_stop(earlier, later))
359                         break;
360                 /*
361                  * merge earlier uevents to the later uevent
362                  */
363                 if (uevent_can_merge(earlier, later)) {
364                         condlog(3, "merged uevent: %s-%s-%s with uevent: %s-%s-%s",
365                                 earlier->action, earlier->kernel, earlier->wwid,
366                                 later->action, later->kernel, later->wwid);
367
368                         list_move(&earlier->node, &later->merge_node);
369                 }
370         }
371 }
372
373 static void
374 merge_uevq(struct list_head *tmpq)
375 {
376         struct uevent *later;
377
378         uevent_prepare(tmpq);
379         list_for_each_entry_reverse(later, tmpq, node) {
380                 uevent_filter(later, tmpq);
381                 if(uevent_need_merge())
382                         uevent_merge(later, tmpq);
383         }
384 }
385
386 static void
387 service_uevq(struct list_head *tmpq)
388 {
389         struct uevent *uev, *tmp;
390
391         list_for_each_entry_safe(uev, tmp, tmpq, node) {
392                 list_del_init(&uev->node);
393
394                 pthread_cleanup_push(cleanup_uev, uev);
395                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
396                         condlog(0, "uevent trigger error");
397                 pthread_cleanup_pop(1);
398         }
399 }
400
/* pthread cleanup handler: drop the udev context reference. */
static void uevent_cleanup(void *arg)
{
	condlog(3, "Releasing uevent_listen() resources");
	udev_unref((struct udev *)arg);
}
408
/* pthread cleanup handler: drop the udev monitor reference. */
static void monitor_cleanup(void *arg)
{
	condlog(3, "Releasing uevent_monitor() resources");
	udev_monitor_unref((struct udev_monitor *)arg);
}
416
/* pthread cleanup handler: drain and free a (local) uevent queue. */
static void cleanup_uevq(void *arg)
{
	uevq_cleanup(arg);
}
421
/* pthread cleanup handler: drain the shared queue under its lock. */
static void cleanup_global_uevq(void *arg __attribute__((unused)))
{
	pthread_mutex_lock(uevq_lockp);
	uevq_cleanup(&uevq);
	pthread_mutex_unlock(uevq_lockp);
}
428
/*
 * Service the uevent queue.
 *
 * Dispatcher loop: registers <uev_trigger>/<trigger_data>, then
 * repeatedly waits for events on the global queue, splices them onto a
 * private list, merges/filters the batch and invokes the trigger for
 * each survivor.  Returns 0 after my_uev_trigger is cleared (seen as
 * NULL after a wakeup).  Designed to run in its own cancellable
 * thread: the cleanup_push/pop pairs release the mutex and free the
 * batch on cancellation.
 */
int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
		    void * trigger_data)
{
	my_uev_trigger = uev_trigger;
	my_trigger_data = trigger_data;

	/* avoid page faults while handling storage events */
	mlockall(MCL_CURRENT | MCL_FUTURE);

	while (1) {
		LIST_HEAD(uevq_tmp);

		pthread_cleanup_push(cleanup_mutex, uevq_lockp);
		pthread_mutex_lock(uevq_lockp);
		servicing_uev = 0;
		/*
		 * Condition signals are unreliable,
		 * so make sure we only wait if we have to.
		 * (A spurious wakeup only causes an empty batch, which
		 * the loop below handles as a no-op.)
		 */
		if (list_empty(&uevq)) {
			pthread_cond_wait(uev_condp, uevq_lockp);
		}
		servicing_uev = 1;
		/* take the whole queue in one go, then drop the lock */
		list_splice_init(&uevq, &uevq_tmp);
		pthread_cleanup_pop(1);

		if (!my_uev_trigger)
			break;

		/* free the batch even if we are cancelled mid-service */
		pthread_cleanup_push(cleanup_uevq, &uevq_tmp);
		merge_uevq(&uevq_tmp);
		service_uevq(&uevq_tmp);
		pthread_cleanup_pop(1);
	}
	condlog(3, "Terminating uev service queue");
	return 0;
}
468
/*
 * Convert a libudev device into a struct uevent.
 *
 * Copies every "NAME=value" property of <dev> into uev->buffer and
 * points uev->envp[] entries at the copies; DEVPATH and ACTION get
 * dedicated pointers into their values.  On success the uevent takes
 * ownership of the <dev> reference; on any failure <dev> is unreffed
 * and NULL is returned.
 */
static struct uevent *uevent_from_udev_device(struct udev_device *dev)
{
	struct uevent *uev;
	int i = 0;
	char *pos, *end;
	struct udev_list_entry *list_entry;

	uev = alloc_uevent();
	if (!uev) {
		udev_device_unref(dev);
		condlog(1, "lost uevent, oom");
		return NULL;
	}
	pos = uev->buffer;
	/* NOTE(review): assumes uev->buffer holds at least
	 * HOTPLUG_BUFFER_SIZE + OBJECT_SIZE bytes -- confirm against the
	 * struct uevent definition in uevent.h */
	end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
	udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
		const char *name, *value;
		int bytes;

		name = udev_list_entry_get_name(list_entry);
		if (!name)
			name = "(null)";
		value = udev_list_entry_get_value(list_entry);
		if (!value)
			value = "(null)";
		/* append "NAME=value"; envp[i] will point at this copy */
		bytes = snprintf(pos, end - pos, "%s=%s", name, value);
		if (pos + bytes >= end) {
			/* buffer exhausted: keep what fits so far */
			condlog(2, "buffer overflow for uevent");
			break;
		}
		uev->envp[i] = pos;
		pos += bytes;
		*pos = '\0';
		pos++;
		/* + 8 skips the "DEVPATH=" prefix */
		if (strcmp(name, "DEVPATH") == 0)
			uev->devpath = uev->envp[i] + 8;
		/* + 7 skips the "ACTION=" prefix */
		if (strcmp(name, "ACTION") == 0)
			uev->action = uev->envp[i] + 7;
		i++;
		if (i == HOTPLUG_NUM_ENVP - 1)
			break;
	}
	if (!uev->devpath || ! uev->action) {
		udev_device_unref(dev);
		condlog(1, "uevent missing necessary fields");
		free(uev);
		return NULL;
	}
	/* uevent now owns the udev_device reference */
	uev->udev = dev;
	uev->envp[i] = NULL;

	condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
	/* kernel device name is the last component of DEVPATH */
	uev->kernel = strrchr(uev->devpath, '/');
	if (uev->kernel)
		uev->kernel++;

	/* print payload environment */
	for (i = 0; uev->envp[i] != NULL; i++)
		condlog(5, "%s", uev->envp[i]);
	return uev;
}
530
531 static bool uevent_burst(struct timeval *start_time, int events)
532 {
533         struct timeval diff_time, end_time;
534         unsigned long speed;
535         unsigned long eclipse_ms;
536
537         if(events > MAX_ACCUMULATION_COUNT) {
538                 condlog(2, "burst got %u uevents, too much uevents, stopped", events);
539                 return false;
540         }
541
542         gettimeofday(&end_time, NULL);
543         timersub(&end_time, start_time, &diff_time);
544
545         eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000;
546
547         if (eclipse_ms == 0)
548                 return true;
549
550         if (eclipse_ms > MAX_ACCUMULATION_TIME) {
551                 condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms);
552                 return false;
553         }
554
555         speed = (events * 1000) / eclipse_ms;
556         if (speed > MIN_BURST_SPEED)
557                 return true;
558
559         return false;
560 }
561
/*
 * Listener thread entry point: receive uevents from the udev netlink
 * monitor, accumulate bursts on a private list, and periodically
 * splice them onto the global queue for uevent_dispatch().
 *
 * Returns non-zero on failure (1: no udev context, 2: setup errors,
 * -errno: poll failure).  Thread-cancellation cleanup handlers release
 * the udev/monitor references and free queued events.
 */
int uevent_listen(struct udev *udev)
{
	int err = 2;
	struct udev_monitor *monitor = NULL;
	int fd, socket_flags, events;
	struct timeval start_time;
	/* poll timeout in seconds; shrinks to 1 or 0 while bursting */
	int timeout = 30;
	LIST_HEAD(uevlisten_tmp);

	/*
	 * Queue uevents for service by dedicated thread so that the uevent
	 * listening thread does not block on multipathd locks (vecs->lock)
	 * thereby not getting to empty the socket's receive buffer queue
	 * often enough.
	 */
	if (!udev) {
		condlog(1, "no udev context");
		return 1;
	}
	udev_ref(udev);
	pthread_cleanup_push(uevent_cleanup, udev);

	monitor = udev_monitor_new_from_netlink(udev, "udev");
	if (!monitor) {
		condlog(2, "failed to create udev monitor");
		goto out_udev;
	}
	pthread_cleanup_push(monitor_cleanup, monitor);
#ifdef LIBUDEV_API_RECVBUF
	/* large socket buffer so uevent storms are not dropped by the kernel */
	if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024) < 0)
		condlog(2, "failed to increase buffer size");
#endif
	fd = udev_monitor_get_fd(monitor);
	if (fd < 0) {
		condlog(2, "failed to get monitor fd");
		goto out;
	}
	/* switch to blocking reads; poll() below supplies the timeout */
	socket_flags = fcntl(fd, F_GETFL);
	if (socket_flags < 0) {
		condlog(2, "failed to get monitor socket flags : %s",
			strerror(errno));
		goto out;
	}
	if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
		condlog(2, "failed to set monitor socket flags : %s",
			strerror(errno));
		goto out;
	}
	/* only whole-disk block devices are of interest */
	err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
							      "disk");
	if (err)
		condlog(2, "failed to create filter : %s", strerror(-err));
	err = udev_monitor_enable_receiving(monitor);
	if (err) {
		condlog(2, "failed to enable receiving : %s", strerror(-err));
		goto out;
	}

	events = 0;
	gettimeofday(&start_time, NULL);
	pthread_cleanup_push(cleanup_global_uevq, NULL);
	pthread_cleanup_push(cleanup_uevq, &uevlisten_tmp);
	while (1) {
		struct uevent *uev;
		struct udev_device *dev;
		struct pollfd ev_poll;
		int poll_timeout;
		int fdcount;

		memset(&ev_poll, 0, sizeof(struct pollfd));
		ev_poll.fd = fd;
		ev_poll.events = POLLIN;
		poll_timeout = timeout * 1000;
		errno = 0;
		fdcount = poll(&ev_poll, 1, poll_timeout);
		if (fdcount > 0 && ev_poll.revents & POLLIN) {
			/* keep accumulating while events arrive as a burst */
			timeout = uevent_burst(&start_time, events + 1) ? 1 : 0;
			dev = udev_monitor_receive_device(monitor);
			if (!dev) {
				condlog(0, "failed getting udev device");
				continue;
			}
			uev = uevent_from_udev_device(dev);
			if (!uev)
				continue;
			list_add_tail(&uev->node, &uevlisten_tmp);
			events++;
			continue;
		}
		if (fdcount < 0) {
			if (errno == EINTR)
				continue;

			condlog(0, "error receiving "
				"uevent message: %m");
			err = -errno;
			break;
		}
		/* poll timed out: flush the accumulated batch */
		if (!list_empty(&uevlisten_tmp)) {
			/*
			 * Queue uevents and poke service pthread.
			 */
			condlog(3, "Forwarding %d uevents", events);
			pthread_mutex_lock(uevq_lockp);
			list_splice_tail_init(&uevlisten_tmp, &uevq);
			pthread_cond_signal(uev_condp);
			pthread_mutex_unlock(uevq_lockp);
			events = 0;
		}
		gettimeofday(&start_time, NULL);
		timeout = 30;
	}
	pthread_cleanup_pop(1);
	pthread_cleanup_pop(1);
out:
	pthread_cleanup_pop(1);
out_udev:
	pthread_cleanup_pop(1);
	return err;
}
682
/*
 * Return a heap-allocated copy of environment variable <attr>, or
 * NULL if it is absent (or strdup fails).  Caller frees.
 */
char *uevent_get_dm_str(const struct uevent *uev, char *attr)
{
	const char *val = uevent_get_env_var(uev, attr);

	return val ? strdup(val) : NULL;
}
691
692 bool uevent_is_mpath(const struct uevent *uev)
693 {
694         const char *uuid = uevent_get_env_var(uev, "DM_UUID");
695
696         if (uuid == NULL)
697                 return false;
698         if (strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN))
699                 return false;
700         return uuid[UUID_PREFIX_LEN] != '\0';
701 }