2 * uevent.c - trigger upon netlink uevents from the kernel
4 * Only kernels from version 2.6.10* on provide the uevent netlink socket.
5 * Until the libc-kernel-headers are updated, you need to compile with:
7 * gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
9 * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation version 2 of the License.
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with this program. If not, see <http://www.gnu.org/licenses/>.
34 #include <sys/socket.h>
38 #include <linux/types.h>
39 #include <linux/netlink.h>
52 #include "blacklist.h"
53 #include "devmapper.h"
/* Stop accumulating a uevent burst after this many events */
#define MAX_ACCUMULATION_COUNT 2048
/* Stop accumulating a uevent burst after this many milliseconds (30 s) */
#define MAX_ACCUMULATION_TIME 30*1000
/* Minimum arrival rate (events/second) still treated as a burst */
#define MIN_BURST_SPEED 10

/* Callback invoked by the dispatcher for each serviced uevent */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* Global uevent queue: filled by the listener thread, drained by the
 * dispatcher thread; protected by uevq_lock, signaled via uev_cond. */
static LIST_HEAD(uevq);
static pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t *uevq_lockp = &uevq_lock;
static pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
static pthread_cond_t *uev_condp = &uev_cond;
/* Trigger callback and its opaque argument, set by uevent_dispatch() */
static uev_trigger *my_uev_trigger;
static void *my_trigger_data;
/* Non-zero while the dispatcher is actively servicing uevents */
static int servicing_uev;
/*
 * Return non-zero while uevents are queued or currently being serviced.
 * NOTE(review): some lines of this function (braces, declaration of
 * 'empty') are not visible in this chunk.
 */
int is_uevent_busy(void)
	pthread_mutex_lock(uevq_lockp);
	empty = list_empty(&uevq);
	pthread_mutex_unlock(uevq_lockp);
	/* busy if the queue is non-empty or the dispatcher is mid-service */
	return (!empty || servicing_uev);
/*
 * Allocate a zero-initialized struct uevent with its list heads ready.
 * Caller owns the returned memory (freed via the cleanup handlers).
 */
struct uevent * alloc_uevent (void)
	struct uevent *uev = calloc(1, sizeof(struct uevent));

	INIT_LIST_HEAD(&uev->node);
	INIT_LIST_HEAD(&uev->merge_node);
92 static void uevq_cleanup(struct list_head *tmpq);
/*
 * pthread cleanup handler: free one uevent, including any uevents
 * previously merged onto its merge_node list, and drop the udev ref.
 */
static void cleanup_uev(void *arg)
	struct uevent *uev = arg;

	uevq_cleanup(&uev->merge_node);
	udev_device_unref(uev->udev);
/* Unlink and release every uevent on tmpq */
static void uevq_cleanup(struct list_head *tmpq)
	struct uevent *uev, *tmp;

	list_for_each_entry_safe(uev, tmp, tmpq, node) {
		list_del_init(&uev->node);
/*
 * Look up the environment variable 'attr' in the uevent's envp array.
 * Returns a pointer to the value after '=', or NULL if not found.
 */
static const char* uevent_get_env_var(const struct uevent *uev,
	const char *p = NULL;

	for (i = 0; uev->envp[i] != NULL; i++) {
		const char *var = uev->envp[i];

		/* match "attr=value": name must equal attr exactly, not a prefix */
		if (strlen(var) > len &&
		    !memcmp(var, attr, len) && var[len] == '=') {

	condlog(4, "%s: %s -> '%s'", __func__, attr, p ?: "(null)");

	condlog(2, "%s: empty variable name", __func__);
/*
 * Parse environment variable 'attr' as a non-negative decimal integer.
 * Rejects missing/empty values, trailing garbage, and negative results.
 */
int uevent_get_env_positive_int(const struct uevent *uev,
	const char *p = uevent_get_env_var(uev, attr);

	if (p == NULL || *p == '\0')

	ret = strtoul(p, &q, 10);
	/* *q != '\0' means trailing non-digit chars; ret < 0 catches wrap */
	if (*q != '\0' || ret < 0) {
		condlog(2, "%s: invalid %s: '%s'", __func__, attr, p);
/*
 * Resolve the uevent's WWID: look up the uid attribute configured for
 * this kernel device name, then read that variable from the uevent env.
 */
uevent_get_wwid(struct uevent *uev)
	struct config * conf;

	conf = get_multipath_config();
	/* ensure config ref is dropped even if cancelled mid-lookup */
	pthread_cleanup_push(put_multipath_config, conf);
	uid_attribute = get_uid_attribute_by_attrs(conf, uev->kernel);
	pthread_cleanup_pop(1);

	val = uevent_get_env_var(uev, uid_attribute);
/* Merging is enabled only if uid_attrs is configured (non-empty) */
static bool uevent_need_merge(void)
	struct config * conf;
	bool need_merge = false;

	conf = get_multipath_config();
	if (VECTOR_SIZE(&conf->uid_attrs) > 0)
	put_multipath_config(conf);
/*
 * True if the uevent may be dropped before queueing: non-dm devices
 * whose devnode matches the configured blacklist.
 */
static bool uevent_can_discard(struct uevent *uev)
	struct config * conf;

	/*
	 * do not filter dm devices by devnode
	 */
	if (!strncmp(uev->kernel, "dm-", 3))
	/*
	 * filter path devices by devnode
	 */
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	if (filter_devnode(conf->blist_devnode, conf->elist_devnode,

	pthread_cleanup_pop(1);
/*
 * Decide whether 'earlier' is made redundant by 'later' and can be
 * dropped from the queue. Only applies to path (non-dm) devices.
 */
uevent_can_filter(struct uevent *earlier, struct uevent *later)

	/*
	 * filter earlier uevents if the path was removed later. Eg:
	 * "add path1 |change path1 |add path2 |remove path1"
	 * is reduced to
	 * "add path2 |remove path1"
	 * uevents "add path1" and "change path1" are filtered out
	 */
	if (!strcmp(earlier->kernel, later->kernel) &&
	    !strcmp(later->action, "remove") &&
	    strncmp(later->kernel, "dm-", 3)) {

	/*
	 * filter change uevents if an add uevent exists. Eg:
	 * "change path1| add path1 |add path2"
	 * is reduced to
	 * "add path1 |add path2"
	 * uevent "change path1" is filtered out
	 */
	if (!strcmp(earlier->kernel, later->kernel) &&
	    !strcmp(earlier->action, "change") &&
	    !strcmp(later->action, "add") &&
	    strncmp(later->kernel, "dm-", 3)) {
/*
 * Decide whether merging must stop at 'earlier' when accumulating
 * uevents onto 'later'.
 */
merge_need_stop(struct uevent *earlier, struct uevent *later)

	/*
	 * dm uevents never merge with earlier uevents
	 */
	if (!strncmp(later->kernel, "dm-", 3))

	/*
	 * we can not make a judgement without wwid,
	 * so it is sensible to stop merging
	 */
	if (!earlier->wwid || !later->wwid)

	/*
	 * uevent merging is stopped
	 * when we meet an opposite-action uevent from the same LUN, to AVOID
	 * "add path1 |remove path1 |add path2 |remove path2 |add path3"
	 * merging as "remove path1, path2" and "add path1, path2, path3"
	 * or
	 * "remove path1 |add path1 |remove path2 |add path2 |remove path3"
	 * merging as "add path1, path2" and "remove path1, path2, path3"
	 *
	 * when we meet a non-change uevent from the same LUN
	 * with the same wwid and a different action
	 * it is better to stop merging.
	 */
	if (!strcmp(earlier->wwid, later->wwid) &&
	    strcmp(earlier->action, later->action) &&
	    strcmp(earlier->action, "change") &&
	    strcmp(later->action, "change"))
/*
 * True if 'earlier' can be merged into 'later': both are path (non-dm)
 * uevents with identical non-change actions and equal, known wwids.
 */
uevent_can_merge(struct uevent *earlier, struct uevent *later)

	/* merge path uevents
	 * whose wwids exist and are the same
	 * and whose actions are the same,
	 * and whose actions are addition or deletion
	 */
	if (earlier->wwid && later->wwid &&
	    !strcmp(earlier->wwid, later->wwid) &&
	    !strcmp(earlier->action, later->action) &&
	    strncmp(earlier->action, "change", 6) &&
	    strncmp(earlier->kernel, "dm-", 3)) {
/*
 * Pre-pass over the temporary queue: drop discardable uevents and
 * resolve the wwid of each remaining path (non-dm) uevent.
 */
uevent_prepare(struct list_head *tmpq)
	struct uevent *uev, *tmp;

	list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) {
		if (uevent_can_discard(uev)) {
			list_del_init(&uev->node);

			udev_device_unref(uev->udev);

		/* wwid is needed later for merge decisions */
		if (strncmp(uev->kernel, "dm-", 3) &&

			uevent_get_wwid(uev);
/*
 * Walk the queue backwards from 'later' and drop earlier uevents that
 * 'later' makes redundant (see uevent_can_filter()).
 */
uevent_filter(struct uevent *later, struct list_head *tmpq)
	struct uevent *earlier, *tmp;

	list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {

		/*
		 * filter unnecessary earlier uevents
		 * by the later uevent
		 */
		if (uevent_can_filter(earlier, later)) {
			condlog(3, "uevent: %s-%s has filtered by uevent: %s-%s",
				earlier->kernel, earlier->action,
				later->kernel, later->action);

			list_del_init(&earlier->node);

			udev_device_unref(earlier->udev);
/*
 * Walk the queue backwards from 'later' and fold mergeable earlier
 * uevents onto later->merge_node; stop at the first merge barrier.
 */
uevent_merge(struct uevent *later, struct list_head *tmpq)
	struct uevent *earlier, *tmp;

	list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
		if (merge_need_stop(earlier, later))

		/*
		 * merge earlier uevents to the later uevent
		 */
		if (uevent_can_merge(earlier, later)) {
			condlog(3, "merged uevent: %s-%s-%s with uevent: %s-%s-%s",
				earlier->action, earlier->kernel, earlier->wwid,
				later->action, later->kernel, later->wwid);

			/* move it off the main queue onto the merge list */
			list_move(&earlier->node, &later->merge_node);
/*
 * Condense the temporary queue: discard, filter, and (when uid_attrs
 * is configured) merge uevents before they are serviced.
 */
merge_uevq(struct list_head *tmpq)
	struct uevent *later;

	uevent_prepare(tmpq);
	list_for_each_entry_reverse(later, tmpq, node) {
		uevent_filter(later, tmpq);
		if(uevent_need_merge())
			uevent_merge(later, tmpq);
/*
 * Run the trigger callback on each uevent in tmpq; each uevent is
 * freed afterwards via the cleanup handler regardless of the result.
 */
service_uevq(struct list_head *tmpq)
	struct uevent *uev, *tmp;

	list_for_each_entry_safe(uev, tmp, tmpq, node) {
		list_del_init(&uev->node);

		pthread_cleanup_push(cleanup_uev, uev);
		if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
			condlog(0, "uevent trigger error");
		/* pop(1): always free the uevent, even on cancellation */
		pthread_cleanup_pop(1);
/* pthread cleanup handler: release the udev context of the listener */
static void uevent_cleanup(void *arg)
	struct udev *udev = arg;

	condlog(3, "Releasing uevent_listen() resources");
/* pthread cleanup handler: drop the udev monitor reference */
static void monitor_cleanup(void *arg)
	struct udev_monitor *monitor = arg;

	condlog(3, "Releasing uevent_monitor() resources");
	udev_monitor_unref(monitor);
417 static void cleanup_uevq(void *arg)
/* pthread cleanup handler: drain the global uevq under its lock */
static void cleanup_global_uevq(void *arg __attribute__((unused)))
	pthread_mutex_lock(uevq_lockp);

	pthread_mutex_unlock(uevq_lockp);
/*
 * Service the uevent queue.
 * Dispatcher thread main loop: wait for uevents on the global queue,
 * splice them to a private list, condense them (merge_uevq) and run
 * the registered trigger on each (service_uevq).
 */
int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
	my_uev_trigger = uev_trigger;
	my_trigger_data = trigger_data;

	/* avoid page faults while handling storage events */
	mlockall(MCL_CURRENT | MCL_FUTURE);

		pthread_cleanup_push(cleanup_mutex, uevq_lockp);
		pthread_mutex_lock(uevq_lockp);

		/*
		 * Condition signals are unreliable,
		 * so make sure we only wait if we have to.
		 */
		if (list_empty(&uevq)) {
			pthread_cond_wait(uev_condp, uevq_lockp);

		/* take the whole queue while locked; service it unlocked */
		list_splice_init(&uevq, &uevq_tmp);
		pthread_cleanup_pop(1);

		pthread_cleanup_push(cleanup_uevq, &uevq_tmp);
		merge_uevq(&uevq_tmp);
		service_uevq(&uevq_tmp);
		pthread_cleanup_pop(1);

	condlog(3, "Terminating uev service queue");
/*
 * Build a struct uevent from a libudev device: copy the property list
 * into uev->envp as "NAME=value" strings and set the devpath/action/
 * kernel shortcuts. Drops the udev_device reference on failure.
 */
static struct uevent *uevent_from_udev_device(struct udev_device *dev)
	struct udev_list_entry *list_entry;

	uev = alloc_uevent();
		udev_device_unref(dev);
		condlog(1, "lost uevent, oom");

	end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
	udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
		const char *name, *value;

		name = udev_list_entry_get_name(list_entry);

		value = udev_list_entry_get_value(list_entry);

		/* append "NAME=value" into the flat env buffer */
		bytes = snprintf(pos, end - pos, "%s=%s", name, value);
		if (pos + bytes >= end) {
			condlog(2, "buffer overflow for uevent");

		/* +8 / +7 skip the "DEVPATH=" / "ACTION=" prefix */
		if (strcmp(name, "DEVPATH") == 0)
			uev->devpath = uev->envp[i] + 8;
		if (strcmp(name, "ACTION") == 0)
			uev->action = uev->envp[i] + 7;

		if (i == HOTPLUG_NUM_ENVP - 1)

	if (!uev->devpath || ! uev->action) {
		udev_device_unref(dev);
		condlog(1, "uevent missing necessary fields");

	condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
	/* kernel device name is the last DEVPATH component */
	uev->kernel = strrchr(uev->devpath, '/');

	/* print payload environment */
	for (i = 0; uev->envp[i] != NULL; i++)
		condlog(5, "%s", uev->envp[i]);
/*
 * Heuristic: are uevents still arriving in a burst? Returns true while
 * accumulation should continue, false when the count cap, the time cap,
 * or a too-low arrival rate ends the burst.
 * NOTE(review): "eclipse_ms" presumably means "elapsed_ms".
 */
static bool uevent_burst(struct timeval *start_time, int events)
	struct timeval diff_time, end_time;

	unsigned long eclipse_ms;

	/* hard cap on the number of accumulated events */
	if(events > MAX_ACCUMULATION_COUNT) {
		condlog(2, "burst got %u uevents, too much uevents, stopped", events);

	gettimeofday(&end_time, NULL);
	timersub(&end_time, start_time, &diff_time);

	eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000;

	/* hard cap on accumulation time */
	if (eclipse_ms > MAX_ACCUMULATION_TIME) {
		condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms);

	/* still a burst while the arrival rate exceeds the threshold */
	speed = (events * 1000) / eclipse_ms;
	if (speed > MIN_BURST_SPEED)
/*
 * Listener thread main loop: receive block-subsystem uevents from a
 * libudev netlink monitor, batch them locally while a burst lasts, then
 * splice the batch onto the global queue and signal the dispatcher.
 */
int uevent_listen(struct udev *udev)
	struct udev_monitor *monitor = NULL;
	int fd, socket_flags, events;
	struct timeval start_time;

	LIST_HEAD(uevlisten_tmp);

	/*
	 * Queue uevents for service by dedicated thread so that the uevent
	 * listening thread does not block on multipathd locks (vecs->lock)
	 * thereby not getting to empty the socket's receive buffer queue
	 */
		condlog(1, "no udev context");

	pthread_cleanup_push(uevent_cleanup, udev);

	monitor = udev_monitor_new_from_netlink(udev, "udev");
		condlog(2, "failed to create udev monitor");

	pthread_cleanup_push(monitor_cleanup, monitor);
#ifdef LIBUDEV_API_RECVBUF
	/* large buffer so bursts are not dropped by the kernel */
	if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024) < 0)
		condlog(2, "failed to increase buffer size");

	fd = udev_monitor_get_fd(monitor);
		condlog(2, "failed to get monitor fd");

	socket_flags = fcntl(fd, F_GETFL);
	if (socket_flags < 0) {
		condlog(2, "failed to get monitor socket flags : %s",

	/* blocking reads: poll() below decides when to receive */
	if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
		condlog(2, "failed to set monitor socket flags : %s",

	err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",

		condlog(2, "failed to create filter : %s", strerror(-err));
	err = udev_monitor_enable_receiving(monitor);

		condlog(2, "failed to enable receiving : %s", strerror(-err));

	gettimeofday(&start_time, NULL);
	pthread_cleanup_push(cleanup_global_uevq, NULL);
	pthread_cleanup_push(cleanup_uevq, &uevlisten_tmp);

		struct udev_device *dev;
		struct pollfd ev_poll;

		memset(&ev_poll, 0, sizeof(struct pollfd));

		ev_poll.events = POLLIN;
		poll_timeout = timeout * 1000;

		fdcount = poll(&ev_poll, 1, poll_timeout);
		if (fdcount > 0 && ev_poll.revents & POLLIN) {
			/* keep accumulating with a short timeout during a burst */
			timeout = uevent_burst(&start_time, events + 1) ? 1 : 0;
			dev = udev_monitor_receive_device(monitor);

				condlog(0, "failed getting udev device");

			uev = uevent_from_udev_device(dev);

			list_add_tail(&uev->node, &uevlisten_tmp);

			condlog(0, "error receiving "
				"uevent message: %m");

		if (!list_empty(&uevlisten_tmp)) {
			/*
			 * Queue uevents and poke service pthread.
			 */
			condlog(3, "Forwarding %d uevents", events);
			pthread_mutex_lock(uevq_lockp);
			list_splice_tail_init(&uevlisten_tmp, &uevq);
			pthread_cond_signal(uev_condp);
			pthread_mutex_unlock(uevq_lockp);

		gettimeofday(&start_time, NULL);

	pthread_cleanup_pop(1);
	pthread_cleanup_pop(1);

	pthread_cleanup_pop(1);

	pthread_cleanup_pop(1);
/*
 * Return a copy of env var 'attr' from the uevent, or NULL if unset.
 * NOTE(review): the allocation/return lines are not visible here;
 * the returned string is presumably heap-allocated — caller frees.
 */
char *uevent_get_dm_str(const struct uevent *uev, char *attr)
	const char *tmp = uevent_get_env_var(uev, attr);
/*
 * True if the uevent belongs to a multipath dm device: DM_UUID is set,
 * starts with the multipath UUID prefix, and has a non-empty suffix.
 */
bool uevent_is_mpath(const struct uevent *uev)
	const char *uuid = uevent_get_env_var(uev, "DM_UUID");

	if (strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN))

	return uuid[UUID_PREFIX_LEN] != '\0';