2 * uevent.c - trigger upon netlink uevents from the kernel
4 * Only kernels from version 2.6.10* on provide the uevent netlink socket.
5 * Until the libc-kernel-headers are updated, you need to compile with:
7 * gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
9 * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation version 2 of the License.
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with this program. If not, see <http://www.gnu.org/licenses/>.
34 #include <sys/socket.h>
38 #include <linux/types.h>
39 #include <linux/netlink.h>
53 #include "blacklist.h"
54 #include "devmapper.h"
/* Uevent burst accumulation limits: stop treating incoming uevents as one
 * burst once either the event count or the accumulated time is exceeded. */
#define MAX_ACCUMULATION_COUNT 2048
/* Maximum burst accumulation time, in milliseconds (parenthesized so the
 * macro expands safely inside arithmetic expressions). */
#define MAX_ACCUMULATION_TIME (30 * 1000)
/* Minimum event rate (events per second) for traffic to count as a burst. */
#define MIN_BURST_SPEED 10
/* Callback invoked by the dispatcher for each dequeued uevent;
 * returns nonzero on error. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* Global queue of pending uevents: filled by the listener thread
 * (uevent_listen), drained by the dispatcher thread (uevent_dispatch). */
static LIST_HEAD(uevq);
/* Protects uevq and servicing_uev. */
static pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t *uevq_lockp = &uevq_lock;
/* Signalled when new uevents have been appended to uevq. */
static pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
static pthread_cond_t *uev_condp = &uev_cond;
/* Trigger callback and its opaque argument, set by uevent_dispatch(). */
static uev_trigger *my_uev_trigger;
static void *my_trigger_data;
/* Nonzero while the dispatcher is servicing a batch of uevents. */
static int servicing_uev;
71 int is_uevent_busy(void)
75 pthread_mutex_lock(uevq_lockp);
76 empty = list_empty(&uevq);
77 pthread_mutex_unlock(uevq_lockp);
78 return (!empty || servicing_uev);
81 struct uevent * alloc_uevent (void)
83 struct uevent *uev = calloc(1, sizeof(struct uevent));
86 INIT_LIST_HEAD(&uev->node);
87 INIT_LIST_HEAD(&uev->merge_node);
93 static void uevq_cleanup(struct list_head *tmpq);
95 static void cleanup_uev(void *arg)
97 struct uevent *uev = arg;
99 uevq_cleanup(&uev->merge_node);
101 udev_device_unref(uev->udev);
105 static void uevq_cleanup(struct list_head *tmpq)
107 struct uevent *uev, *tmp;
109 list_for_each_entry_safe(uev, tmp, tmpq, node) {
110 list_del_init(&uev->node);
115 static const char* uevent_get_env_var(const struct uevent *uev,
120 const char *p = NULL;
129 for (i = 0; uev->envp[i] != NULL; i++) {
130 const char *var = uev->envp[i];
132 if (strlen(var) > len &&
133 !memcmp(var, attr, len) && var[len] == '=') {
139 condlog(4, "%s: %s -> '%s'", __func__, attr, p ?: "(null)");
143 condlog(2, "%s: empty variable name", __func__);
/*
 * Parse the uevent environment variable 'attr' as a non-negative decimal
 * integer. Returns the value, or -1 if the variable is missing, empty, or
 * not a valid non-negative number.
 * NOTE(review): the strtoul result is narrowed to int, so values above
 * INT_MAX wrap to negative and are rejected — presumed intentional for the
 * small counters this is used on.
 */
int uevent_get_env_positive_int(const struct uevent *uev,
				const char *attr)
{
	const char *p = uevent_get_env_var(uev, attr);
	char *q;
	int ret;

	if (p == NULL || *p == '\0')
		return -1;

	ret = strtoul(p, &q, 10);
	if (*q != '\0' || ret < 0) {
		condlog(2, "%s: invalid %s: '%s'", __func__, attr, p);
		return -1;
	}
	return ret;
}
166 uevent_get_wwid(struct uevent *uev)
170 struct config * conf;
172 conf = get_multipath_config();
173 pthread_cleanup_push(put_multipath_config, conf);
174 uid_attribute = get_uid_attribute_by_attrs(conf, uev->kernel);
175 pthread_cleanup_pop(1);
177 val = uevent_get_env_var(uev, uid_attribute);
182 static bool uevent_need_merge(void)
184 struct config * conf;
185 bool need_merge = false;
187 conf = get_multipath_config();
188 if (VECTOR_SIZE(&conf->uid_attrs) > 0)
190 put_multipath_config(conf);
195 static bool uevent_can_discard(struct uevent *uev)
198 struct config * conf;
201 * do not filter dm devices by devnode
203 if (!strncmp(uev->kernel, "dm-", 3))
206 * filter paths devices by devnode
208 conf = get_multipath_config();
209 pthread_cleanup_push(put_multipath_config, conf);
210 if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
213 pthread_cleanup_pop(1);
221 uevent_can_filter(struct uevent *earlier, struct uevent *later)
225 * filter earlier uvents if path has removed later. Eg:
226 * "add path1 |chang path1 |add path2 |remove path1"
228 * "add path2 |remove path1"
229 * uevents "add path1" and "chang path1" are filtered out
231 if (!strcmp(earlier->kernel, later->kernel) &&
232 !strcmp(later->action, "remove") &&
233 strncmp(later->kernel, "dm-", 3)) {
238 * filter change uvents if add uevents exist. Eg:
239 * "change path1| add path1 |add path2"
241 * "add path1 |add path2"
242 * uevent "chang path1" is filtered out
244 if (!strcmp(earlier->kernel, later->kernel) &&
245 !strcmp(earlier->action, "change") &&
246 !strcmp(later->action, "add") &&
247 strncmp(later->kernel, "dm-", 3)) {
255 merge_need_stop(struct uevent *earlier, struct uevent *later)
258 * dm uevent do not try to merge with left uevents
260 if (!strncmp(later->kernel, "dm-", 3))
264 * we can not make a jugement without wwid,
265 * so it is sensible to stop merging
267 if (!earlier->wwid || !later->wwid)
270 * uevents merging stopped
271 * when we meet an opposite action uevent from the same LUN to AVOID
272 * "add path1 |remove path1 |add path2 |remove path2 |add path3"
273 * to merge as "remove path1, path2" and "add path1, path2, path3"
275 * "remove path1 |add path1 |remove path2 |add path2 |remove path3"
276 * to merge as "add path1, path2" and "remove path1, path2, path3"
278 * when we meet a non-change uevent from the same LUN
279 * with the same wwid and different action
280 * it would be better to stop merging.
282 if (!strcmp(earlier->wwid, later->wwid) &&
283 strcmp(earlier->action, later->action) &&
284 strcmp(earlier->action, "change") &&
285 strcmp(later->action, "change"))
292 uevent_can_merge(struct uevent *earlier, struct uevent *later)
294 /* merge paths uevents
295 * whose wwids exsit and are same
296 * and actions are same,
297 * and actions are addition or deletion
299 if (earlier->wwid && later->wwid &&
300 !strcmp(earlier->wwid, later->wwid) &&
301 !strcmp(earlier->action, later->action) &&
302 strncmp(earlier->action, "change", 6) &&
303 strncmp(earlier->kernel, "dm-", 3)) {
311 uevent_prepare(struct list_head *tmpq)
313 struct uevent *uev, *tmp;
315 list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) {
316 if (uevent_can_discard(uev)) {
317 list_del_init(&uev->node);
319 udev_device_unref(uev->udev);
324 if (strncmp(uev->kernel, "dm-", 3) &&
326 uevent_get_wwid(uev);
331 uevent_filter(struct uevent *later, struct list_head *tmpq)
333 struct uevent *earlier, *tmp;
335 list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
337 * filter unnessary earlier uevents
338 * by the later uevent
340 if (uevent_can_filter(earlier, later)) {
341 condlog(3, "uevent: %s-%s has filtered by uevent: %s-%s",
342 earlier->kernel, earlier->action,
343 later->kernel, later->action);
345 list_del_init(&earlier->node);
347 udev_device_unref(earlier->udev);
354 uevent_merge(struct uevent *later, struct list_head *tmpq)
356 struct uevent *earlier, *tmp;
358 list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
359 if (merge_need_stop(earlier, later))
362 * merge earlier uevents to the later uevent
364 if (uevent_can_merge(earlier, later)) {
365 condlog(3, "merged uevent: %s-%s-%s with uevent: %s-%s-%s",
366 earlier->action, earlier->kernel, earlier->wwid,
367 later->action, later->kernel, later->wwid);
369 list_move(&earlier->node, &later->merge_node);
375 merge_uevq(struct list_head *tmpq)
377 struct uevent *later;
379 uevent_prepare(tmpq);
380 list_for_each_entry_reverse(later, tmpq, node) {
381 uevent_filter(later, tmpq);
382 if(uevent_need_merge())
383 uevent_merge(later, tmpq);
388 service_uevq(struct list_head *tmpq)
390 struct uevent *uev, *tmp;
392 list_for_each_entry_safe(uev, tmp, tmpq, node) {
393 list_del_init(&uev->node);
395 pthread_cleanup_push(cleanup_uev, uev);
396 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
397 condlog(0, "uevent trigger error");
398 pthread_cleanup_pop(1);
/* Thread-cancellation handler: drop the udev context reference held by
 * uevent_listen(). */
static void uevent_cleanup(void *arg)
{
	struct udev *udev = arg;

	condlog(3, "Releasing uevent_listen() resources");
	udev_unref(udev);
}
/* Thread-cancellation handler: release the udev netlink monitor. */
static void monitor_cleanup(void *arg)
{
	struct udev_monitor *monitor = arg;

	condlog(3, "Releasing uevent_monitor() resources");
	udev_monitor_unref(monitor);
}
418 static void cleanup_uevq(void *arg)
423 static void cleanup_global_uevq(void *arg __attribute__((unused)))
425 pthread_mutex_lock(uevq_lockp);
427 pthread_mutex_unlock(uevq_lockp);
431 * Service the uevent queue.
433 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
436 my_uev_trigger = uev_trigger;
437 my_trigger_data = trigger_data;
439 mlockall(MCL_CURRENT | MCL_FUTURE);
444 pthread_cleanup_push(cleanup_mutex, uevq_lockp);
445 pthread_mutex_lock(uevq_lockp);
448 * Condition signals are unreliable,
449 * so make sure we only wait if we have to.
451 if (list_empty(&uevq)) {
452 pthread_cond_wait(uev_condp, uevq_lockp);
455 list_splice_init(&uevq, &uevq_tmp);
456 pthread_cleanup_pop(1);
461 pthread_cleanup_push(cleanup_uevq, &uevq_tmp);
462 merge_uevq(&uevq_tmp);
463 service_uevq(&uevq_tmp);
464 pthread_cleanup_pop(1);
466 condlog(3, "Terminating uev service queue");
470 static struct uevent *uevent_from_udev_device(struct udev_device *dev)
475 struct udev_list_entry *list_entry;
477 uev = alloc_uevent();
479 udev_device_unref(dev);
480 condlog(1, "lost uevent, oom");
484 end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
485 udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
486 const char *name, *value;
489 name = udev_list_entry_get_name(list_entry);
492 value = udev_list_entry_get_value(list_entry);
495 bytes = snprintf(pos, end - pos, "%s=%s", name, value);
496 if (pos + bytes >= end) {
497 condlog(2, "buffer overflow for uevent");
504 if (strcmp(name, "DEVPATH") == 0)
505 uev->devpath = uev->envp[i] + 8;
506 if (strcmp(name, "ACTION") == 0)
507 uev->action = uev->envp[i] + 7;
509 if (i == HOTPLUG_NUM_ENVP - 1)
512 if (!uev->devpath || ! uev->action) {
513 udev_device_unref(dev);
514 condlog(1, "uevent missing necessary fields");
521 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
522 uev->kernel = strrchr(uev->devpath, '/');
526 /* print payload environment */
527 for (i = 0; uev->envp[i] != NULL; i++)
528 condlog(5, "%s", uev->envp[i]);
532 static bool uevent_burst(struct timeval *start_time, int events)
534 struct timeval diff_time, end_time;
536 unsigned long eclipse_ms;
538 if(events > MAX_ACCUMULATION_COUNT) {
539 condlog(2, "burst got %u uevents, too much uevents, stopped", events);
543 gettimeofday(&end_time, NULL);
544 timersub(&end_time, start_time, &diff_time);
546 eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000;
551 if (eclipse_ms > MAX_ACCUMULATION_TIME) {
552 condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms);
556 speed = (events * 1000) / eclipse_ms;
557 if (speed > MIN_BURST_SPEED)
563 int uevent_listen(struct udev *udev)
566 struct udev_monitor *monitor = NULL;
567 int fd, socket_flags, events;
568 struct timeval start_time;
570 LIST_HEAD(uevlisten_tmp);
573 * Queue uevents for service by dedicated thread so that the uevent
574 * listening thread does not block on multipathd locks (vecs->lock)
575 * thereby not getting to empty the socket's receive buffer queue
579 condlog(1, "no udev context");
583 pthread_cleanup_push(uevent_cleanup, udev);
585 monitor = udev_monitor_new_from_netlink(udev, "udev");
587 condlog(2, "failed to create udev monitor");
590 pthread_cleanup_push(monitor_cleanup, monitor);
591 #ifdef LIBUDEV_API_RECVBUF
592 if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024) < 0)
593 condlog(2, "failed to increase buffer size");
595 fd = udev_monitor_get_fd(monitor);
597 condlog(2, "failed to get monitor fd");
600 socket_flags = fcntl(fd, F_GETFL);
601 if (socket_flags < 0) {
602 condlog(2, "failed to get monitor socket flags : %s",
606 if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
607 condlog(2, "failed to set monitor socket flags : %s",
611 err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
614 condlog(2, "failed to create filter : %s", strerror(-err));
615 err = udev_monitor_enable_receiving(monitor);
617 condlog(2, "failed to enable receiving : %s", strerror(-err));
622 gettimeofday(&start_time, NULL);
623 pthread_cleanup_push(cleanup_global_uevq, NULL);
624 pthread_cleanup_push(cleanup_uevq, &uevlisten_tmp);
627 struct udev_device *dev;
628 struct pollfd ev_poll;
632 memset(&ev_poll, 0, sizeof(struct pollfd));
634 ev_poll.events = POLLIN;
635 poll_timeout = timeout * 1000;
637 fdcount = poll(&ev_poll, 1, poll_timeout);
638 if (fdcount > 0 && ev_poll.revents & POLLIN) {
639 timeout = uevent_burst(&start_time, events + 1) ? 1 : 0;
640 dev = udev_monitor_receive_device(monitor);
642 condlog(0, "failed getting udev device");
645 uev = uevent_from_udev_device(dev);
648 list_add_tail(&uev->node, &uevlisten_tmp);
656 condlog(0, "error receiving "
657 "uevent message: %m");
661 if (!list_empty(&uevlisten_tmp)) {
663 * Queue uevents and poke service pthread.
665 condlog(3, "Forwarding %d uevents", events);
666 pthread_mutex_lock(uevq_lockp);
667 list_splice_tail_init(&uevlisten_tmp, &uevq);
668 pthread_cond_signal(uev_condp);
669 pthread_mutex_unlock(uevq_lockp);
672 gettimeofday(&start_time, NULL);
675 pthread_cleanup_pop(1);
676 pthread_cleanup_pop(1);
678 pthread_cleanup_pop(1);
680 pthread_cleanup_pop(1);
/*
 * Return a newly allocated copy of the given uevent environment variable,
 * or NULL if it is not set. Caller must free the result.
 */
char *uevent_get_dm_str(const struct uevent *uev, char *attr)
{
	const char *tmp = uevent_get_env_var(uev, attr);

	if (tmp == NULL)
		return NULL;
	return strdup(tmp);
}
693 bool uevent_is_mpath(const struct uevent *uev)
695 const char *uuid = uevent_get_env_var(uev, "DM_UUID");
699 if (strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN))
701 return uuid[UUID_PREFIX_LEN] != '\0';