2 * uevent.c - trigger upon netlink uevents from the kernel
4 * Only kernels from version 2.6.10* on provide the uevent netlink socket.
5 * Until the libc-kernel-headers are updated, you need to compile with:
7 * gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
9 * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation version 2 of the License.
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with this program. If not, see <http://www.gnu.org/licenses/>.
34 #include <sys/socket.h>
38 #include <linux/types.h>
39 #include <linux/netlink.h>
54 #include "blacklist.h"
55 #include "devmapper.h"
/* Tunables for uevent burst accumulation; consumed by uevent_burst(). */
57 #define MAX_ACCUMULATION_COUNT 2048
/* Maximum burst accumulation time in milliseconds (30 seconds). */
58 #define MAX_ACCUMULATION_TIME 30*1000
/* Minimum event rate (events per second) still considered a burst. */
59 #define MIN_BURST_SPEED 10
/* Callback type invoked for each dequeued uevent by the service thread. */
61 typedef int (uev_trigger)(struct uevent *, void * trigger_data);
/* Mutex and condition variable guarding the global uevent queue (uevq). */
64 pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
65 pthread_mutex_t *uevq_lockp = &uevq_lock;
66 pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
67 pthread_cond_t *uev_condp = &uev_cond;
/* Trigger callback and its opaque argument, set by uevent_dispatch(). */
68 uev_trigger *my_uev_trigger;
69 void * my_trigger_data;
/*
 * Report whether uevent processing is pending: true if the queue is
 * non-empty or a uevent is currently being serviced (servicing_uev).
 * Queue inspection is done under uevq_lockp.
 */
72 int is_uevent_busy(void)
76 	pthread_mutex_lock(uevq_lockp);
77 	empty = list_empty(&uevq);
78 	pthread_mutex_unlock(uevq_lockp);
79 	return (!empty || servicing_uev);
/*
 * Allocate a zero-initialized struct uevent and set up its two list
 * heads (node: queue linkage, merge_node: uevents merged into this one).
 * NOTE(review): NULL-check/return lines are elided in this listing.
 */
82 struct uevent * alloc_uevent (void)
84 	struct uevent *uev = MALLOC(sizeof(struct uevent));
87 	INIT_LIST_HEAD(&uev->node);
88 	INIT_LIST_HEAD(&uev->merge_node);
/*
 * Drain a uevent list: unlink each entry, drop its udev device
 * reference, and (in elided lines) free the uevent itself.
 */
95 uevq_cleanup(struct list_head *tmpq)
97 	struct uevent *uev, *tmp;
99 	list_for_each_entry_safe(uev, tmp, tmpq, node) {
100 		list_del_init(&uev->node);
103 		udev_device_unref(uev->udev);
/*
 * Search uev->envp for an "attr=value" entry and return a pointer to
 * the value text inside the uevent's own environment strings.
 * Rejects an empty variable name (logged at level 2); logs the lookup
 * result at level 4. NOTE(review): the lines assigning p and the
 * return statements are elided in this listing.
 */
108 static const char* uevent_get_env_var(const struct uevent *uev,
112 	const char *p = NULL;
121 	for (i = 0; uev->envp[i] != NULL; i++) {
122 		const char *var = uev->envp[i];
124 		if (strlen(var) > len &&
125 		    !memcmp(var, attr, len) && var[len] == '=') {
131 	condlog(4, "%s: %s -> '%s'", __func__, attr, p);
135 	condlog(2, "%s: empty variable name", __func__);
/*
 * Fetch environment variable "attr" and parse it as a non-negative
 * decimal integer with strtoul(); trailing garbage or a negative
 * result is rejected with a log message (error return elided here).
 */
139 static int uevent_get_env_positive_int(const struct uevent *uev,
142 	const char *p = uevent_get_env_var(uev, attr);
146 	if (p == NULL || *p == '\0')
149 	ret = strtoul(p, &q, 10);
150 	if (*q != '\0' || ret < 0) {
151 		condlog(2, "%s: invalid %s: '%s'", __func__, attr, p);
/*
 * Determine the WWID of the device behind a uevent: look up the
 * configured uid attribute for uev->kernel (under the multipath
 * config lock), read that variable from the uevent environment,
 * and (in elided lines) store it in the uevent. The uid_attribute
 * string returned by parse_uid_attribute_by_attrs() is freed here.
 */
158 uevent_get_wwid(struct uevent *uev)
160 	const char *uid_attribute;
162 	struct config * conf;
164 	conf = get_multipath_config();
165 	uid_attribute = parse_uid_attribute_by_attrs(conf->uid_attrs, uev->kernel);
166 	put_multipath_config(conf);
168 	val = uevent_get_env_var(uev, uid_attribute);
171 	FREE_CONST(uid_attribute);
/*
 * Check the multipath configuration (under the config lock) to decide
 * whether uevent merging is enabled. NOTE(review): the line reading
 * the config flag into need_merge is elided in this listing.
 */
175 uevent_need_merge(void)
177 	struct config * conf;
178 	bool need_merge = false;
180 	conf = get_multipath_config();
183 	put_multipath_config(conf);
/*
 * Decide whether a uevent can be dropped outright by the devnode
 * blacklist/exception filter. Device-mapper devices ("dm-*") are
 * never filtered by devnode.
 */
189 uevent_can_discard(struct uevent *uev)
191 	struct config * conf;
194 	 * do not filter dm devices by devnode
196 	if (!strncmp(uev->kernel, "dm-", 3))
199 	 * filter paths devices by devnode
201 	conf = get_multipath_config();
202 	if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
204 		put_multipath_config(conf);
207 	put_multipath_config(conf);
/*
 * Decide whether an earlier queued uevent is made redundant by a
 * later one for the same kernel device (never for "dm-*" devices).
 */
213 uevent_can_filter(struct uevent *earlier, struct uevent *later)
217 	 * filter earlier uevents if the path was removed later. Eg:
218 	 * "add path1 |change path1 |add path2 |remove path1"
220 	 * "add path2 |remove path1"
221 	 * uevents "add path1" and "change path1" are filtered out
223 	if (!strcmp(earlier->kernel, later->kernel) &&
224 	    !strcmp(later->action, "remove") &&
225 	    strncmp(later->kernel, "dm-", 3)) {
230 	 * filter change uevents if add uevents exist. Eg:
231 	 * "change path1| add path1 |add path2"
233 	 * "add path1 |add path2"
234 	 * uevent "change path1" is filtered out
236 	if (!strcmp(earlier->kernel, later->kernel) &&
237 	    !strcmp(earlier->action, "change") &&
238 	    !strcmp(later->action, "add") &&
239 	    strncmp(later->kernel, "dm-", 3)) {
/*
 * Decide whether merging must stop at this pair of uevents: for dm
 * devices, when a WWID is missing, or when two non-"change" uevents
 * from the same LUN carry opposite actions (merging across them would
 * reorder adds and removes).
 */
247 merge_need_stop(struct uevent *earlier, struct uevent *later)
250 	 * dm uevent do not try to merge with left uevents
252 	if (!strncmp(later->kernel, "dm-", 3))
256 	 * we can not make a judgement without wwid,
257 	 * so it is sensible to stop merging
259 	if (!earlier->wwid || !later->wwid)
262 	 * uevents merging stopped
263 	 * when we meet an opposite action uevent from the same LUN to AVOID
264 	 * "add path1 |remove path1 |add path2 |remove path2 |add path3"
265 	 * to merge as "remove path1, path2" and "add path1, path2, path3"
267 	 * "remove path1 |add path1 |remove path2 |add path2 |remove path3"
268 	 * to merge as "add path1, path2" and "remove path1, path2, path3"
270 	 * when we meet a non-change uevent from the same LUN
271 	 * with the same wwid and different action
272 	 * it would be better to stop merging.
274 	if (!strcmp(earlier->wwid, later->wwid) &&
275 	    strcmp(earlier->action, later->action) &&
276 	    strcmp(earlier->action, "change") &&
277 	    strcmp(later->action, "change"))
/*
 * Two path uevents may merge when both have a WWID, the WWIDs match,
 * the actions match, the action is not "change", and the device is
 * not a dm device.
 */
284 uevent_can_merge(struct uevent *earlier, struct uevent *later)
286 	/* merge paths uevents
287 	 * whose wwids exist and are same
288 	 * and actions are same,
289 	 * and actions are addition or deletion
291 	if (earlier->wwid && later->wwid &&
292 	    !strcmp(earlier->wwid, later->wwid) &&
293 	    !strcmp(earlier->action, later->action) &&
294 	    strncmp(earlier->action, "change", 6) &&
295 	    strncmp(earlier->kernel, "dm-", 3)) {
/*
 * First pass over the temporary queue (newest to oldest): discard
 * uevents rejected by uevent_can_discard(), and resolve the WWID for
 * each surviving non-dm uevent so later filter/merge passes can
 * compare by WWID.
 */
303 uevent_prepare(struct list_head *tmpq)
305 	struct uevent *uev, *tmp;
307 	list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) {
308 		if (uevent_can_discard(uev)) {
309 			list_del_init(&uev->node);
311 			udev_device_unref(uev->udev);
316 		if (strncmp(uev->kernel, "dm-", 3) &&
318 			uevent_get_wwid(uev);
/*
 * Walk the queue backwards from "later" and delete every earlier
 * uevent that uevent_can_filter() declares redundant, dropping its
 * udev reference (the free of the uevent is elided in this listing).
 */
323 uevent_filter(struct uevent *later, struct list_head *tmpq)
325 	struct uevent *earlier, *tmp;
327 	list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
329 		 * filter unnecessary earlier uevents
330 		 * by the later uevent
332 		if (uevent_can_filter(earlier, later)) {
333 			condlog(2, "uevent: %s-%s has filtered by uevent: %s-%s",
334 				earlier->kernel, earlier->action,
335 				later->kernel, later->action);
337 			list_del_init(&earlier->node);
339 			udev_device_unref(earlier->udev);
/*
 * Walk the queue backwards from "later", stopping where
 * merge_need_stop() says so, and move every mergeable earlier uevent
 * onto later->merge_node so it is handled together with "later".
 */
346 uevent_merge(struct uevent *later, struct list_head *tmpq)
348 	struct uevent *earlier, *tmp;
350 	list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
351 		if (merge_need_stop(earlier, later))
354 		 * merge earlier uevents to the later uevent
356 		if (uevent_can_merge(earlier, later)) {
357 			condlog(2, "merged uevent: %s-%s-%s with uevent: %s-%s-%s",
358 				earlier->action, earlier->kernel, earlier->wwid,
359 				later->action, later->kernel, later->wwid);
361 			list_move(&earlier->node, &later->merge_node);
/*
 * Condense a batch of queued uevents: prepare (discard + WWID lookup),
 * then for each uevent (newest first) filter out redundant earlier
 * events and, if merging is enabled in the config, merge compatible
 * ones into it.
 */
367 merge_uevq(struct list_head *tmpq)
369 	struct uevent *later;
371 	uevent_prepare(tmpq);
372 	list_for_each_entry_reverse(later, tmpq, node) {
373 		uevent_filter(later, tmpq);
374 		if(uevent_need_merge())
375 			uevent_merge(later, tmpq);
/*
 * Process every uevent on the temporary queue: invoke the registered
 * trigger callback, then release the uevents merged into this one
 * (uevq_cleanup on merge_node) and drop the udev device reference.
 */
380 service_uevq(struct list_head *tmpq)
382 	struct uevent *uev, *tmp;
384 	list_for_each_entry_safe(uev, tmp, tmpq, node) {
385 		list_del_init(&uev->node);
387 		if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
388 			condlog(0, "uevent trigger error");
390 		uevq_cleanup(&uev->merge_node);
393 		udev_device_unref(uev->udev);
/*
 * pthread cleanup handler: release the udev context passed as arg
 * (the unref call itself is elided in this listing).
 */
398 static void uevent_cleanup(void *arg)
400 	struct udev *udev = arg;
402 	condlog(3, "Releasing uevent_listen() resources");
/* pthread cleanup handler: drop the udev monitor reference. */
406 static void monitor_cleanup(void *arg)
408 	struct udev_monitor *monitor = arg;
410 	condlog(3, "Releasing uevent_monitor() resources");
411 	udev_monitor_unref(monitor);
415  * Service the uevent queue.
/*
 * Dedicated service loop: registers the trigger callback, locks all
 * memory (mlockall) to avoid paging, then repeatedly waits on the
 * condition variable for queued uevents, splices them to a private
 * list under the lock, and merges + services them outside the lock.
 */
417 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
420 	my_uev_trigger = uev_trigger;
421 	my_trigger_data = trigger_data;
423 	mlockall(MCL_CURRENT | MCL_FUTURE);
428 		pthread_mutex_lock(uevq_lockp);
431 		 * Condition signals are unreliable,
432 		 * so make sure we only wait if we have to.
434 		if (list_empty(&uevq)) {
435 			pthread_cond_wait(uev_condp, uevq_lockp);
438 		list_splice_init(&uevq, &uevq_tmp);
439 		pthread_mutex_unlock(uevq_lockp);
442 		merge_uevq(&uevq_tmp);
443 		service_uevq(&uevq_tmp);
445 	condlog(3, "Terminating uev service queue");
/*
 * Build a struct uevent from a raw kernel netlink message buffer of
 * the form "action@devpath\0KEY=VALUE\0...". Copies the shared buffer
 * into the uevent's private buffer, splits out action/devpath, and
 * rebuilds envp[] from the trailing environment, skipping SEQNUM
 * (which is parsed into uev->seqnum instead).
 * NOTE(review): the sizeof(buf) clamp at original lines 464-465
 * operates on a char* parameter, so sizeof(buf) is the pointer size,
 * not the buffer size — looks like a latent bug; callers appear to
 * clamp buflen themselves (see failback_listen). TODO confirm.
 */
450 struct uevent *uevent_from_buffer(char *buf, ssize_t buflen)
458 	uev = alloc_uevent();
460 		condlog(1, "lost uevent, oom");
464 	if ((size_t)buflen > sizeof(buf)-1)
465 		buflen = sizeof(buf)-1;
468 	 * Copy the shared receive buffer contents to buffer private
469 	 * to this uevent so we can immediately reuse the shared buffer.
471 	memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
472 	buffer = uev->buffer;
473 	buffer[buflen] = '\0';
475 	/* save start of payload */
476 	bufpos = strlen(buffer) + 1;
479 	uev->action = buffer;
480 	pos = strchr(buffer, '@');
482 		condlog(3, "bad action string '%s'", buffer);
489 	uev->devpath = &pos[1];
491 	/* hotplug events have the environment attached - reconstruct envp[] */
492 	for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
496 		key = &buffer[bufpos];
497 		keylen = strlen(key);
499 		/* Filter out sequence number */
500 		if (strncmp(key, "SEQNUM=", 7) == 0) {
503 			uev->seqnum = strtoul(key + 7, &eptr, 10);
507 		bufpos += keylen + 1;
511 	condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
512 		uev->action, uev->devpath);
513 	uev->kernel = strrchr(uev->devpath, '/');
517 	/* print payload environment */
518 	for (i = 0; uev->envp[i] != NULL; i++)
519 		condlog(5, "%s", uev->envp[i]);
/*
 * Fallback uevent listener used when libudev monitoring is not
 * available: first tries an abstract-namespace AF_LOCAL udev socket,
 * otherwise binds a NETLINK_KOBJECT_UEVENT socket directly. Receives
 * raw messages, validates sender credentials (root only) and message
 * framing, converts each message with uevent_from_buffer(), and
 * queues it for the service thread.
 */
524 int failback_listen(void)
527 	struct sockaddr_nl snl;
528 	struct sockaddr_un sun;
531 	int rcvbufsz = 128*1024;
533 	int rcvszsz = sizeof(rcvsz);
534 	unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
535 	const int feature_on = 1;
537 	 * First check whether we have a udev socket
539 	memset(&sun, 0x00, sizeof(struct sockaddr_un));
540 	sun.sun_family = AF_LOCAL;
	/* leading '\0' in sun_path => abstract socket namespace */
541 	strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
542 	addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
544 	sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
547 		condlog(3, "reading events from udev socket.");
549 		/* the bind takes care of ensuring only one copy running */
550 		retval = bind(sock, (struct sockaddr *) &sun, addrlen);
552 			condlog(0, "bind failed, exit");
556 		/* enable receiving of the sender credentials */
557 		retval = setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
558 				    &feature_on, sizeof(feature_on));
560 			condlog(0, "failed to enable credential passing, exit");
565 		/* Fallback to read kernel netlink events */
566 		memset(&snl, 0x00, sizeof(struct sockaddr_nl));
567 		snl.nl_family = AF_NETLINK;
568 		snl.nl_pid = getpid();
		/* group 1 = kernel uevent multicast group */
569 		snl.nl_groups = 0x01;
571 		sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
573 			condlog(0, "error getting socket, exit");
577 		condlog(3, "reading events from kernel.");
580 		 * try to avoid dropping uevents, even so, this is not a guarantee,
581 		 * but it does help to change the netlink uevent socket's
582 		 * receive buffer threshold from the default value of 106,496 to
583 		 * the maximum value of 262,142.
585 		retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
589 			condlog(0, "error setting receive buffer size for socket, exit");
592 		retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
594 			condlog(0, "error setting receive buffer size for socket, exit");
597 		condlog(3, "receive buffer size for socket is %u.", rcvsz);
599 		/* enable receiving of the sender credentials */
600 		if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
601 			       &feature_on, sizeof(feature_on)) < 0) {
602 			condlog(0, "error on enabling credential passing for socket");
606 		retval = bind(sock, (struct sockaddr *) &snl,
607 			      sizeof(struct sockaddr_nl));
609 			condlog(0, "bind failed, exit");
	/* Receive loop: one datagram per uevent, with SCM_CREDENTIALS. */
620 		char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
621 		struct cmsghdr *cmsg;
623 		static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
625 		memset(buf, 0x00, sizeof(buf));
627 		iov.iov_len = sizeof(buf);
628 		memset (&smsg, 0x00, sizeof(struct msghdr));
631 		smsg.msg_control = cred_msg;
632 		smsg.msg_controllen = sizeof(cred_msg);
634 		buflen = recvmsg(sock, &smsg, 0);
637 			condlog(0, "error receiving message, errno %d", errno);
641 		cmsg = CMSG_FIRSTHDR(&smsg);
642 		if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
643 			condlog(3, "no sender credentials received, message ignored");
647 		cred = (struct ucred *)CMSG_DATA(cmsg);
		/* only trust messages from root */
648 		if (cred->uid != 0) {
649 			condlog(3, "sender uid=%d, message ignored", cred->uid);
654 		bufpos = strlen(buf) + 1;
		/* "a@/d" is the shortest possible "action@devpath" header */
655 		if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
656 			condlog(3, "invalid message length");
660 		/* check message header */
661 		if (strstr(buf, "@/") == NULL) {
662 			condlog(3, "unrecognized message header");
665 		if ((size_t)buflen > sizeof(buf)-1) {
666 			condlog(2, "buffer overflow for received uevent");
667 			buflen = sizeof(buf)-1;
670 		uev = uevent_from_buffer(buf, buflen);
674 		 * Queue uevent and poke service pthread.
676 		pthread_mutex_lock(uevq_lockp);
677 		list_add_tail(&uev->node, &uevq);
678 		pthread_cond_signal(uev_condp);
679 		pthread_mutex_unlock(uevq_lockp);
/*
 * Build a struct uevent from a libudev device: serialize each udev
 * property as "NAME=VALUE" into the uevent's private buffer, fill
 * envp[], and point devpath/action into the copied strings ("+ 8" and
 * "+ 7" skip the "DEVPATH="/"ACTION=" prefixes). Takes ownership of
 * the udev_device reference; on OOM the reference is dropped here.
 */
687 struct uevent *uevent_from_udev_device(struct udev_device *dev)
692 	struct udev_list_entry *list_entry;
694 	uev = alloc_uevent();
696 		udev_device_unref(dev);
697 		condlog(1, "lost uevent, oom");
701 	end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
702 	udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
703 		const char *name, *value;
706 		name = udev_list_entry_get_name(list_entry);
709 		value = udev_list_entry_get_value(list_entry);
712 		bytes = snprintf(pos, end - pos, "%s=%s", name, value);
713 		if (pos + bytes >= end) {
714 			condlog(2, "buffer overflow for uevent");
721 		if (strcmp(name, "DEVPATH") == 0)
722 			uev->devpath = uev->envp[i] + 8;
723 		if (strcmp(name, "ACTION") == 0)
724 			uev->action = uev->envp[i] + 7;
726 		if (i == HOTPLUG_NUM_ENVP - 1)
732 	condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
733 	uev->kernel = strrchr(uev->devpath, '/');
737 	/* print payload environment */
738 	for (i = 0; uev->envp[i] != NULL; i++)
739 		condlog(5, "%s", uev->envp[i]);
/*
 * Decide whether we are inside a uevent burst and should keep
 * accumulating events before forwarding them: stop after
 * MAX_ACCUMULATION_COUNT events or MAX_ACCUMULATION_TIME ms, and
 * otherwise keep accumulating while the event rate exceeds
 * MIN_BURST_SPEED events/second. ("eclipse_ms" is presumably meant
 * as "elapsed_ms".)
 */
743 bool uevent_burst(struct timeval *start_time, int events)
745 	struct timeval diff_time, end_time;
747 	unsigned long eclipse_ms;
749 	if(events > MAX_ACCUMULATION_COUNT) {
750 		condlog(2, "burst got %u uevents, too much uevents, stopped", events);
754 	gettimeofday(&end_time, NULL);
755 	timersub(&end_time, start_time, &diff_time);
757 	eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000;
762 	if (eclipse_ms > MAX_ACCUMULATION_TIME) {
763 		condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms);
767 	speed = (events * 1000) / eclipse_ms;
768 	if (speed > MIN_BURST_SPEED)
/*
 * Main uevent listener thread body: creates a libudev "udev" monitor
 * (with a large receive buffer when LIBUDEV_API_RECVBUF is available),
 * switches its fd to blocking mode, filters for the "block" subsystem,
 * then polls for events. Received uevents are collected on a local
 * list during a burst (rate-gated by uevent_burst()) and spliced onto
 * the shared queue in one locked operation. Falls back to
 * failback_listen() if monitoring could not be set up.
 */
774 int uevent_listen(struct udev *udev)
777 	struct udev_monitor *monitor = NULL;
778 	int fd, socket_flags, events;
779 	struct timeval start_time;
780 	int need_failback = 1;
782 	LIST_HEAD(uevlisten_tmp);
785 	 * Queue uevents for service by dedicated thread so that the uevent
786 	 * listening thread does not block on multipathd locks (vecs->lock)
787 	 * thereby not getting to empty the socket's receive buffer queue
791 		condlog(1, "no udev context");
795 	pthread_cleanup_push(uevent_cleanup, udev);
797 	monitor = udev_monitor_new_from_netlink(udev, "udev");
799 		condlog(2, "failed to create udev monitor");
802 	pthread_cleanup_push(monitor_cleanup, monitor);
803 #ifdef LIBUDEV_API_RECVBUF
804 	if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
805 		condlog(2, "failed to increase buffer size");
807 	fd = udev_monitor_get_fd(monitor);
809 		condlog(2, "failed to get monitor fd");
	/* make the monitor fd blocking so poll() drives the loop */
812 	socket_flags = fcntl(fd, F_GETFL);
813 	if (socket_flags < 0) {
814 		condlog(2, "failed to get monitor socket flags : %s",
818 	if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
819 		condlog(2, "failed to set monitor socket flags : %s",
823 	err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
826 		condlog(2, "failed to create filter : %s", strerror(-err));
827 	err = udev_monitor_enable_receiving(monitor);
829 		condlog(2, "failed to enable receiving : %s", strerror(-err));
834 	gettimeofday(&start_time, NULL);
	/* event loop: poll, accumulate during bursts, then forward */
837 		struct udev_device *dev;
838 		struct pollfd ev_poll;
842 		memset(&ev_poll, 0, sizeof(struct pollfd));
844 		ev_poll.events = POLLIN;
845 		poll_timeout = timeout * 1000;
847 		fdcount = poll(&ev_poll, 1, poll_timeout);
848 		if (fdcount && ev_poll.revents & POLLIN) {
			/* during a burst keep a short 1s timeout to batch events */
849 			timeout = uevent_burst(&start_time, events + 1) ? 1 : 0;
850 			dev = udev_monitor_receive_device(monitor);
852 				condlog(0, "failed getting udev device");
855 			uev = uevent_from_udev_device(dev);
858 			list_add_tail(&uev->node, &uevlisten_tmp);
866 			condlog(0, "error receiving "
867 				"uevent message: %m");
871 		if (!list_empty(&uevlisten_tmp)) {
873 			 * Queue uevents and poke service pthread.
875 			condlog(3, "Forwarding %d uevents", events);
876 			pthread_mutex_lock(uevq_lockp);
877 			list_splice_tail_init(&uevlisten_tmp, &uevq);
878 			pthread_cond_signal(uev_condp);
879 			pthread_mutex_unlock(uevq_lockp);
882 		gettimeofday(&start_time, NULL);
888 	pthread_cleanup_pop(1);
890 		err = failback_listen();
891 	pthread_cleanup_pop(1);
/* Parse the MAJOR environment variable of a uevent as an integer. */
895 int uevent_get_major(const struct uevent *uev)
897 	return uevent_get_env_positive_int(uev, "MAJOR");
/* Parse the MINOR environment variable of a uevent as an integer. */
900 int uevent_get_minor(const struct uevent *uev)
902 	return uevent_get_env_positive_int(uev, "MINOR");
/* Parse the DISK_RO environment variable of a uevent as an integer. */
905 int uevent_get_disk_ro(const struct uevent *uev)
907 	return uevent_get_env_positive_int(uev, "DISK_RO");
/*
 * Look up a DM_* environment variable; the elided remainder of this
 * function presumably duplicates/returns the value — TODO confirm.
 */
910 static const char *uevent_get_dm_str(const struct uevent *uev, char *attr)
912 	const char *tmp = uevent_get_env_var(uev, attr);
/* Return the DM_NAME value of a uevent (via uevent_get_dm_str). */
919 const char *uevent_get_dm_name(const struct uevent *uev)
921 	return uevent_get_dm_str(uev, "DM_NAME");
/* Return the DM_PATH value of a uevent (via uevent_get_dm_str). */
924 const char *uevent_get_dm_path(const struct uevent *uev)
926 	return uevent_get_dm_str(uev, "DM_PATH");
/* Return the DM_ACTION value of a uevent (via uevent_get_dm_str). */
929 const char *uevent_get_dm_action(const struct uevent *uev)
931 	return uevent_get_dm_str(uev, "DM_ACTION");
/*
 * True if the uevent's DM_UUID starts with the multipath UUID prefix
 * and has a non-empty remainder after it (i.e. the device is a
 * multipath map). NULL-uuid handling is elided in this listing.
 */
934 bool uevent_is_mpath(const struct uevent *uev)
936 	const char *uuid = uevent_get_env_var(uev, "DM_UUID");
940 	if (strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN))
942 	return uuid[UUID_PREFIX_LEN] != '\0';