mount: rescan /proc/self/mountinfo before processing waitid() results

author Lennart Poettering <lennart@poettering.net>

Wed, 17 Jul 2019 16:57:13 +0000 (18:57 +0200)

committer Lennart Poettering <lennart@poettering.net>

Thu, 18 Jul 2019 15:03:11 +0000 (17:03 +0200)
author Lennart Poettering <lennart@poettering.net>
Wed, 17 Jul 2019 16:57:13 +0000 (18:57 +0200)
committer Lennart Poettering <lennart@poettering.net>
Thu, 18 Jul 2019 15:03:11 +0000 (17:03 +0200)
diff --git a/src/core/mount.c b/src/core/mount.c

index 8638b87..eeacf3d 100644 (file)
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -50,6 +50,7 @@ static const UnitActiveState state_translation_table[_MOUNT_STATE_MAX] = {
  
  static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
  static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int mount_process_proc_self_mountinfo(Manager *m);
  
  static bool MOUNT_STATE_WITH_PROCESS(MountState state) {
          return IN_SET(state,
@@ -1280,6 +1281,22 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
          if (pid != m->control_pid)
                  return;
  
+        /* So here's the thing, we really want to know before /usr/bin/mount or /usr/bin/umount exit whether
+         * they established/remove a mount. This is important when mounting, but even more so when unmounting
+         * since we need to deal with nested mounts and otherwise cannot safely determine whether to repeat
+         * the unmounts. In theory, the kernel fires /proc/self/mountinfo changes off before returning from
+         * the mount() or umount() syscalls, and thus we should see the changes to the proc file before we
+         * process the waitid() for the /usr/bin/(u)mount processes. However, this is unfortunately racy: we
+         * have to waitid() for processes using P_ALL (since we need to reap unexpected children that got
+         * reparented to PID 1), but when using P_ALL we might end up reaping processes that terminated just
+         * instants ago, i.e. already after our last event loop iteration (i.e. after the last point we might
+         * have noticed /proc/self/mountinfo events via epoll). This means event loop priorities for
+         * processing SIGCHLD vs. /proc/self/mountinfo IO events are not as relevant as we want. To fix that
+         * race, let's explicitly scan /proc/self/mountinfo before we start processing /usr/bin/(u)mount
+         * dying. It's ugly, but it makes our ordering systematic again, and makes sure we always see
+         * /proc/self/mountinfo changes before our mount/umount exits. */
+        (void) mount_process_proc_self_mountinfo(u->manager);
+
          m->control_pid = 0;
  
          if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
@@ -1774,16 +1791,14 @@ static int drain_libmount(Manager *m) {
          return rescan;
  }
  
-static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+static int mount_process_proc_self_mountinfo(Manager *m) {
          _cleanup_set_free_free_ Set *around = NULL, *gone = NULL;
-        Manager *m = userdata;
          const char *what;
          Iterator i;
          Unit *u;
          int r;
  
          assert(m);
-        assert(revents & EPOLLIN);
  
          r = drain_libmount(m);
          if (r <= 0)
@@ -1888,6 +1903,15 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
          return 0;
  }
  
+static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+        Manager *m = userdata;
+
+        assert(m);
+        assert(revents & EPOLLIN);
+
+        return mount_process_proc_self_mountinfo(m);
+}
+
  static void mount_reset_failed(Unit *u) {
          Mount *m = MOUNT(u);
author	Lennart Poettering <lennart@poettering.net>
	Wed, 17 Jul 2019 16:57:13 +0000 (18:57 +0200)
committer	Lennart Poettering <lennart@poettering.net>
	Thu, 18 Jul 2019 15:03:11 +0000 (17:03 +0200)