core: Implement timeout based umount/remount limit
authorKyle Walker <kwalker@redhat.com>
Wed, 13 Dec 2017 17:49:26 +0000 (12:49 -0500)
committerKyle Walker <kwalker@redhat.com>
Thu, 14 Dec 2017 13:58:56 +0000 (08:58 -0500)
Remount, and subsequent umount, attempts can hang for inaccessible network
based mount points. This can leave a system in a hard hang state that
requires a hard reset in order to recover. This change moves the remount,
and umount attempts into separate child processes. The remount and umount
operations will block for up to 90 seconds (DEFAULT_TIMEOUT_USEC). Should
those waits fail, the parent will issue a SIGKILL to the child and continue
with the shutdown efforts.

In addition, instead of only reporting some additional errors on the final
attempt, failures are reported as they occur.

src/basic/process-util.c
src/basic/process-util.h
src/core/umount.c

index 5f00149..0b2eb07 100644 (file)
@@ -699,6 +699,67 @@ int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_cod
         return -EPROTO;
 }
 
+/*
+ * Return values:
+ * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
+ *       process, the process timed out, the process was terminated by a
+ *       signal, or failed for an unknown reason.
+ * >=0 : The process terminated normally with no failures.
+ *
+ * Success is indicated by a return value of zero, a timeout is indicated
+ * by ETIMEDOUT, and all other child failure states are indicated by error
+ * is indicated by a non-zero value.
+ */
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
+        sigset_t mask;
+        int r;
+        usec_t until;
+
+        assert_se(sigemptyset(&mask) == 0);
+        assert_se(sigaddset(&mask, SIGCHLD) == 0);
+
+        /* Drop into a sigtimewait-based timeout. Waiting for the
+         * pid to exit. */
+        until = now(CLOCK_MONOTONIC) + timeout;
+        for (;;) {
+                usec_t n;
+                siginfo_t status = {};
+                struct timespec ts;
+
+                n = now(CLOCK_MONOTONIC);
+                if (n >= until)
+                        break;
+
+                r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
+                /* Assuming we woke due to the child exiting. */
+                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
+                        if (status.si_pid == pid) {
+                                /* This is the correct child.*/
+                                if (status.si_code == CLD_EXITED)
+                                        return (status.si_status == 0) ? 0 : -EPROTO;
+                                else
+                                        return -EPROTO;
+                        }
+                }
+                /* Not the child, check for errors and proceed appropriately */
+                if (r < 0) {
+                        switch (r) {
+                        case -EAGAIN:
+                                /* Timed out, child is likely hung. */
+                                return -ETIMEDOUT;
+                        case -EINTR:
+                                /* Received a different signal and should retry */
+                                continue;
+                        default:
+                                /* Return any unexpected errors */
+                                return r;
+                        }
+                }
+        }
+
+        return -EPROTO;
+}
+
 void sigkill_wait(pid_t pid) {
         assert(pid > 1);
 
index 39c194d..57955e1 100644 (file)
@@ -33,6 +33,7 @@
 #include "format-util.h"
 #include "ioprio.h"
 #include "macro.h"
+#include "time-util.h"
 
 #define procfs_file_alloca(pid, field)                                  \
         ({                                                              \
@@ -61,6 +62,7 @@ int get_process_ppid(pid_t pid, pid_t *ppid);
 
 int wait_for_terminate(pid_t pid, siginfo_t *status);
 int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code);
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout);
 
 void sigkill_wait(pid_t pid);
 void sigkill_waitp(pid_t *pid);
index cd58a9c..7f8ddb9 100644 (file)
@@ -28,6 +28,7 @@
 #include "libudev.h"
 
 #include "alloc-util.h"
+#include "def.h"
 #include "escape.h"
 #include "fd-util.h"
 #include "fstab-util.h"
@@ -35,6 +36,7 @@
 #include "list.h"
 #include "mount-setup.h"
 #include "path-util.h"
+#include "signal-util.h"
 #include "string-util.h"
 #include "udev-util.h"
 #include "umount.h"
@@ -376,10 +378,86 @@ static bool nonunmountable_path(const char *path) {
                 || path_startswith(path, "/run/initramfs");
 }
 
+static int remount_with_timeout(MountPoint *m, char *options, int *n_failed) {
+        pid_t pid;
+        int r;
+
+        BLOCK_SIGNALS(SIGCHLD);
+
+        /* Due to the possiblity of a remount operation hanging, we
+         * fork a child process and set a timeout. If the timeout
+         * lapses, the assumption is that that particular remount
+         * failed. */
+        pid = fork();
+        if (pid < 0)
+                return log_error_errno(errno, "Failed to fork: %m");
+
+        if (pid == 0) {
+                log_info("Remounting '%s' read-only in with options '%s'.", m->path, options);
+
+                /* Start the mount operation here in the child */
+                r = mount(NULL, m->path, NULL, MS_REMOUNT|MS_RDONLY, options);
+                if (r < 0)
+                        log_error_errno(errno, "Failed to remount '%s' read-only: %m", m->path);
+
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+        }
+
+        r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
+        if (r == -ETIMEDOUT) {
+                log_error_errno(errno, "Remounting '%s' - timed out, issuing SIGKILL to PID "PID_FMT".", m->path, pid);
+                (void) kill(pid, SIGKILL);
+        } else if (r < 0)
+                log_error_errno(r, "Failed to wait for process: %m");
+
+        return r;
+}
+
+static int umount_with_timeout(MountPoint *m, bool *changed) {
+        pid_t pid;
+        int r;
+
+        BLOCK_SIGNALS(SIGCHLD);
+
+        /* Due to the possiblity of a umount operation hanging, we
+         * fork a child process and set a timeout. If the timeout
+         * lapses, the assumption is that that particular umount
+         * failed. */
+        pid = fork();
+        if (pid < 0)
+                return log_error_errno(errno, "Failed to fork: %m");
+
+        if (pid == 0) {
+                log_info("Unmounting '%s'.", m->path);
+
+                /* Start the mount operation here in the child Using MNT_FORCE
+                 * causes some filesystems (e.g. FUSE and NFS and other network
+                 * filesystems) to abort any pending requests and return -EIO
+                 * rather than blocking indefinitely. If the filesysten is
+                 * "busy", this may allow processes to die, thus making the
+                 * filesystem less busy so the unmount might succeed (rather
+                 * then return EBUSY).*/
+                r = umount2(m->path, MNT_FORCE);
+                if (r < 0)
+                        log_error_errno(errno, "Failed to unmount %s: %m", m->path);
+
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+        }
+
+        r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
+        if (r == -ETIMEDOUT) {
+                log_error_errno(errno, "Unmounting '%s' - timed out, issuing SIGKILL to PID "PID_FMT".", m->path, pid);
+                (void) kill(pid, SIGKILL);
+        } else if (r < 0)
+                log_error_errno(r, "Failed to wait for process: %m");
+
+        return r;
+}
+
 /* This includes remounting readonly, which changes the kernel mount options.
  * Therefore the list passed to this function is invalidated, and should not be reused. */
 
-static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_error) {
+static int mount_points_list_umount(MountPoint **head, bool *changed) {
         MountPoint *m;
         int n_failed = 0;
 
@@ -425,13 +503,15 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
                          * explicitly remount the super block of that
                          * alias read-only we hence should be
                          * relatively safe regarding keeping dirty an fs
-                         * we cannot otherwise see. */
-                        log_info("Remounting '%s' read-only with options '%s'.", m->path, options);
-                        if (mount(NULL, m->path, NULL, MS_REMOUNT|MS_RDONLY, options) < 0) {
-                                if (log_error)
-                                        log_notice_errno(errno, "Failed to remount '%s' read-only: %m", m->path);
+                         * we cannot otherwise see.
+                         *
+                         * Since the remount can hang in the instance of
+                         * remote filesystems, we remount asynchronously
+                         * and skip the subsequent umount if it fails */
+                        if (remount_with_timeout(m, options, &n_failed) < 0) {
                                 if (nonunmountable_path(m->path))
                                         n_failed++;
+                                continue;
                         }
                 }
 
@@ -441,21 +521,12 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
                 if (nonunmountable_path(m->path))
                         continue;
 
-                /* Trying to umount. Using MNT_FORCE causes some
-                 * filesystems (e.g. FUSE and NFS and other network
-                 * filesystems) to abort any pending requests and
-                 * return -EIO rather than blocking indefinitely.
-                 * If the filesysten is "busy", this may allow processes
-                 * to die, thus making the filesystem less busy so
-                 * the unmount might succeed (rather then return EBUSY).*/
-                log_info("Unmounting %s.", m->path);
-                if (umount2(m->path, MNT_FORCE) == 0) {
+                /* Trying to umount */
+                if (umount_with_timeout(m, changed) < 0)
+                        n_failed++;
+                else {
                         if (changed)
                                 *changed = true;
-                } else {
-                        if (log_error)
-                                log_warning_errno(errno, "Could not unmount %s: %m", m->path);
-                        n_failed++;
                 }
         }
 
@@ -556,7 +627,7 @@ static int dm_points_list_detach(MountPoint **head, bool *changed) {
         return n_failed;
 }
 
-static int umount_all_once(bool *changed, bool log_error) {
+static int umount_all_once(bool *changed) {
         int r;
         LIST_HEAD(MountPoint, mp_list_head);
 
@@ -565,7 +636,7 @@ static int umount_all_once(bool *changed, bool log_error) {
         if (r < 0)
                 goto end;
 
-        r = mount_points_list_umount(&mp_list_head, changed, log_error);
+        r = mount_points_list_umount(&mp_list_head, changed);
 
   end:
         mount_points_list_free(&mp_list_head);
@@ -577,20 +648,17 @@ int umount_all(bool *changed) {
         bool umount_changed;
         int r;
 
-        /* retry umount, until nothing can be umounted anymore */
+        /* Retry umount, until nothing can be umounted anymore. Mounts are
+         * processed in order, newest first. The retries are needed when
+         * an old mount has been moved, to a path inside a newer mount. */
         do {
                 umount_changed = false;
 
-                umount_all_once(&umount_changed, false);
+                r = umount_all_once(&umount_changed);
                 if (umount_changed)
                         *changed = true;
         } while (umount_changed);
 
-        /* umount one more time with logging enabled */
-        r = umount_all_once(&umount_changed, true);
-        if (umount_changed)
-                *changed = true;
-
         return r;
 }