select: Fix indefinitely sleeping task in poll_schedule_timeout()
authorJan Kara <jack@suse.cz>
Mon, 10 Jan 2022 18:19:23 +0000 (19:19 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 29 Jan 2022 09:58:25 +0000 (10:58 +0100)
commit 68514dacf2715d11b91ca50d88de047c086fea9c upstream.

A task can end up indefinitely sleeping in do_select() ->
poll_schedule_timeout() when the following race happens:

  TASK1 (thread1)             TASK2                   TASK1 (thread2)
  do_select()
    setup poll_wqueues table
    with 'fd'
                              write data to 'fd'
                                pollwake()
                                  table->triggered = 1
                                                      closes 'fd' thread1 is
                                                        waiting for
    poll_schedule_timeout()
      - sees table->triggered
      table->triggered = 0
      return -EINTR
    loop back in do_select()

But at this point when TASK1 loops back, the fdget() in the setup of
poll_wqueues fails.  So now so we never find 'fd' is ready for reading
and sleep in poll_schedule_timeout() indefinitely.

Treat an fd that got closed as a fd on which some event happened.  This
makes sure cannot block indefinitely in do_select().

Another option would be to return -EBADF in this case but that has a
potential of subtly breaking applications that excercise this behavior
and it happens to work for them.  So returning fd as active seems like a
safer choice.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/select.c

index 945896d0ac9e7624db36d48169ebc757d4f18adb..5edffee1162c2423821e2282153a5ff1f3c985eb 100644 (file)
@@ -458,9 +458,11 @@ get_max:
        return max;
 }
 
-#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
-#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
-#define POLLEX_SET (EPOLLPRI)
+#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\
+                       EPOLLNVAL)
+#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\
+                        EPOLLNVAL)
+#define POLLEX_SET (EPOLLPRI | EPOLLNVAL)
 
 static inline void wait_key_set(poll_table *wait, unsigned long in,
                                unsigned long out, unsigned long bit,
@@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
                                        break;
                                if (!(bit & all_bits))
                                        continue;
+                               mask = EPOLLNVAL;
                                f = fdget(i);
                                if (f.file) {
                                        wait_key_set(wait, in, out, bit,
@@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
                                        mask = vfs_poll(f.file, wait);
 
                                        fdput(f);
-                                       if ((mask & POLLIN_SET) && (in & bit)) {
-                                               res_in |= bit;
-                                               retval++;
-                                               wait->_qproc = NULL;
-                                       }
-                                       if ((mask & POLLOUT_SET) && (out & bit)) {
-                                               res_out |= bit;
-                                               retval++;
-                                               wait->_qproc = NULL;
-                                       }
-                                       if ((mask & POLLEX_SET) && (ex & bit)) {
-                                               res_ex |= bit;
-                                               retval++;
-                                               wait->_qproc = NULL;
-                                       }
-                                       /* got something, stop busy polling */
-                                       if (retval) {
-                                               can_busy_loop = false;
-                                               busy_flag = 0;
-
-                                       /*
-                                        * only remember a returned
-                                        * POLL_BUSY_LOOP if we asked for it
-                                        */
-                                       } else if (busy_flag & mask)
-                                               can_busy_loop = true;
-
                                }
+                               if ((mask & POLLIN_SET) && (in & bit)) {
+                                       res_in |= bit;
+                                       retval++;
+                                       wait->_qproc = NULL;
+                               }
+                               if ((mask & POLLOUT_SET) && (out & bit)) {
+                                       res_out |= bit;
+                                       retval++;
+                                       wait->_qproc = NULL;
+                               }
+                               if ((mask & POLLEX_SET) && (ex & bit)) {
+                                       res_ex |= bit;
+                                       retval++;
+                                       wait->_qproc = NULL;
+                               }
+                               /* got something, stop busy polling */
+                               if (retval) {
+                                       can_busy_loop = false;
+                                       busy_flag = 0;
+
+                               /*
+                                * only remember a returned
+                                * POLL_BUSY_LOOP if we asked for it
+                                */
+                               } else if (busy_flag & mask)
+                                       can_busy_loop = true;
+
                        }
                        if (res_in)
                                *rinp = res_in;