Merge branch 'CR_2506_515_usb_wifi_jianlong' into 'vf2-515-devel'

[platform/kernel/linux-starfive.git] / fs / io_uring.c
diff --git a/fs/io_uring.c b/fs/io_uring.c

index 16fb743..bc18af5 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -403,7 +403,6 @@ struct io_ring_ctx {
                 struct wait_queue_head  cq_wait;
                 unsigned                cq_extra;
                 atomic_t                cq_timeouts;
-               struct fasync_struct    *cq_fasync;
                 unsigned                cq_last_tm_flush;
         } ____cacheline_aligned_in_smp;
  
@@ -457,6 +456,8 @@ struct io_ring_ctx {
                 struct work_struct              exit_work;
                 struct list_head                tctx_list;
                 struct completion               ref_comp;
+               u32                             iowq_limits[2];
+               bool                            iowq_limits_set;
         };
  };
  
@@ -502,6 +503,7 @@ struct io_poll_update {
  struct io_close {
         struct file                     *file;
         int                             fd;
+       u32                             file_slot;
  };
  
  struct io_timeout_data {
@@ -712,6 +714,7 @@ struct io_async_rw {
         struct iovec                    fast_iov[UIO_FASTIOV];
         const struct iovec              *free_iovec;
         struct iov_iter                 iter;
+       struct iov_iter_state           iter_state;
         size_t                          bytes_done;
         struct wait_page_queue          wpq;
  };
@@ -735,7 +738,6 @@ enum {
         REQ_F_BUFFER_SELECTED_BIT,
         REQ_F_COMPLETE_INLINE_BIT,
         REQ_F_REISSUE_BIT,
-       REQ_F_DONT_REISSUE_BIT,
         REQ_F_CREDS_BIT,
         REQ_F_REFCOUNT_BIT,
         REQ_F_ARM_LTIMEOUT_BIT,
@@ -782,8 +784,6 @@ enum {
         REQ_F_COMPLETE_INLINE   = BIT(REQ_F_COMPLETE_INLINE_BIT),
         /* caller should reissue async */
         REQ_F_REISSUE           = BIT(REQ_F_REISSUE_BIT),
-       /* don't attempt request reissue, see io_rw_reissue() */
-       REQ_F_DONT_REISSUE      = BIT(REQ_F_DONT_REISSUE_BIT),
         /* supports async reads */
         REQ_F_NOWAIT_READ       = BIT(REQ_F_NOWAIT_READ_BIT),
         /* supports async writes */
@@ -1100,6 +1100,8 @@ static int io_req_prep_async(struct io_kiocb *req);
  
  static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                                  unsigned int issue_flags, u32 slot_index);
+static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
+
  static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
  
  static struct kmem_cache *req_cachep;
@@ -1368,11 +1370,6 @@ static void io_req_track_inflight(struct io_kiocb *req)
         }
  }
  
-static inline void io_unprep_linked_timeout(struct io_kiocb *req)
-{
-       req->flags &= ~REQ_F_LINK_TIMEOUT;
-}
-
  static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
  {
         if (WARN_ON_ONCE(!req->link))
@@ -1613,10 +1610,8 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
                 wake_up(&ctx->sq_data->wait);
         if (io_should_trigger_evfd(ctx))
                 eventfd_signal(ctx->cq_ev_fd, 1);
-       if (waitqueue_active(&ctx->poll_wait)) {
+       if (waitqueue_active(&ctx->poll_wait))
                 wake_up_interruptible(&ctx->poll_wait);
-               kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
-       }
  }
  
  static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
@@ -1630,10 +1625,8 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
         }
         if (io_should_trigger_evfd(ctx))
                 eventfd_signal(ctx->cq_ev_fd, 1);
-       if (waitqueue_active(&ctx->poll_wait)) {
+       if (waitqueue_active(&ctx->poll_wait))
                 wake_up_interruptible(&ctx->poll_wait);
-               kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
-       }
  }
  
  /* Returns true if there are no backlogged entries after the flush */
@@ -2444,13 +2437,6 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
                 req = list_first_entry(done, struct io_kiocb, inflight_entry);
                 list_del(&req->inflight_entry);
  
-               if (READ_ONCE(req->result) == -EAGAIN &&
-                   !(req->flags & REQ_F_DONT_REISSUE)) {
-                       req->iopoll_completed = 0;
-                       io_req_task_queue_reissue(req);
-                       continue;
-               }
-
                 __io_cqring_fill_event(ctx, req->user_data, req->result,
                                         io_put_rw_kbuf(req));
                 (*nr_events)++;
@@ -2613,8 +2599,7 @@ static bool io_resubmit_prep(struct io_kiocb *req)
  
         if (!rw)
                 return !io_req_prep_async(req);
-       /* may have left rw->iter inconsistent on -EIOCBQUEUED */
-       iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter));
+       iov_iter_restore(&rw->iter, &rw->iter_state);
         return true;
  }
  
@@ -2714,10 +2699,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
         if (kiocb->ki_flags & IOCB_WRITE)
                 kiocb_end_write(req);
         if (unlikely(res != req->result)) {
-               if (!(res == -EAGAIN && io_rw_should_reissue(req) &&
-                   io_resubmit_prep(req))) {
-                       req_set_fail(req);
-                       req->flags |= REQ_F_DONT_REISSUE;
+               if (res == -EAGAIN && io_rw_should_reissue(req)) {
+                       req->flags |= REQ_F_REISSUE;
+                       return;
                 }
         }
  
@@ -2843,7 +2827,8 @@ static bool io_file_supports_nowait(struct io_kiocb *req, int rw)
         return __io_file_supports_nowait(req->file, rw);
  }
  
-static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+                     int rw)
  {
         struct io_ring_ctx *ctx = req->ctx;
         struct kiocb *kiocb = &req->rw.kiocb;
@@ -2865,8 +2850,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
         if (unlikely(ret))
                 return ret;
  
-       /* don't allow async punt for O_NONBLOCK or RWF_NOWAIT */
-       if ((kiocb->ki_flags & IOCB_NOWAIT) || (file->f_flags & O_NONBLOCK))
+       /*
+        * If the file is marked O_NONBLOCK, still allow retry for it if it
+        * supports async. Otherwise it's impossible to use O_NONBLOCK files
+        * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
+        */
+       if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+           ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw)))
                 req->flags |= REQ_F_NOWAIT;
  
         ioprio = READ_ONCE(sqe->ioprio);
@@ -2931,7 +2921,6 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
  {
         struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
         struct io_async_rw *io = req->async_data;
-       bool check_reissue = kiocb->ki_complete == io_complete_rw;
  
         /* add previously done IO, if any */
         if (io && io->bytes_done > 0) {
@@ -2943,19 +2932,27 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
  
         if (req->flags & REQ_F_CUR_POS)
                 req->file->f_pos = kiocb->ki_pos;
-       if (ret >= 0 && check_reissue)
+       if (ret >= 0 && (kiocb->ki_complete == io_complete_rw))
                 __io_complete_rw(req, ret, 0, issue_flags);
         else
                 io_rw_done(kiocb, ret);
  
-       if (check_reissue && (req->flags & REQ_F_REISSUE)) {
+       if (req->flags & REQ_F_REISSUE) {
                 req->flags &= ~REQ_F_REISSUE;
                 if (io_resubmit_prep(req)) {
                         io_req_task_queue_reissue(req);
                 } else {
+                       unsigned int cflags = io_put_rw_kbuf(req);
+                       struct io_ring_ctx *ctx = req->ctx;
+
                         req_set_fail(req);
-                       __io_req_complete(req, issue_flags, ret,
-                                         io_put_rw_kbuf(req));
+                       if (!(issue_flags & IO_URING_F_NONBLOCK)) {
+                               mutex_lock(&ctx->uring_lock);
+                               __io_req_complete(req, issue_flags, ret, cflags);
+                               mutex_unlock(&ctx->uring_lock);
+                       } else {
+                               __io_req_complete(req, issue_flags, ret, cflags);
+                       }
                 }
         }
  }
@@ -3263,12 +3260,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
                                 ret = nr;
                         break;
                 }
+               if (!iov_iter_is_bvec(iter)) {
+                       iov_iter_advance(iter, nr);
+               } else {
+                       req->rw.len -= nr;
+                       req->rw.addr += nr;
+               }
                 ret += nr;
                 if (nr != iovec.iov_len)
                         break;
-               req->rw.len -= nr;
-               req->rw.addr += nr;
-               iov_iter_advance(iter, nr);
         }
  
         return ret;
@@ -3315,12 +3315,17 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
         if (!force && !io_op_defs[req->opcode].needs_async_setup)
                 return 0;
         if (!req->async_data) {
+               struct io_async_rw *iorw;
+
                 if (io_alloc_async_data(req)) {
                         kfree(iovec);
                         return -ENOMEM;
                 }
  
                 io_req_map_rw(req, iovec, fast_iov, iter);
+               iorw = req->async_data;
+               /* we've copied and mapped the iter, ensure state is saved */
+               iov_iter_save_state(&iorw->iter, &iorw->iter_state);
         }
         return 0;
  }
@@ -3339,6 +3344,7 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
         iorw->free_iovec = iov;
         if (iov)
                 req->flags |= REQ_F_NEED_CLEANUP;
+       iov_iter_save_state(&iorw->iter, &iorw->iter_state);
         return 0;
  }
  
@@ -3346,7 +3352,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
         if (unlikely(!(req->file->f_mode & FMODE_READ)))
                 return -EBADF;
-       return io_prep_rw(req, sqe);
+       return io_prep_rw(req, sqe, READ);
  }
  
  /*
@@ -3442,19 +3448,28 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
         struct kiocb *kiocb = &req->rw.kiocb;
         struct iov_iter __iter, *iter = &__iter;
         struct io_async_rw *rw = req->async_data;
-       ssize_t io_size, ret, ret2;
         bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       struct iov_iter_state __state, *state;
+       ssize_t ret, ret2;
  
         if (rw) {
                 iter = &rw->iter;
+               state = &rw->iter_state;
+               /*
+                * We come here from an earlier attempt, restore our state to
+                * match in case it doesn't. It's cheap enough that we don't
+                * need to make this conditional.
+                */
+               iov_iter_restore(iter, state);
                 iovec = NULL;
         } else {
                 ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
                 if (ret < 0)
                         return ret;
+               state = &__state;
+               iov_iter_save_state(iter, state);
         }
-       io_size = iov_iter_count(iter);
-       req->result = io_size;
+       req->result = iov_iter_count(iter);
  
         /* Ensure we clear previously set non-block flag */
         if (!force_nonblock)
@@ -3468,7 +3483,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                 return ret ?: -EAGAIN;
         }
  
-       ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
+       ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
         if (unlikely(ret)) {
                 kfree(iovec);
                 return ret;
@@ -3484,30 +3499,49 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                 /* no retry on NONBLOCK nor RWF_NOWAIT */
                 if (req->flags & REQ_F_NOWAIT)
                         goto done;
-               /* some cases will consume bytes even on error returns */
-               iov_iter_reexpand(iter, iter->count + iter->truncated);
-               iov_iter_revert(iter, io_size - iov_iter_count(iter));
                 ret = 0;
         } else if (ret == -EIOCBQUEUED) {
                 goto out_free;
-       } else if (ret <= 0 || ret == io_size || !force_nonblock ||
+       } else if (ret <= 0 || ret == req->result || !force_nonblock ||
                    (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
                 /* read all, failed, already did sync or don't want to retry */
                 goto done;
         }
  
+       /*
+        * Don't depend on the iter state matching what was consumed, or being
+        * untouched in case of error. Restore it and we'll advance it
+        * manually if we need to.
+        */
+       iov_iter_restore(iter, state);
+
         ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
         if (ret2)
                 return ret2;
  
         iovec = NULL;
         rw = req->async_data;
-       /* now use our persistent iterator, if we aren't already */
-       iter = &rw->iter;
+       /*
+        * Now use our persistent iterator and state, if we aren't already.
+        * We've restored and mapped the iter to match.
+        */
+       if (iter != &rw->iter) {
+               iter = &rw->iter;
+               state = &rw->iter_state;
+       }
  
         do {
-               io_size -= ret;
+               /*
+                * We end up here because of a partial read, either from
+                * above or inside this loop. Advance the iter by the bytes
+                * that were consumed.
+                */
+               iov_iter_advance(iter, ret);
+               if (!iov_iter_count(iter))
+                       break;
                 rw->bytes_done += ret;
+               iov_iter_save_state(iter, state);
+
                 /* if we can retry, do so with the callbacks armed */
                 if (!io_rw_should_retry(req)) {
                         kiocb->ki_flags &= ~IOCB_WAITQ;
@@ -3525,7 +3559,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                         return 0;
                 /* we got some bytes, but not all. retry. */
                 kiocb->ki_flags &= ~IOCB_WAITQ;
-       } while (ret > 0 && ret < io_size);
+               iov_iter_restore(iter, state);
+       } while (ret > 0);
  done:
         kiocb_done(kiocb, ret, issue_flags);
  out_free:
@@ -3539,7 +3574,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
         if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
                 return -EBADF;
-       return io_prep_rw(req, sqe);
+       return io_prep_rw(req, sqe, WRITE);
  }
  
  static int io_write(struct io_kiocb *req, unsigned int issue_flags)
@@ -3548,19 +3583,23 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
         struct kiocb *kiocb = &req->rw.kiocb;
         struct iov_iter __iter, *iter = &__iter;
         struct io_async_rw *rw = req->async_data;
-       ssize_t ret, ret2, io_size;
         bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       struct iov_iter_state __state, *state;
+       ssize_t ret, ret2;
  
         if (rw) {
                 iter = &rw->iter;
+               state = &rw->iter_state;
+               iov_iter_restore(iter, state);
                 iovec = NULL;
         } else {
                 ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
                 if (ret < 0)
                         return ret;
+               state = &__state;
+               iov_iter_save_state(iter, state);
         }
-       io_size = iov_iter_count(iter);
-       req->result = io_size;
+       req->result = iov_iter_count(iter);
  
         /* Ensure we clear previously set non-block flag */
         if (!force_nonblock)
@@ -3577,7 +3616,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
             (req->flags & REQ_F_ISREG))
                 goto copy_iov;
  
-       ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size);
+       ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
         if (unlikely(ret))
                 goto out_free;
  
@@ -3624,9 +3663,7 @@ done:
                 kiocb_done(kiocb, ret2, issue_flags);
         } else {
  copy_iov:
-               /* some cases will consume bytes even on error returns */
-               iov_iter_reexpand(iter, iter->count + iter->truncated);
-               iov_iter_revert(iter, io_size - iov_iter_count(iter));
+               iov_iter_restore(iter, state);
                 ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
                 return ret ?: -EAGAIN;
         }
@@ -4342,7 +4379,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
         int i, bid = pbuf->bid;
  
         for (i = 0; i < pbuf->nbufs; i++) {
-               buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+               buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
                 if (!buf)
                         break;
  
@@ -4549,12 +4586,16 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
         if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
         if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
-           sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+           sqe->rw_flags || sqe->buf_index)
                 return -EINVAL;
         if (req->flags & REQ_F_FIXED_FILE)
                 return -EBADF;
  
         req->close.fd = READ_ONCE(sqe->fd);
+       req->close.file_slot = READ_ONCE(sqe->file_index);
+       if (req->close.file_slot && req->close.fd)
+               return -EINVAL;
+
         return 0;
  }
  
@@ -4566,6 +4607,11 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
         struct file *file = NULL;
         int ret = -EBADF;
  
+       if (req->close.file_slot) {
+               ret = io_close_fixed(req, issue_flags);
+               goto err;
+       }
+
         spin_lock(&files->file_lock);
         fdt = files_fdtable(files);
         if (close->fd >= fdt->max_fds) {
@@ -5293,7 +5339,7 @@ static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
         if (req->poll.events & EPOLLONESHOT)
                 flags = 0;
         if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
-               req->poll.done = true;
+               req->poll.events |= EPOLLONESHOT;
                 flags = 0;
         }
         if (flags & IORING_CQE_F_MORE)
@@ -5322,10 +5368,15 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
         } else {
                 bool done;
  
+               if (req->poll.done) {
+                       spin_unlock(&ctx->completion_lock);
+                       return;
+               }
                 done = __io_poll_complete(req, req->result);
                 if (done) {
                         io_poll_remove_double(req);
                         hash_del(&req->hash_node);
+                       req->poll.done = true;
                 } else {
                         req->result = 0;
                         add_wait_queue(req->poll.head, &req->poll.wait);
@@ -5463,6 +5514,7 @@ static void io_async_task_func(struct io_kiocb *req, bool *locked)
  
         hash_del(&req->hash_node);
         io_poll_remove_double(req);
+       apoll->poll.done = true;
         spin_unlock(&ctx->completion_lock);
  
         if (!READ_ONCE(apoll->poll.canceled))
@@ -5783,6 +5835,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
         struct io_ring_ctx *ctx = req->ctx;
         struct io_poll_table ipt;
         __poll_t mask;
+       bool done;
  
         ipt.pt._qproc = io_poll_queue_proc;
  
@@ -5791,13 +5844,13 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
  
         if (mask) { /* no async, we'd stolen it */
                 ipt.error = 0;
-               io_poll_complete(req, mask);
+               done = io_poll_complete(req, mask);
         }
         spin_unlock(&ctx->completion_lock);
  
         if (mask) {
                 io_cqring_ev_posted(ctx);
-               if (poll->events & EPOLLONESHOT)
+               if (done)
                         io_put_req(req);
         }
         return ipt.error;
@@ -6288,19 +6341,16 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
         struct io_uring_rsrc_update2 up;
         int ret;
  
-       if (issue_flags & IO_URING_F_NONBLOCK)
-               return -EAGAIN;
-
         up.offset = req->rsrc_update.offset;
         up.data = req->rsrc_update.arg;
         up.nr = 0;
         up.tags = 0;
         up.resv = 0;
  
-       mutex_lock(&ctx->uring_lock);
+       io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
         ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
                                         &up, req->rsrc_update.nr_args);
-       mutex_unlock(&ctx->uring_lock);
+       io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
  
         if (ret < 0)
                 req_set_fail(req);
@@ -6930,7 +6980,7 @@ issue_sqe:
                 switch (io_arm_poll_handler(req)) {
                 case IO_APOLL_READY:
                         if (linked_timeout)
-                               io_unprep_linked_timeout(req);
+                               io_queue_linked_timeout(linked_timeout);
                         goto issue_sqe;
                 case IO_APOLL_ABORTED:
                         /*
@@ -7515,6 +7565,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                         break;
         } while (1);
  
+       if (uts) {
+               struct timespec64 ts;
+
+               if (get_timespec64(&ts, uts))
+                       return -EFAULT;
+               timeout = timespec64_to_jiffies(&ts);
+       }
+
         if (sig) {
  #ifdef CONFIG_COMPAT
                 if (in_compat_syscall())
@@ -7528,14 +7586,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                         return ret;
         }
  
-       if (uts) {
-               struct timespec64 ts;
-
-               if (get_timespec64(&ts, uts))
-                       return -EFAULT;
-               timeout = timespec64_to_jiffies(&ts);
-       }
-
         init_waitqueue_func_entry(&iowq.wq, io_wake_function);
         iowq.wq.private = current;
         INIT_LIST_HEAD(&iowq.wq.entry);
@@ -8284,11 +8334,27 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
  #endif
  }
  
+static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
+                                struct io_rsrc_node *node, void *rsrc)
+{
+       struct io_rsrc_put *prsrc;
+
+       prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
+       if (!prsrc)
+               return -ENOMEM;
+
+       prsrc->tag = *io_get_tag_slot(data, idx);
+       prsrc->rsrc = rsrc;
+       list_add(&prsrc->list, &node->rsrc_list);
+       return 0;
+}
+
  static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                                  unsigned int issue_flags, u32 slot_index)
  {
         struct io_ring_ctx *ctx = req->ctx;
         bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       bool needs_switch = false;
         struct io_fixed_file *file_slot;
         int ret = -EBADF;
  
@@ -8304,9 +8370,22 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
  
         slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
         file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
-       ret = -EBADF;
-       if (file_slot->file_ptr)
-               goto err;
+
+       if (file_slot->file_ptr) {
+               struct file *old_file;
+
+               ret = io_rsrc_node_switch_start(ctx);
+               if (ret)
+                       goto err;
+
+               old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+               ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
+                                           ctx->rsrc_node, old_file);
+               if (ret)
+                       goto err;
+               file_slot->file_ptr = 0;
+               needs_switch = true;
+       }
  
         *io_get_tag_slot(ctx->file_data, slot_index) = 0;
         io_fixed_file_set(file_slot, file);
@@ -8318,25 +8397,50 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
  
         ret = 0;
  err:
+       if (needs_switch)
+               io_rsrc_node_switch(ctx, ctx->file_data);
         io_ring_submit_unlock(ctx, !force_nonblock);
         if (ret)
                 fput(file);
         return ret;
  }
  
-static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
-                                struct io_rsrc_node *node, void *rsrc)
+static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
  {
-       struct io_rsrc_put *prsrc;
+       unsigned int offset = req->close.file_slot - 1;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_fixed_file *file_slot;
+       struct file *file;
+       int ret, i;
  
-       prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
-       if (!prsrc)
-               return -ENOMEM;
+       io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+       ret = -ENXIO;
+       if (unlikely(!ctx->file_data))
+               goto out;
+       ret = -EINVAL;
+       if (offset >= ctx->nr_user_files)
+               goto out;
+       ret = io_rsrc_node_switch_start(ctx);
+       if (ret)
+               goto out;
  
-       prsrc->tag = *io_get_tag_slot(data, idx);
-       prsrc->rsrc = rsrc;
-       list_add(&prsrc->list, &node->rsrc_list);
-       return 0;
+       i = array_index_nospec(offset, ctx->nr_user_files);
+       file_slot = io_fixed_file_slot(&ctx->file_table, i);
+       ret = -EBADF;
+       if (!file_slot->file_ptr)
+               goto out;
+
+       file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+       ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+       if (ret)
+               goto out;
+
+       file_slot->file_ptr = 0;
+       io_rsrc_node_switch(ctx, ctx->file_data);
+       ret = 0;
+out:
+       io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+       return ret;
  }
  
  static int __io_sqe_files_update(struct io_ring_ctx *ctx,
@@ -9105,8 +9209,10 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
         struct io_buffer *buf;
         unsigned long index;
  
-       xa_for_each(&ctx->io_buffers, index, buf)
+       xa_for_each(&ctx->io_buffers, index, buf) {
                 __io_remove_buffers(ctx, buf, index, -1U);
+               cond_resched();
+       }
  }
  
  static void io_req_cache_free(struct list_head *list)
@@ -9231,13 +9337,6 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
         return mask;
  }
  
-static int io_uring_fasync(int fd, struct file *file, int on)
-{
-       struct io_ring_ctx *ctx = file->private_data;
-
-       return fasync_helper(fd, file, on, &ctx->cq_fasync);
-}
-
  static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
  {
         const struct cred *creds;
@@ -9536,7 +9635,16 @@ static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
                 ret = io_uring_alloc_task_context(current, ctx);
                 if (unlikely(ret))
                         return ret;
+
                 tctx = current->io_uring;
+               if (ctx->iowq_limits_set) {
+                       unsigned int limits[2] = { ctx->iowq_limits[0],
+                                                  ctx->iowq_limits[1], };
+
+                       ret = io_wq_max_workers(tctx->io_wq, limits);
+                       if (ret)
+                               return ret;
+               }
         }
         if (!xa_load(&tctx->xa, (unsigned long)ctx)) {
                 node = kmalloc(sizeof(*node), GFP_KERNEL);
@@ -9604,8 +9712,10 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
         struct io_tctx_node *node;
         unsigned long index;
  
-       xa_for_each(&tctx->xa, index, node)
+       xa_for_each(&tctx->xa, index, node) {
                 io_uring_del_tctx_node(index);
+               cond_resched();
+       }
         if (wq) {
                 /*
                  * Must be after io_uring_del_task_file() (removes nodes under
@@ -10029,7 +10139,6 @@ static const struct file_operations io_uring_fops = {
         .mmap_capabilities = io_uring_nommu_mmap_capabilities,
  #endif
         .poll           = io_uring_poll,
-       .fasync         = io_uring_fasync,
  #ifdef CONFIG_PROC_FS
         .show_fdinfo    = io_uring_show_fdinfo,
  #endif
@@ -10540,7 +10649,9 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
  
  static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                                         void __user *arg)
+       __must_hold(&ctx->uring_lock)
  {
+       struct io_tctx_node *node;
         struct io_uring_task *tctx = NULL;
         struct io_sq_data *sqd = NULL;
         __u32 new_count[2];
@@ -10560,33 +10671,61 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                          * ordering. Fine to drop uring_lock here, we hold
                          * a ref to the ctx.
                          */
+                       refcount_inc(&sqd->refs);
                         mutex_unlock(&ctx->uring_lock);
                         mutex_lock(&sqd->lock);
                         mutex_lock(&ctx->uring_lock);
-                       tctx = sqd->thread->io_uring;
+                       if (sqd->thread)
+                               tctx = sqd->thread->io_uring;
                 }
         } else {
                 tctx = current->io_uring;
         }
  
-       ret = -EINVAL;
-       if (!tctx || !tctx->io_wq)
-               goto err;
+       BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));
  
-       ret = io_wq_max_workers(tctx->io_wq, new_count);
-       if (ret)
-               goto err;
+       memcpy(ctx->iowq_limits, new_count, sizeof(new_count));
+       ctx->iowq_limits_set = true;
  
-       if (sqd)
+       ret = -EINVAL;
+       if (tctx && tctx->io_wq) {
+               ret = io_wq_max_workers(tctx->io_wq, new_count);
+               if (ret)
+                       goto err;
+       } else {
+               memset(new_count, 0, sizeof(new_count));
+       }
+
+       if (sqd) {
                 mutex_unlock(&sqd->lock);
+               io_put_sq_data(sqd);
+       }
  
         if (copy_to_user(arg, new_count, sizeof(new_count)))
                 return -EFAULT;
  
+       /* that's it for SQPOLL, only the SQPOLL task creates requests */
+       if (sqd)
+               return 0;
+
+       /* now propagate the restriction to all registered users */
+       list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+               struct io_uring_task *tctx = node->task->io_uring;
+
+               if (WARN_ON_ONCE(!tctx->io_wq))
+                       continue;
+
+               for (i = 0; i < ARRAY_SIZE(new_count); i++)
+                       new_count[i] = ctx->iowq_limits[i];
+               /* ignore errors, it always returns zero anyway */
+               (void)io_wq_max_workers(tctx->io_wq, new_count);
+       }
         return 0;
  err:
-       if (sqd)
+       if (sqd) {
                 mutex_unlock(&sqd->lock);
+               io_put_sq_data(sqd);
+       }
         return ret;
  }