diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 7a43aed..62988b3 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -23,25 +23,16 @@ struct io_rsrc_update {
 	u32				offset;
 };
 
+static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
+static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
 static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 				  struct io_mapped_ubuf **pimu,
 				  struct page **last_hpage);
 
-#define IO_RSRC_REF_BATCH	100
-
 /* only define max */
 #define IORING_MAX_FIXED_FILES	(1U << 20)
 #define IORING_MAX_REG_BUFFERS	(1U << 14)
 
-void io_rsrc_refs_drop(struct io_ring_ctx *ctx)
-	__must_hold(&ctx->uring_lock)
-{
-	if (ctx->rsrc_cached_refs) {
-		io_rsrc_put_node(ctx->rsrc_node, ctx->rsrc_cached_refs);
-		ctx->rsrc_cached_refs = 0;
-	}
-}
-
 int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
 {
 	unsigned long page_limit, cur_pages, new_pages;
@@ -151,216 +142,130 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
 	*slot = NULL;
 }
 
-void io_rsrc_refs_refill(struct io_ring_ctx *ctx)
-	__must_hold(&ctx->uring_lock)
-{
-	ctx->rsrc_cached_refs += IO_RSRC_REF_BATCH;
-	percpu_ref_get_many(&ctx->rsrc_node->refs, IO_RSRC_REF_BATCH);
-}
-
-static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
+static void io_rsrc_put_work(struct io_rsrc_node *node)
 {
-	struct io_rsrc_data *rsrc_data = ref_node->rsrc_data;
-	struct io_ring_ctx *ctx = rsrc_data->ctx;
-	struct io_rsrc_put *prsrc, *tmp;
-
-	list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) {
-		list_del(&prsrc->list);
-
-		if (prsrc->tag) {
-			if (ctx->flags & IORING_SETUP_IOPOLL) {
-				mutex_lock(&ctx->uring_lock);
-				io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
-				mutex_unlock(&ctx->uring_lock);
-			} else {
-				io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
-			}
-		}
+	struct io_rsrc_data *data = node->rsrc_data;
+	struct io_rsrc_put *prsrc = &node->item;
 
-		rsrc_data->do_put(ctx, prsrc);
-		kfree(prsrc);
-	}
+	if (prsrc->tag)
+		io_post_aux_cqe(data->ctx, prsrc->tag, 0, 0);
 
-	io_rsrc_node_destroy(ref_node);
-	if (atomic_dec_and_test(&rsrc_data->refs))
-		complete(&rsrc_data->done);
-}
-
-void io_rsrc_put_work(struct work_struct *work)
-{
-	struct io_ring_ctx *ctx;
-	struct llist_node *node;
-
-	ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
-	node = llist_del_all(&ctx->rsrc_put_llist);
-
-	while (node) {
-		struct io_rsrc_node *ref_node;
-		struct llist_node *next = node->next;
-
-		ref_node = llist_entry(node, struct io_rsrc_node, llist);
-		__io_rsrc_put_work(ref_node);
-		node = next;
+	switch (data->rsrc_type) {
+	case IORING_RSRC_FILE:
+		io_rsrc_file_put(data->ctx, prsrc);
+		break;
+	case IORING_RSRC_BUFFER:
+		io_rsrc_buf_put(data->ctx, prsrc);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 }
 
-void io_rsrc_put_tw(struct callback_head *cb)
-{
-	struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx,
-					       rsrc_put_tw);
-
-	io_rsrc_put_work(&ctx->rsrc_put_work.work);
-}
-
-void io_wait_rsrc_data(struct io_rsrc_data *data)
-{
-	if (data && !atomic_dec_and_test(&data->refs))
-		wait_for_completion(&data->done);
-}
-
-void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
+void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 {
-	percpu_ref_exit(&ref_node->refs);
-	kfree(ref_node);
+	if (!io_alloc_cache_put(&ctx->rsrc_node_cache, &node->cache))
+		kfree(node);
 }
 
-static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
+void io_rsrc_node_ref_zero(struct io_rsrc_node *node)
+	__must_hold(&node->rsrc_data->ctx->uring_lock)
 {
-	struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
 	struct io_ring_ctx *ctx = node->rsrc_data->ctx;
-	unsigned long flags;
-	bool first_add = false;
-	unsigned long delay = HZ;
-
-	spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
-	node->done = true;
-
-	/* if we are mid-quiesce then do not delay */
-	if (node->rsrc_data->quiesce)
-		delay = 0;
 
 	while (!list_empty(&ctx->rsrc_ref_list)) {
 		node = list_first_entry(&ctx->rsrc_ref_list,
 					struct io_rsrc_node, node);
 		/* recycle ref nodes in order */
-		if (!node->done)
+		if (node->refs)
 			break;
 		list_del(&node->node);
-		first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
-	}
-	spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
-
-	if (!first_add)
-		return;
-
-	if (ctx->submitter_task) {
-		if (!task_work_add(ctx->submitter_task, &ctx->rsrc_put_tw,
-				   ctx->notify_method))
-			return;
+
+		if (likely(!node->empty))
+			io_rsrc_put_work(node);
+		io_rsrc_node_destroy(ctx, node);
 	}
-	mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
+	if (list_empty(&ctx->rsrc_ref_list) && unlikely(ctx->rsrc_quiesce))
+		wake_up_all(&ctx->rsrc_quiesce_wq);
 }
 
-static struct io_rsrc_node *io_rsrc_node_alloc(void)
+struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
 {
 	struct io_rsrc_node *ref_node;
+	struct io_cache_entry *entry;
 
-	ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
-	if (!ref_node)
-		return NULL;
-
-	if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
-			    0, GFP_KERNEL)) {
-		kfree(ref_node);
-		return NULL;
-	}
-	INIT_LIST_HEAD(&ref_node->node);
-	INIT_LIST_HEAD(&ref_node->rsrc_list);
-	ref_node->done = false;
-	return ref_node;
-}
-
-void io_rsrc_node_switch(struct io_ring_ctx *ctx,
-			 struct io_rsrc_data *data_to_kill)
-	__must_hold(&ctx->uring_lock)
-{
-	WARN_ON_ONCE(!ctx->rsrc_backup_node);
-	WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node);
-
-	io_rsrc_refs_drop(ctx);
-
-	if (data_to_kill) {
-		struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
-
-		rsrc_node->rsrc_data = data_to_kill;
-		spin_lock_irq(&ctx->rsrc_ref_lock);
-		list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
-		spin_unlock_irq(&ctx->rsrc_ref_lock);
-
-		atomic_inc(&data_to_kill->refs);
-		percpu_ref_kill(&rsrc_node->refs);
-		ctx->rsrc_node = NULL;
-	}
-
-	if (!ctx->rsrc_node) {
-		ctx->rsrc_node = ctx->rsrc_backup_node;
-		ctx->rsrc_backup_node = NULL;
+	entry = io_alloc_cache_get(&ctx->rsrc_node_cache);
+	if (entry) {
+		ref_node = container_of(entry, struct io_rsrc_node, cache);
+	} else {
+		ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
+		if (!ref_node)
+			return NULL;
 	}
-}
-
-int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
-{
-	if (ctx->rsrc_backup_node)
-		return 0;
-	ctx->rsrc_backup_node = io_rsrc_node_alloc();
-	return ctx->rsrc_backup_node ? 0 : -ENOMEM;
+	ref_node->rsrc_data = NULL;
+	ref_node->empty = 0;
+	ref_node->refs = 1;
+	return ref_node;
 }
 
 __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
 				      struct io_ring_ctx *ctx)
 {
+	struct io_rsrc_node *backup;
+	DEFINE_WAIT(we);
 	int ret;
 
 	/* As We may drop ->uring_lock, other task may have started quiesce */
 	if (data->quiesce)
 		return -ENXIO;
-	ret = io_rsrc_node_switch_start(ctx);
-	if (ret)
-		return ret;
-	io_rsrc_node_switch(ctx, data);
 
-	/* kill initial ref, already quiesced if zero */
-	if (atomic_dec_and_test(&data->refs))
+	backup = io_rsrc_node_alloc(ctx);
+	if (!backup)
+		return -ENOMEM;
+	ctx->rsrc_node->empty = true;
+	ctx->rsrc_node->rsrc_data = data;
+	list_add_tail(&ctx->rsrc_node->node, &ctx->rsrc_ref_list);
+	io_put_rsrc_node(ctx, ctx->rsrc_node);
+	ctx->rsrc_node = backup;
+
+	if (list_empty(&ctx->rsrc_ref_list))
 		return 0;
 
+	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
+		atomic_set(&ctx->cq_wait_nr, 1);
+		smp_mb();
+	}
+
+	ctx->rsrc_quiesce++;
 	data->quiesce = true;
-	mutex_unlock(&ctx->uring_lock);
 	do {
+		prepare_to_wait(&ctx->rsrc_quiesce_wq, &we, TASK_INTERRUPTIBLE);
+		mutex_unlock(&ctx->uring_lock);
+
 		ret = io_run_task_work_sig(ctx);
 		if (ret < 0) {
-			atomic_inc(&data->refs);
-			/* wait for all works potentially completing data->done */
-			flush_delayed_work(&ctx->rsrc_put_work);
-			reinit_completion(&data->done);
 			mutex_lock(&ctx->uring_lock);
+			if (list_empty(&ctx->rsrc_ref_list))
+				ret = 0;
 			break;
 		}
 
-		flush_delayed_work(&ctx->rsrc_put_work);
-		ret = wait_for_completion_interruptible(&data->done);
-		if (!ret) {
-			mutex_lock(&ctx->uring_lock);
-			if (atomic_read(&data->refs) <= 0)
-				break;
-			/*
-			 * it has been revived by another thread while
-			 * we were unlocked
-			 */
-			mutex_unlock(&ctx->uring_lock);
-		}
-	} while (1);
+		schedule();
+		__set_current_state(TASK_RUNNING);
+		mutex_lock(&ctx->uring_lock);
+		ret = 0;
+	} while (!list_empty(&ctx->rsrc_ref_list));
 
+	finish_wait(&ctx->rsrc_quiesce_wq, &we);
 	data->quiesce = false;
+	ctx->rsrc_quiesce--;
 
+	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
+		atomic_set(&ctx->cq_wait_nr, 0);
+		smp_mb();
+	}
 	return ret;
 }
 
@@ -405,8 +310,8 @@ static __cold void **io_alloc_page_table(size_t size)
 	return table;
 }
 
-__cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
-				     rsrc_put_fn *do_put, u64 __user *utags,
+__cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, int type,
+				     u64 __user *utags,
 				     unsigned nr, struct io_rsrc_data **pdata)
 {
 	struct io_rsrc_data *data;
@@ -424,7 +329,7 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 
 	data->nr = nr;
 	data->ctx = ctx;
-	data->do_put = do_put;
+	data->rsrc_type = type;
 	if (utags) {
 		ret = -EFAULT;
 		for (i = 0; i < nr; i++) {
@@ -435,9 +340,6 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 			goto fail;
 		}
 	}
-
-	atomic_set(&data->refs, 1);
-	init_completion(&data->done);
 	*pdata = data;
 	return 0;
 fail:
@@ -456,7 +358,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 	struct file *file;
 	int fd, i, err = 0;
 	unsigned int done;
-	bool needs_switch = false;
 
 	if (!ctx->file_data)
 		return -ENXIO;
@@ -483,12 +384,11 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 
 		if (file_slot->file_ptr) {
 			file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-			err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file);
+			err = io_queue_rsrc_removal(data, i, file);
 			if (err)
 				break;
 			file_slot->file_ptr = 0;
 			io_file_bitmap_clear(&ctx->file_table, i);
-			needs_switch = true;
 		}
 		if (fd != -1) {
 			file = fget(fd);
@@ -519,9 +419,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				io_file_bitmap_set(&ctx->file_table, i);
 			}
 		}
-
-	if (needs_switch)
-		io_rsrc_node_switch(ctx, data);
 	return done ? done : err;
 }
 
@@ -532,7 +429,6 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 	u64 __user *tags = u64_to_user_ptr(up->tags);
 	struct iovec iov, __user *iovs = u64_to_user_ptr(up->data);
 	struct page *last_hpage = NULL;
-	bool needs_switch = false;
 	__u32 done;
 	int i, err;
 
@@ -543,7 +439,6 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 
 	for (done = 0; done < nr_args; done++) {
 		struct io_mapped_ubuf *imu;
-		int offset = up->offset + done;
 		u64 tag = 0;
 
 		err = io_copy_iov(ctx, &iov, iovs, done);
@@ -564,24 +459,20 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 		if (err)
 			break;
 
-		i = array_index_nospec(offset, ctx->nr_user_bufs);
+		i = array_index_nospec(up->offset + done, ctx->nr_user_bufs);
 		if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
 			err = io_queue_rsrc_removal(ctx->buf_data, i,
-						    ctx->rsrc_node, ctx->user_bufs[i]);
+						    ctx->user_bufs[i]);
 			if (unlikely(err)) {
 				io_buffer_unmap(ctx, &imu);
 				break;
 			}
 			ctx->user_bufs[i] = ctx->dummy_ubuf;
-			needs_switch = true;
 		}
 
 		ctx->user_bufs[i] = imu;
-		*io_get_tag_slot(ctx->buf_data, offset) = tag;
+		*io_get_tag_slot(ctx->buf_data, i) = tag;
 	}
-
-	if (needs_switch)
-		io_rsrc_node_switch(ctx, ctx->buf_data);
 	return done ? done : err;
 }
 
@@ -590,13 +481,11 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 				     unsigned nr_args)
 {
 	__u32 tmp;
-	int err;
+
+	lockdep_assert_held(&ctx->uring_lock);
 
 	if (check_add_overflow(up->offset, nr_args, &tmp))
 		return -EOVERFLOW;
-	err = io_rsrc_node_switch_start(ctx);
-	if (err)
-		return err;
 
 	switch (type) {
 	case IORING_RSRC_FILE:
@@ -753,20 +642,25 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 	return IOU_OK;
 }
 
-int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
-			  struct io_rsrc_node *node, void *rsrc)
+int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, void *rsrc)
 {
+	struct io_ring_ctx *ctx = data->ctx;
+	struct io_rsrc_node *node = ctx->rsrc_node;
 	u64 *tag_slot = io_get_tag_slot(data, idx);
-	struct io_rsrc_put *prsrc;
 
-	prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
-	if (!prsrc)
+	ctx->rsrc_node = io_rsrc_node_alloc(ctx);
+	if (unlikely(!ctx->rsrc_node)) {
+		ctx->rsrc_node = node;
 		return -ENOMEM;
+	}
 
-	prsrc->tag = *tag_slot;
+	node->item.rsrc = rsrc;
+	node->item.tag = *tag_slot;
 	*tag_slot = 0;
-	prsrc->rsrc = rsrc;
-	list_add(&prsrc->list, &node->rsrc_list);
+
+	node->rsrc_data = data;
+	list_add_tail(&node->node, &ctx->rsrc_ref_list);
+	io_put_rsrc_node(ctx, node);
 	return 0;
 }
 
@@ -882,20 +776,14 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
 	return 0;
 }
 
-static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+static __cold void io_rsrc_file_scm_put(struct io_ring_ctx *ctx, struct file *file)
 {
-	struct file *file = prsrc->file;
 #if defined(CONFIG_UNIX)
 	struct sock *sock = ctx->ring_sock->sk;
 	struct sk_buff_head list, *head = &sock->sk_receive_queue;
 	struct sk_buff *skb;
 	int i;
 
-	if (!io_file_need_scm(file)) {
-		fput(file);
-		return;
-	}
-
 	__skb_queue_head_init(&list);
 
 	/*
@@ -945,11 +833,19 @@ static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
 		__skb_queue_tail(head, skb);
 		spin_unlock_irq(&head->lock);
 	}
-#else
-	fput(file);
 #endif
 }
 
+static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+{
+	struct file *file = prsrc->file;
+
+	if (likely(!io_file_need_scm(file)))
+		fput(file);
+	else
+		io_rsrc_file_scm_put(ctx, file);
+}
+
 int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 			  unsigned nr_args, u64 __user *tags)
 {
@@ -966,10 +862,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		return -EMFILE;
 	if (nr_args > rlimit(RLIMIT_NOFILE))
 		return -EMFILE;
-	ret = io_rsrc_node_switch_start(ctx);
-	if (ret)
-		return ret;
-	ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args,
+	ret = io_rsrc_data_alloc(ctx, IORING_RSRC_FILE, tags, nr_args,
 				 &ctx->file_data);
 	if (ret)
 		return ret;
@@ -1023,7 +916,6 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 
 	/* default it to the whole table */
 	io_file_table_set_alloc_range(ctx, 0, ctx->nr_user_files);
-	io_rsrc_node_switch(ctx, NULL);
 	return 0;
 fail:
 	__io_sqe_files_unregister(ctx);
@@ -1305,10 +1197,7 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 		return -EBUSY;
 	if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
 		return -EINVAL;
-	ret = io_rsrc_node_switch_start(ctx);
-	if (ret)
-		return ret;
-	ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data);
+	ret = io_rsrc_data_alloc(ctx, IORING_RSRC_BUFFER, tags, nr_args, &data);
 	if (ret)
 		return ret;
 	ret = io_buffers_map_alloc(ctx, nr_args);
@@ -1345,8 +1234,6 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	ctx->buf_data = data;
 	if (ret)
 		__io_sqe_buffers_unregister(ctx);
-	else
-		io_rsrc_node_switch(ctx, NULL);
 	return ret;
 }
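
The core of this rework is easy to lose in the hunks above: each rsrc node's percpu_ref is replaced by a plain integer refcount protected by ->uring_lock, each node carries exactly one io_rsrc_put item, and completed nodes are retired strictly in FIFO order off ctx->rsrc_ref_list. What follows is a minimal, compilable userspace sketch of that ordered-retirement idea only, not kernel code: node, queue_removal(), put_node() and node_ref_zero() are invented stand-ins for io_rsrc_node, io_queue_rsrc_removal(), io_put_rsrc_node() and io_rsrc_node_ref_zero(), and single-threaded use stands in for holding ->uring_lock.

/*
 * Userspace model of the ordered node-recycling scheme: a plain int
 * refcount plus a FIFO list. A node whose refcount hits zero is not
 * retired until every node queued before it has also hit zero.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;	/* FIFO link, models ctx->rsrc_ref_list */
	int refs;		/* models node->refs under ->uring_lock */
	bool empty;		/* models node->empty: nothing to put */
	int item;		/* models node->item (the resource) */
};

static struct node *head, **tail = &head;

static void put_work(struct node *n)
{
	printf("putting resource %d\n", n->item);
}

/* models io_rsrc_node_ref_zero(): retire completed nodes in order */
static void node_ref_zero(void)
{
	while (head && head->refs == 0) {
		struct node *n = head;

		head = n->next;
		if (!head)
			tail = &head;
		if (!n->empty)
			put_work(n);
		free(n);	/* the kernel recycles via an alloc cache */
	}
}

/* models io_queue_rsrc_removal(): queue a node holding one resource */
static struct node *queue_removal(int item)
{
	struct node *n = calloc(1, sizeof(*n));

	assert(n);
	n->refs = 1;	/* io_rsrc_node_alloc() also starts at refs = 1 */
	n->item = item;
	*tail = n;
	tail = &n->next;
	return n;
}

/* models io_put_rsrc_node() */
static void put_node(struct node *n)
{
	if (--n->refs == 0)
		node_ref_zero();
}

int main(void)
{
	struct node *a = queue_removal(1);
	struct node *b = queue_removal(2);

	put_node(b);	/* b is done, but a blocks it: nothing retires */
	put_node(a);	/* a retires, then b retires, strictly in order */
	return 0;
}

The ordering constraint is also what lets the new io_rsrc_ref_quiesce() stay so simple: it parks the current node at the tail with ->empty set, drops its reference, and sleeps on rsrc_quiesce_wq until rsrc_ref_list drains, instead of juggling the old completion/refcount dance on io_rsrc_data.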