#include <linux/io_uring.h>
#include <linux/audit.h>
#include <linux/security.h>
+#include <asm/shmparam.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
fallback_work.work);
struct llist_node *node = llist_del_all(&ctx->fallback_llist);
struct io_kiocb *req, *tmp;
- bool locked = true;
+ struct io_tw_state ts = { .locked = true, };
mutex_lock(&ctx->uring_lock);
llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
- req->io_task_work.func(req, &locked);
- if (WARN_ON_ONCE(!locked))
+ req->io_task_work.func(req, &ts);
+ if (WARN_ON_ONCE(!ts.locked))
return;
io_submit_flush_completions(ctx);
mutex_unlock(&ctx->uring_lock);
INIT_LIST_HEAD(&ctx->sqd_list);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
INIT_LIST_HEAD(&ctx->io_buffers_cache);
- io_alloc_cache_init(&ctx->apoll_cache);
- io_alloc_cache_init(&ctx->netmsg_cache);
+ io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
+ sizeof(struct io_rsrc_node));
+ io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
+ sizeof(struct async_poll));
+ io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
+ sizeof(struct io_async_msghdr));
init_completion(&ctx->ref_comp);
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
INIT_LIST_HEAD(&ctx->ltimeout_list);
- spin_lock_init(&ctx->rsrc_ref_lock);
INIT_LIST_HEAD(&ctx->rsrc_ref_list);
- INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
- init_task_work(&ctx->rsrc_put_tw, io_rsrc_put_tw);
- init_llist_head(&ctx->rsrc_put_llist);
init_llist_head(&ctx->work_llist);
INIT_LIST_HEAD(&ctx->tctx_list);
ctx->submit_state.free_list.next = NULL;
if (req->file && !io_req_ffs_set(req))
req->flags |= io_file_get_flags(req->file) << REQ_F_SUPPORT_NOWAIT_BIT;
- if (req->flags & REQ_F_ISREG) {
- if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
+ if (req->file && (req->flags & REQ_F_ISREG)) {
+ bool should_hash = def->hash_reg_file;
+
+ /* don't serialize this request if the fs doesn't need it */
+ if (should_hash && (req->file->f_flags & O_DIRECT) &&
+ (req->file->f_mode & FMODE_DIO_PARALLEL_WRITE))
+ should_hash = false;
+ if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
} else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
if (def->unbound_nonreg_file)
}
}
-void io_queue_iowq(struct io_kiocb *req, bool *dont_use)
+void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use)
{
struct io_kiocb *link = io_prep_linked_timeout(req);
struct io_uring_task *tctx = req->task->io_uring;
io_cqring_wake(ctx);
}
-static inline void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
+static void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
__releases(ctx->completion_lock)
{
io_commit_cqring(ctx);
- __io_cq_unlock(ctx);
- io_commit_cqring_flush(ctx);
- /*
- * As ->task_complete implies that the ring is single tasked, cq_wait
- * may only be waited on by the current in io_cqring_wait(), but since
- * it will re-check the wakeup conditions once we return we can safely
- * skip waking it up.
- */
- if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) {
- smp_mb();
- __io_cqring_wake(ctx);
+ if (ctx->task_complete) {
+ /*
+ * ->task_complete implies that only current might be waiting
+ * for CQEs, and obviously, we currently don't. No one is
+ * waiting, wakeups are futile, skip them.
+ */
+ io_commit_cqring_flush(ctx);
+ } else {
+ __io_cq_unlock(ctx);
+ io_commit_cqring_flush(ctx);
+ io_cqring_wake(ctx);
}
}
return true;
}
-static void __io_req_complete_post(struct io_kiocb *req)
+static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
+ struct io_rsrc_node *rsrc_node = NULL;
io_cq_lock(ctx);
if (!(req->flags & REQ_F_CQE_SKIP))
}
io_put_kbuf_comp(req);
io_dismantle_req(req);
- io_req_put_rsrc(req);
+ rsrc_node = req->rsrc_node;
/*
* Selected buffer deallocation in io_clean_op() assumes that
* we don't hold ->completion_lock. Clean them here to avoid
ctx->locked_free_nr++;
}
io_cq_unlock_post(ctx);
+
+ if (rsrc_node) {
+ io_ring_submit_lock(ctx, issue_flags);
+ io_put_rsrc_node(ctx, rsrc_node);
+ io_ring_submit_unlock(ctx, issue_flags);
+ }
}
void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
io_req_task_work_add(req);
} else if (!(issue_flags & IO_URING_F_UNLOCKED) ||
!(req->ctx->flags & IORING_SETUP_IOPOLL)) {
- __io_req_complete_post(req);
+ __io_req_complete_post(req, issue_flags);
} else {
struct io_ring_ctx *ctx = req->ctx;
mutex_lock(&ctx->uring_lock);
- __io_req_complete_post(req);
+ __io_req_complete_post(req, issue_flags & ~IO_URING_F_UNLOCKED);
mutex_unlock(&ctx->uring_lock);
}
}
io_put_file(req->file);
}
-__cold void io_free_req(struct io_kiocb *req)
+static __cold void io_free_req_tw(struct io_kiocb *req, struct io_tw_state *ts)
{
struct io_ring_ctx *ctx = req->ctx;
- io_req_put_rsrc(req);
+ if (req->rsrc_node) {
+ io_tw_lock(ctx, ts);
+ io_put_rsrc_node(ctx, req->rsrc_node);
+ }
io_dismantle_req(req);
io_put_task_remote(req->task, 1);
spin_unlock(&ctx->completion_lock);
}
+__cold void io_free_req(struct io_kiocb *req)
+{
+ req->io_task_work.func = io_free_req_tw;
+ io_req_task_work_add(req);
+}
+
static void __io_req_find_next_prep(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
return nxt;
}
-static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
+static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
{
if (!ctx)
return;
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
- if (*locked) {
+ if (ts->locked) {
io_submit_flush_completions(ctx);
mutex_unlock(&ctx->uring_lock);
- *locked = false;
+ ts->locked = false;
}
percpu_ref_put(&ctx->refs);
}
static unsigned int handle_tw_list(struct llist_node *node,
- struct io_ring_ctx **ctx, bool *locked,
+ struct io_ring_ctx **ctx,
+ struct io_tw_state *ts,
struct llist_node *last)
{
unsigned int count = 0;
prefetch(container_of(next, struct io_kiocb, io_task_work.node));
if (req->ctx != *ctx) {
- ctx_flush_and_put(*ctx, locked);
+ ctx_flush_and_put(*ctx, ts);
*ctx = req->ctx;
/* if not contended, grab and improve batching */
- *locked = mutex_trylock(&(*ctx)->uring_lock);
+ ts->locked = mutex_trylock(&(*ctx)->uring_lock);
percpu_ref_get(&(*ctx)->refs);
- } else if (!*locked)
- *locked = mutex_trylock(&(*ctx)->uring_lock);
- req->io_task_work.func(req, locked);
+ }
+ req->io_task_work.func(req, ts);
node = next;
count++;
if (unlikely(need_resched())) {
- ctx_flush_and_put(*ctx, locked);
+ ctx_flush_and_put(*ctx, ts);
*ctx = NULL;
cond_resched();
}
void tctx_task_work(struct callback_head *cb)
{
- bool uring_locked = false;
+ struct io_tw_state ts = {};
struct io_ring_ctx *ctx = NULL;
struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
task_work);
do {
loops++;
node = io_llist_xchg(&tctx->task_list, &fake);
- count += handle_tw_list(node, &ctx, &uring_locked, &fake);
+ count += handle_tw_list(node, &ctx, &ts, &fake);
/* skip expensive cmpxchg if there are items in the list */
if (READ_ONCE(tctx->task_list.first) != &fake)
continue;
- if (uring_locked && !wq_list_empty(&ctx->submit_state.compl_reqs)) {
+ if (ts.locked && !wq_list_empty(&ctx->submit_state.compl_reqs)) {
io_submit_flush_completions(ctx);
if (READ_ONCE(tctx->task_list.first) != &fake)
continue;
node = io_llist_cmpxchg(&tctx->task_list, &fake, NULL);
} while (node != &fake);
- ctx_flush_and_put(ctx, &uring_locked);
+ ctx_flush_and_put(ctx, &ts);
/* relaxed read is enough as only the task itself sets ->in_cancel */
if (unlikely(atomic_read(&tctx->in_cancel)))
}
}
-static void io_req_local_work_add(struct io_kiocb *req)
+static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
{
struct io_ring_ctx *ctx = req->ctx;
+ unsigned nr_wait, nr_tw, nr_tw_prev;
+ struct llist_node *first;
- percpu_ref_get(&ctx->refs);
-
- if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
- goto put_ref;
+ if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
+ flags &= ~IOU_F_TWQ_LAZY_WAKE;
- /* needed for the following wake up */
- smp_mb__after_atomic();
-
- if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
- io_move_task_work_from_local(ctx);
- goto put_ref;
+ first = READ_ONCE(ctx->work_llist.first);
+ do {
+ nr_tw_prev = 0;
+ if (first) {
+ struct io_kiocb *first_req = container_of(first,
+ struct io_kiocb,
+ io_task_work.node);
+ /*
+ * Might be executed at any moment, rely on
+ * SLAB_TYPESAFE_BY_RCU to keep it alive.
+ */
+ nr_tw_prev = READ_ONCE(first_req->nr_tw);
+ }
+ nr_tw = nr_tw_prev + 1;
+ /* Large enough to fail the nr_wait comparison below */
+ if (!(flags & IOU_F_TWQ_LAZY_WAKE))
+ nr_tw = -1U;
+
+ req->nr_tw = nr_tw;
+ req->io_task_work.node.next = first;
+ } while (!try_cmpxchg(&ctx->work_llist.first, &first,
+ &req->io_task_work.node));
+
+ if (!first) {
+ if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+ atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+ if (ctx->has_evfd)
+ io_eventfd_signal(ctx);
}
- if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
- atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
- if (ctx->has_evfd)
- io_eventfd_signal(ctx);
-
- if (READ_ONCE(ctx->cq_waiting))
- wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
-
-put_ref:
- percpu_ref_put(&ctx->refs);
+ nr_wait = atomic_read(&ctx->cq_wait_nr);
+ /* no one is waiting */
+ if (!nr_wait)
+ return;
+ /* either not enough or the previous add has already woken it up */
+ if (nr_wait > nr_tw || nr_tw_prev >= nr_wait)
+ return;
+ /* pairs with set_current_state() in io_cqring_wait() */
+ smp_mb__after_atomic();
+ wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
}
-void __io_req_task_work_add(struct io_kiocb *req, bool allow_local)
+void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
{
struct io_uring_task *tctx = req->task->io_uring;
struct io_ring_ctx *ctx = req->ctx;
- if (allow_local && ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
- io_req_local_work_add(req);
+ if (!(flags & IOU_F_TWQ_FORCE_NORMAL) &&
+ (ctx->flags & IORING_SETUP_DEFER_TASKRUN)) {
+ rcu_read_lock();
+ io_req_local_work_add(req, flags);
+ rcu_read_unlock();
return;
}
io_task_work.node);
node = node->next;
- __io_req_task_work_add(req, false);
+ __io_req_task_work_add(req, IOU_F_TWQ_FORCE_NORMAL);
}
}
-static int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked)
+static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts)
{
struct llist_node *node;
unsigned int loops = 0;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
prefetch(container_of(next, struct io_kiocb, io_task_work.node));
- req->io_task_work.func(req, locked);
+ req->io_task_work.func(req, ts);
ret++;
node = next;
}
if (!llist_empty(&ctx->work_llist))
goto again;
- if (*locked) {
+ if (ts->locked) {
io_submit_flush_completions(ctx);
if (!llist_empty(&ctx->work_llist))
goto again;
static inline int io_run_local_work_locked(struct io_ring_ctx *ctx)
{
- bool locked;
+ struct io_tw_state ts = { .locked = true, };
int ret;
if (llist_empty(&ctx->work_llist))
return 0;
- locked = true;
- ret = __io_run_local_work(ctx, &locked);
+ ret = __io_run_local_work(ctx, &ts);
/* shouldn't happen! */
- if (WARN_ON_ONCE(!locked))
+ if (WARN_ON_ONCE(!ts.locked))
mutex_lock(&ctx->uring_lock);
return ret;
}
static int io_run_local_work(struct io_ring_ctx *ctx)
{
- bool locked = mutex_trylock(&ctx->uring_lock);
+ struct io_tw_state ts = {};
int ret;
- ret = __io_run_local_work(ctx, &locked);
- if (locked)
+ ts.locked = mutex_trylock(&ctx->uring_lock);
+ ret = __io_run_local_work(ctx, &ts);
+ if (ts.locked)
mutex_unlock(&ctx->uring_lock);
return ret;
}
-static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
+static void io_req_task_cancel(struct io_kiocb *req, struct io_tw_state *ts)
{
- io_tw_lock(req->ctx, locked);
+ io_tw_lock(req->ctx, ts);
io_req_defer_failed(req, req->cqe.res);
}
-void io_req_task_submit(struct io_kiocb *req, bool *locked)
+void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts)
{
- io_tw_lock(req->ctx, locked);
+ io_tw_lock(req->ctx, ts);
/* req->task == current here, checking PF_EXITING is safe */
if (unlikely(req->task->flags & PF_EXITING))
io_req_defer_failed(req, -EFAULT);
else if (req->flags & REQ_F_FORCE_ASYNC)
- io_queue_iowq(req, locked);
+ io_queue_iowq(req, ts);
else
io_queue_sqe(req);
}
static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
- struct io_wq_work_node *node, *prev;
struct io_submit_state *state = &ctx->submit_state;
+ struct io_wq_work_node *node;
__io_cq_lock(ctx);
/* must come first to preserve CQE ordering in failure cases */
if (state->cqes_count)
__io_flush_post_cqes(ctx);
- wq_list_for_each(node, prev, &state->compl_reqs) {
+ __wq_list_for_each(node, &state->compl_reqs) {
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
return ret;
}
-void io_req_task_complete(struct io_kiocb *req, bool *locked)
+void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
- if (*locked)
+ if (ts->locked)
io_req_complete_defer(req);
else
io_req_complete_post(req, IO_URING_F_UNLOCKED);
return 0;
}
-int io_poll_issue(struct io_kiocb *req, bool *locked)
+int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts)
{
- io_tw_lock(req->ctx, locked);
+ io_tw_lock(req->ctx, ts);
return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT|
IO_URING_F_COMPLETE_DEFER);
}
if (unlikely(ret))
return io_submit_fail_init(sqe, req, ret);
- /* don't need @sqe from now on */
- trace_io_uring_submit_sqe(req, true);
+ trace_io_uring_submit_req(req);
/*
* If we already have a head request, queue this one for async
if (unlikely(!entries))
return 0;
/* make sure SQ entry isn't read before tail */
- ret = left = min3(nr, ctx->sq_entries, entries);
+ ret = left = min(nr, entries);
io_get_task_refs(left);
io_submit_state_start(&ctx->submit_state, left);
unsigned long check_cq;
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
- WRITE_ONCE(ctx->cq_waiting, 1);
+ int nr_wait = (int) iowq.cq_tail - READ_ONCE(ctx->rings->cq.tail);
+
+ atomic_set(&ctx->cq_wait_nr, nr_wait);
set_current_state(TASK_INTERRUPTIBLE);
} else {
prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
ret = io_cqring_wait_schedule(ctx, &iowq);
__set_current_state(TASK_RUNNING);
- WRITE_ONCE(ctx->cq_waiting, 0);
+ atomic_set(&ctx->cq_wait_nr, 0);
if (ret < 0)
break;
mutex_unlock(&ctx->uring_lock);
}
+static void io_rsrc_node_cache_free(struct io_cache_entry *entry)
+{
+ kfree(container_of(entry, struct io_rsrc_node, cache));
+}
+
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
io_sq_thread_finish(ctx);
- io_rsrc_refs_drop(ctx);
/* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
io_wait_rsrc_data(ctx->buf_data);
io_wait_rsrc_data(ctx->file_data);
/* there are no registered resources left, nobody uses it */
if (ctx->rsrc_node)
- io_rsrc_node_destroy(ctx->rsrc_node);
+ io_rsrc_node_destroy(ctx, ctx->rsrc_node);
if (ctx->rsrc_backup_node)
- io_rsrc_node_destroy(ctx->rsrc_backup_node);
- flush_delayed_work(&ctx->rsrc_put_work);
- flush_delayed_work(&ctx->fallback_work);
+ io_rsrc_node_destroy(ctx, ctx->rsrc_backup_node);
WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
- WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
#endif
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
+ io_alloc_cache_free(&ctx->rsrc_node_cache, io_rsrc_node_cache_free);
if (ctx->mm_account) {
mmdrop(ctx->mm_account);
ctx->mm_account = NULL;
spin_lock(&ctx->completion_lock);
spin_unlock(&ctx->completion_lock);
+ /* pairs with RCU read section in io_req_local_work_add() */
+ if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+ synchronize_rcu();
+
io_ring_ctx_free(ctx);
}
enum io_wq_cancel cret;
bool ret = false;
+ /* set it so io_req_local_work_add() would wake us up */
+ if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
+ atomic_set(&ctx->cq_wait_nr, 1);
+ smp_mb();
+ }
+
/* failed during ring init, it couldn't have issued any requests */
if (!ctx->rings)
return false;
{
struct io_uring_task *tctx = current->io_uring;
struct io_ring_ctx *ctx;
+ struct io_tctx_node *node;
+ unsigned long index;
s64 inflight;
DEFINE_WAIT(wait);
break;
if (!sqd) {
- struct io_tctx_node *node;
- unsigned long index;
-
xa_for_each(&tctx->xa, index, node) {
/* sqpoll task will cancel all its requests */
if (node->ctx->sq_data)
prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
io_run_task_work();
io_uring_drop_tctx_refs(current);
-
+ xa_for_each(&tctx->xa, index, node) {
+ if (!llist_empty(&node->ctx->work_llist)) {
+ WARN_ON_ONCE(node->ctx->submitter_task &&
+ node->ctx->submitter_task != current);
+ goto end_wait;
+ }
+ }
/*
* If we've seen completions, retry without waiting. This
* avoids a race where a completion comes in before we did
*/
if (inflight == tctx_inflight(tctx, !cancel_all))
schedule();
+end_wait:
finish_wait(&tctx->wait, &wait);
} while (1);
struct page *page;
void *ptr;
- switch (offset) {
+ switch (offset & IORING_OFF_MMAP_MASK) {
case IORING_OFF_SQ_RING:
case IORING_OFF_CQ_RING:
ptr = ctx->rings;
case IORING_OFF_SQES:
ptr = ctx->sq_sqes;
break;
+ case IORING_OFF_PBUF_RING: {
+ unsigned int bgid;
+
+ bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
+ mutex_lock(&ctx->uring_lock);
+ ptr = io_pbuf_get_address(ctx, bgid);
+ mutex_unlock(&ctx->uring_lock);
+ if (!ptr)
+ return ERR_PTR(-EINVAL);
+ break;
+ }
default:
return ERR_PTR(-EINVAL);
}
return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
}
+static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
+ struct vm_unmapped_area_info info;
+ void *ptr;
+
+ /*
+ * Do not allow to map to user-provided address to avoid breaking the
+ * aliasing rules. Userspace is not able to guess the offset address of
+ * kernel kmalloc()ed memory area.
+ */
+ if (addr)
+ return -EINVAL;
+
+ ptr = io_uring_validate_mmap_request(filp, pgoff, len);
+ if (IS_ERR(ptr))
+ return -ENOMEM;
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+#ifdef SHM_COLOUR
+ info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
+#else
+ info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
+#endif
+ info.align_offset = (unsigned long) ptr;
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ addr = vm_unmapped_area(&info);
+ if (offset_in_page(addr)) {
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = mmap_end;
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+
#else /* !CONFIG_MMU */
static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
#ifndef CONFIG_MMU
.get_unmapped_area = io_uring_nommu_get_unmapped_area,
.mmap_capabilities = io_uring_nommu_mmap_capabilities,
+#else
+ .get_unmapped_area = io_uring_mmu_get_unmapped_area,
#endif
.poll = io_uring_poll,
#ifdef CONFIG_PROC_FS
io_uring_optable_init();
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
- SLAB_ACCOUNT);
+ SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU);
return 0;
};
__initcall(io_uring_init);