#include <linux/errno.h>
#include <linux/lockdep.h>
#include "io_uring_types.h"

#ifndef CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
#endif

enum {
	IOU_OK			= 0,
	IOU_ISSUE_SKIP_COMPLETE	= -EIOCBQUEUED,
};

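/*
 * Slow path for posting a completion: record the CQE data on the context's
 * overflow list when no free slot can be taken from the CQ ring.
 */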
bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, s32 res,
			      u32 cflags, u64 extra1, u64 extra2);

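/* number of CQEs the kernel has queued that userspace has not yet consumed */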
static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
{
	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
}

/*
 * writes to the cq entry need to come after reading head; the
 * control dependency is enough as we're using WRITE_ONCE to
 * fill the cq entry
 */
static inline struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx)
{
	struct io_rings *rings = ctx->rings;
	unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
	unsigned int shift = 0;
	unsigned int free, queued, len;

	if (ctx->flags & IORING_SETUP_CQE32)
		shift = 1;

	/* userspace may cheat modifying the tail, be safe and do min */
	queued = min(__io_cqring_events(ctx), ctx->cq_entries);
	free = ctx->cq_entries - queued;
	/* we need a contiguous range, limit based on the current array offset */
	len = min(free, ctx->cq_entries - off);
	if (!len)
		return NULL;

	ctx->cached_cq_tail++;
	ctx->cqe_cached = &rings->cqes[off];
	ctx->cqe_sentinel = ctx->cqe_cached + len;
	ctx->cqe_cached++;
	return &rings->cqes[off << shift];
}

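/*
 * Fast path: hand out the next CQE from the contiguous range reserved by
 * __io_get_cqe(); fall back to the slow path once the sentinel is reached.
 */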
static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
{
	if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
		struct io_uring_cqe *cqe = ctx->cqe_cached;

		if (ctx->flags & IORING_SETUP_CQE32) {
			unsigned int off = ctx->cqe_cached - ctx->rings->cqes;

			cqe += off;
		}

		ctx->cached_cq_tail++;
		ctx->cqe_cached++;
		return cqe;
	}

	return __io_get_cqe(ctx);
}

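/*
 * Copy a request's completion data into a CQE, in either the default or the
 * CQE32 layout. If no CQE can be obtained, fall back to overflow handling.
 */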
static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
				     struct io_kiocb *req)
{
	struct io_uring_cqe *cqe;

	if (!(ctx->flags & IORING_SETUP_CQE32)) {
		trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
					req->cqe.res, req->cqe.flags, 0, 0);

		/*
		 * If we can't get a cq entry, userspace overflowed the
		 * submission (by quite a lot). Increment the overflow count in
		 * the ring.
		 */
		cqe = io_get_cqe(ctx);
		if (likely(cqe)) {
			memcpy(cqe, &req->cqe, sizeof(*cqe));
			return true;
		}

		return io_cqring_event_overflow(ctx, req->cqe.user_data,
						req->cqe.res, req->cqe.flags,
						0, 0);
	} else {
		u64 extra1 = 0, extra2 = 0;

		if (req->flags & REQ_F_CQE32_INIT) {
			extra1 = req->extra1;
			extra2 = req->extra2;
		}

		trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
					req->cqe.res, req->cqe.flags, extra1, extra2);

		/*
		 * If we can't get a cq entry, userspace overflowed the
		 * submission (by quite a lot). Increment the overflow count in
		 * the ring.
		 */
		cqe = io_get_cqe(ctx);
		if (likely(cqe)) {
			memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
			WRITE_ONCE(cqe->big_cqe[0], extra1);
			WRITE_ONCE(cqe->big_cqe[1], extra2);
			return true;
		}

		return io_cqring_event_overflow(ctx, req->cqe.user_data,
						req->cqe.res, req->cqe.flags,
						extra1, extra2);
	}
}

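/*
 * A failed request always posts its own CQE; if CQE skip was requested,
 * shift the skip to the remaining requests in the link chain instead.
 */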
static inline void req_set_fail(struct io_kiocb *req)
{
	req->flags |= REQ_F_FAIL;
	if (req->flags & REQ_F_CQE_SKIP) {
		req->flags &= ~REQ_F_CQE_SKIP;
		req->flags |= REQ_F_SKIP_LINK_CQES;
	}
}

static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags)
{
	req->cqe.res = res;
	req->cqe.flags = cflags;
}

static inline bool req_has_async_data(struct io_kiocb *req)
{
	return req->flags & REQ_F_ASYNC_DATA;
}

static inline void io_put_file(struct file *file)
{
	if (file)
		fput(file);
}

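/* drop the ring lock again if this issue path had to take it */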
static inline void io_ring_submit_unlock(struct io_ring_ctx *ctx,
					 unsigned issue_flags)
{
	lockdep_assert_held(&ctx->uring_lock);
	if (issue_flags & IO_URING_F_UNLOCKED)
		mutex_unlock(&ctx->uring_lock);
}

static inline void io_ring_submit_lock(struct io_ring_ctx *ctx,
				       unsigned issue_flags)
{
	/*
	 * "Normal" inline submissions always hold the uring_lock, since we
	 * grab it from the system call. Same is true for the SQPOLL offload.
	 * The only exception is when we've detached the request and issue it
	 * from an async worker thread, grab the lock for that case.
	 */
	if (issue_flags & IO_URING_F_UNLOCKED)
		mutex_lock(&ctx->uring_lock);
	lockdep_assert_held(&ctx->uring_lock);
}

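/* publish the locally cached CQ tail so userspace can see the new CQEs */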
static inline void io_commit_cqring(struct io_ring_ctx *ctx)
{
	/* order cqe stores with ring update */
	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
}

static inline void io_cqring_wake(struct io_ring_ctx *ctx)
{
	/*
	 * wake_up_all() may seem excessive, but io_wake_function() and
	 * io_should_wake() handle the termination of the loop and only
	 * wake as many waiters as we need to.
	 */
	if (wq_has_sleeper(&ctx->cq_wait))
		wake_up_all(&ctx->cq_wait);
}

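/* true if userspace has queued as many SQEs as the SQ ring can hold */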
static inline bool io_sqring_full(struct io_ring_ctx *ctx)
{
	struct io_rings *r = ctx->rings;

	return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries;
}

static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
{
	struct io_rings *rings = ctx->rings;

	/* make sure SQ entry isn't read before tail */
	return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
}

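/* run any task_work queued for the current task; true if there was work */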
static inline bool io_run_task_work(void)
{
	if (test_thread_flag(TIF_NOTIFY_SIGNAL) || task_work_pending(current)) {
		__set_current_state(TASK_RUNNING);
		clear_notify_signal();
		if (task_work_pending(current))
			task_work_run();
		return true;
	}

	return false;
}

int io_run_task_work_sig(void);
void io_req_complete_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
void io_req_complete_post(struct io_kiocb *req);
void __io_req_complete_post(struct io_kiocb *req);
bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
		     u32 cflags);
void io_cqring_ev_posted(struct io_ring_ctx *ctx);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);

struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);

struct file *io_file_get_normal(struct io_kiocb *req, int fd);
struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
			       unsigned issue_flags);

bool io_is_uring_fops(struct file *file);
bool io_alloc_async_data(struct io_kiocb *req);
void io_req_task_work_add(struct io_kiocb *req);
void io_req_task_prio_work_add(struct io_kiocb *req);
void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
void io_req_task_queue(struct io_kiocb *req);
void io_queue_iowq(struct io_kiocb *req, bool *dont_use);
void io_req_task_complete(struct io_kiocb *req, bool *locked);
void io_req_task_queue_fail(struct io_kiocb *req, int ret);
void io_req_task_submit(struct io_kiocb *req, bool *locked);
void tctx_task_work(struct callback_head *cb);
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
int io_uring_alloc_task_context(struct task_struct *task,
				struct io_ring_ctx *ctx);

int io_poll_issue(struct io_kiocb *req, bool *locked);
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node);
int io_req_prep_async(struct io_kiocb *req);

struct io_wq_work *io_wq_free_work(struct io_wq_work *work);
void io_wq_submit_work(struct io_wq_work *work);

void io_free_req(struct io_kiocb *req);
void io_queue_next(struct io_kiocb *req);

bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
			bool cancel_all);

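/* iterate a request and every request linked behind it */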
#define io_for_each_link(pos, head) \
	for (pos = (head); pos; pos = pos->link)