io_uring: add IORING_OP_FADVISE
author Jens Axboe <axboe@kernel.dk>
Thu, 26 Dec 2019 05:03:45 +0000 (22:03 -0700)
committer Jens Axboe <axboe@kernel.dk>
Tue, 21 Jan 2020 00:04:01 +0000 (17:04 -0700)
This adds support for doing fadvise through io_uring. We assume that
WILLNEED doesn't block, but that DONTNEED may block.

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/uapi/linux/io_uring.h

index 5286620..9ca12b9 100644 (file)
@@ -72,6 +72,7 @@
 #include <linux/highmem.h>
 #include <linux/namei.h>
 #include <linux/fsnotify.h>
+#include <linux/fadvise.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -400,6 +401,13 @@ struct io_files_update {
        u32                             offset;
 };
 
+struct io_fadvise {    /* per-request arguments for IORING_OP_FADVISE */
+       struct file                     *file;  /* not set by io_fadvise_prep(); io_fadvise() uses req->file - presumably overlays it in the io_kiocb union, TODO confirm */
+       u64                             offset; /* copied from sqe->off; passed to vfs_fadvise() as the offset */
+       u32                             len;    /* copied from sqe->len; passed to vfs_fadvise() as the length */
+       u32                             advice; /* copied from sqe->fadvise_advice; compared against POSIX_FADV_* in io_fadvise() */
+};
+
 struct io_async_connect {
        struct sockaddr_storage         address;
 };
@@ -452,6 +460,7 @@ struct io_kiocb {
                struct io_open          open;
                struct io_close         close;
                struct io_files_update  files_update;
+               struct io_fadvise       fadvise;
        };
 
        struct io_async_ctx             *io;
@@ -667,6 +676,10 @@ static const struct io_op_def io_op_defs[] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
        },
+       {
+               /* IORING_OP_FADVISE */
+               .needs_file             = 1,
+       },
 };
 
 static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2436,6 +2449,35 @@ err:
        return 0;
 }
 
+/*
+ * Prepare an IORING_OP_FADVISE request: validate the SQE and copy the
+ * offset, length and advice out of it into req->fadvise.
+ *
+ * Returns 0 on success, or -EINVAL if ioprio, buf_index or addr is non-zero.
+ */
+static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_fadvise *fa = &req->fadvise;
+
+       /* these SQE fields are not consumed here, so reject non-zero values */
+       if (sqe->ioprio)
+               return -EINVAL;
+       if (sqe->buf_index)
+               return -EINVAL;
+       if (sqe->addr)
+               return -EINVAL;
+
+       fa->offset = READ_ONCE(sqe->off);
+       fa->len = READ_ONCE(sqe->len);
+       fa->advice = READ_ONCE(sqe->fadvise_advice);
+       return 0;
+}
+
+/*
+ * Issue an IORING_OP_FADVISE request: apply the advice stored in
+ * req->fadvise to req->file through vfs_fadvise() and post the result as
+ * the request's completion event.
+ *
+ * @req:            request whose ->fadvise was filled in by io_fadvise_prep()
+ * @nxt:            handed to io_put_req_find_next() when the request is put
+ * @force_nonblock: true when the caller must not block
+ *
+ * Returns -EAGAIN, without touching the file or posting a completion, when
+ * @force_nonblock is set and the advice is not known to be non-blocking (the
+ * caller is expected to retry with @force_nonblock clear). Otherwise returns
+ * 0; the vfs_fadvise() result, including any error, is reported only via the
+ * completion event.
+ */
+static int io_fadvise(struct io_kiocb *req, struct io_kiocb **nxt,
+                     bool force_nonblock)
+{
+       struct io_fadvise *fa = &req->fadvise;
+       int ret;
+
+       if (force_nonblock) {
+               /*
+                * Only the pure hint updates are safe to run inline.
+                * WILLNEED kicks off readahead and DONTNEED may write back
+                * and invalidate pages, both of which can block; punt those
+                * and any unrecognised advice value.
+                */
+               switch (fa->advice) {
+               case POSIX_FADV_NORMAL:
+               case POSIX_FADV_RANDOM:
+               case POSIX_FADV_SEQUENTIAL:
+                       break;
+               default:
+                       return -EAGAIN;
+               }
+       }
+
+       ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
+       if (ret < 0)
+               req_set_fail_links(req);        /* mark the request failed for linked requests */
+       io_cqring_add_event(req, ret);
+       io_put_req_find_next(req, nxt);
+       return 0;
+}
+
 static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        unsigned lookup_flags;
@@ -3721,6 +3763,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
        case IORING_OP_STATX:
                ret = io_statx_prep(req, sqe);
                break;
+       case IORING_OP_FADVISE:
+               ret = io_fadvise_prep(req, sqe);
+               break;
        default:
                printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
                                req->opcode);
@@ -3917,6 +3962,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                }
                ret = io_statx(req, nxt, force_nonblock);
                break;
+       case IORING_OP_FADVISE:
+               if (sqe) {
+                       ret = io_fadvise_prep(req, sqe);
+                       if (ret)
+                               break;
+               }
+               ret = io_fadvise(req, nxt, force_nonblock);
+               break;
        default:
                ret = -EINVAL;
                break;
index 1f96136..f86d1c7 100644 (file)
@@ -36,6 +36,7 @@ struct io_uring_sqe {
                __u32           cancel_flags;
                __u32           open_flags;
                __u32           statx_flags;
+               __u32           fadvise_advice;
        };
        __u64   user_data;      /* data to be passed back at completion time */
        union {
@@ -86,6 +87,7 @@ enum {
        IORING_OP_STATX,
        IORING_OP_READ,
        IORING_OP_WRITE,
+       IORING_OP_FADVISE,
 
        /* this goes last, obviously */
        IORING_OP_LAST,