CIFS: Add support for direct I/O write
author Long Li <longli@microsoft.com>
Wed, 31 Oct 2018 22:13:10 +0000 (22:13 +0000)
committer Steve French <stfrench@microsoft.com>
Fri, 2 Nov 2018 19:09:42 +0000 (14:09 -0500)
With direct I/O write, user-supplied buffers are pinned in memory and data
is transferred directly from the user buffers to the transport layer.

Change in v3: add support for kernel AIO

Change in v4:
Refactor common write code to __cifs_writev for direct and non-direct I/O.
Retry on direct I/O failure.

Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
fs/cifs/cifsfs.h
fs/cifs/file.c

index 3abea1a..4c3b5cf 100644 (file)
@@ -104,6 +104,7 @@ extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
+extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
 extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, loff_t, loff_t, int);
index 1be3607..937ffa7 100644 (file)
@@ -2538,6 +2538,81 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
 }
 
+/*
+ * cifs_resend_wdata - resend one whole wdata (not in segments)
+ * @wdata:      write request to resend; its reference is put on failure
+ * @wdata_list: list the wdata is appended to once it has been re-issued
+ * @ctx:        AIO context of the write (not used in this function)
+ *
+ * Waits for enough credits to cover wdata->bytes, sleeping one second after
+ * each insufficient grant and giving up after three of them, then re-issues
+ * the write through server->ops->async_writev.
+ *
+ * Returns 0 on success, the wait_mtu_credits() error if waiting for credits
+ * fails, -EBUSY if the grant never covered wdata->bytes, or the error from
+ * cifs_reopen_file()/async_writev().
+ */
 static int
+cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
+       struct cifs_aio_ctx *ctx)
+{
+       int wait_retry = 0;
+       unsigned int wsize, credits;
+       int rc;
+       struct TCP_Server_Info *server =
+               tlink_tcon(wdata->cfile->tlink)->ses->server;
+
+       /*
+        * Try to resend this wdata, waiting for credits up to 3 seconds.
+        * Note: we are attempting to resend the whole wdata not in segments
+        */
+       do {
+               rc = server->ops->wait_mtu_credits(
+                       server, wdata->bytes, &wsize, &credits);
+
+               /*
+                * Bail out with the real error: wsize may not have been
+                * written on failure, so it must not be compared below.
+                */
+               if (rc)
+                       goto out;
+
+               if (wsize < wdata->bytes) {
+                       /* Grant too small: hand it back and retry later. */
+                       add_credits_and_wake_if(server, credits, 0);
+                       msleep(1000);
+                       wait_retry++;
+               }
+       } while (wsize < wdata->bytes && wait_retry < 3);
+
+       if (wsize < wdata->bytes) {
+               rc = -EBUSY;
+               goto out;
+       }
+
+       /* Re-issue for as long as the attempt reports -EAGAIN. */
+       rc = -EAGAIN;
+       while (rc == -EAGAIN) {
+               rc = 0;
+               if (wdata->cfile->invalidHandle)
+                       rc = cifs_reopen_file(wdata->cfile, false);
+               if (!rc)
+                       rc = server->ops->async_writev(wdata,
+                                       cifs_uncached_writedata_release);
+       }
+
+       if (!rc) {
+               list_add_tail(&wdata->list, wdata_list);
+               return 0;
+       }
+
+       add_credits_and_wake_if(server, wdata->credits, 0);
+out:
+       kref_put(&wdata->refcount, cifs_uncached_writedata_release);
+
+       return rc;
+}
+
+static int
 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                     struct cifsFileInfo *open_file,
                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
@@ -2551,6 +2606,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
        loff_t saved_offset = offset;
        pid_t pid;
        struct TCP_Server_Info *server;
+       struct page **pagevec;
+       size_t start;
 
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
@@ -2567,38 +2624,76 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                if (rc)
                        break;
 
-               nr_pages = get_numpages(wsize, len, &cur_len);
-               wdata = cifs_writedata_alloc(nr_pages,
+               if (ctx->direct_io) {
+                       cur_len = iov_iter_get_pages_alloc(
+                               from, &pagevec, wsize, &start);
+                       if (cur_len < 0) {
+                               cifs_dbg(VFS,
+                                       "direct_writev couldn't get user pages "
+                                       "(rc=%zd) iter type %d iov_offset %zd "
+                                       "count %zd\n",
+                                       cur_len, from->type,
+                                       from->iov_offset, from->count);
+                               dump_stack();
+                               break;
+                       }
+                       iov_iter_advance(from, cur_len);
+
+                       nr_pages =
+                               (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+                       wdata = cifs_writedata_direct_alloc(pagevec,
                                             cifs_uncached_writev_complete);
-               if (!wdata) {
-                       rc = -ENOMEM;
-                       add_credits_and_wake_if(server, credits, 0);
-                       break;
-               }
+                       if (!wdata) {
+                               rc = -ENOMEM;
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
 
-               rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
-               if (rc) {
-                       kfree(wdata);
-                       add_credits_and_wake_if(server, credits, 0);
-                       break;
-               }
 
-               num_pages = nr_pages;
-               rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
-               if (rc) {
-                       for (i = 0; i < nr_pages; i++)
-                               put_page(wdata->pages[i]);
-                       kfree(wdata);
-                       add_credits_and_wake_if(server, credits, 0);
-                       break;
-               }
+                       wdata->page_offset = start;
+                       wdata->tailsz =
+                               nr_pages > 1 ?
+                                       cur_len - (PAGE_SIZE - start) -
+                                       (nr_pages - 2) * PAGE_SIZE :
+                                       cur_len;
+               } else {
+                       nr_pages = get_numpages(wsize, len, &cur_len);
+                       wdata = cifs_writedata_alloc(nr_pages,
+                                            cifs_uncached_writev_complete);
+                       if (!wdata) {
+                               rc = -ENOMEM;
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
 
-               /*
-                * Bring nr_pages down to the number of pages we actually used,
-                * and free any pages that we didn't use.
-                */
-               for ( ; nr_pages > num_pages; nr_pages--)
-                       put_page(wdata->pages[nr_pages - 1]);
+                       rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
+                       if (rc) {
+                               kfree(wdata);
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
+
+                       num_pages = nr_pages;
+                       rc = wdata_fill_from_iovec(
+                               wdata, from, &cur_len, &num_pages);
+                       if (rc) {
+                               for (i = 0; i < nr_pages; i++)
+                                       put_page(wdata->pages[i]);
+                               kfree(wdata);
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
+
+                       /*
+                        * Bring nr_pages down to the number of pages we
+                        * actually used, and free any pages that we didn't use.
+                        */
+                       for ( ; nr_pages > num_pages; nr_pages--)
+                               put_page(wdata->pages[nr_pages - 1]);
+
+                       wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
+               }
 
                wdata->sync_mode = WB_SYNC_ALL;
                wdata->nr_pages = nr_pages;
@@ -2607,7 +2702,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                wdata->pid = pid;
                wdata->bytes = cur_len;
                wdata->pagesz = PAGE_SIZE;
-               wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
                wdata->credits = credits;
                wdata->ctx = ctx;
                kref_get(&ctx->refcount);
@@ -2682,13 +2776,18 @@ restart_loop:
                                INIT_LIST_HEAD(&tmp_list);
                                list_del_init(&wdata->list);
 
-                               iov_iter_advance(&tmp_from,
+                               if (ctx->direct_io)
+                                       rc = cifs_resend_wdata(
+                                               wdata, &tmp_list, ctx);
+                               else {
+                                       iov_iter_advance(&tmp_from,
                                                 wdata->offset - ctx->pos);
 
-                               rc = cifs_write_from_iter(wdata->offset,
+                                       rc = cifs_write_from_iter(wdata->offset,
                                                wdata->bytes, &tmp_from,
                                                ctx->cfile, cifs_sb, &tmp_list,
                                                ctx);
+                               }
 
                                list_splice(&tmp_list, &ctx->list);
 
@@ -2701,8 +2800,9 @@ restart_loop:
                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
        }
 
-       for (i = 0; i < ctx->npages; i++)
-               put_page(ctx->bv[i].bv_page);
+       if (!ctx->direct_io)
+               for (i = 0; i < ctx->npages; i++)
+                       put_page(ctx->bv[i].bv_page);
 
        cifs_stats_bytes_written(tcon, ctx->total_len);
        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -2717,7 +2817,8 @@ restart_loop:
                complete(&ctx->done);
 }
 
-ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t __cifs_writev(
+       struct kiocb *iocb, struct iov_iter *from, bool direct)
 {
        struct file *file = iocb->ki_filp;
        ssize_t total_written = 0;
@@ -2726,13 +2827,18 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
        struct cifs_sb_info *cifs_sb;
        struct cifs_aio_ctx *ctx;
        struct iov_iter saved_from = *from;
+       size_t len = iov_iter_count(from);
        int rc;
 
        /*
-        * BB - optimize the way when signing is disabled. We can drop this
-        * extra memory-to-memory copying and use iovec buffers for constructing
-        * write request.
+        * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
+        * In this case, fall back to non-direct write function.
+        * this could be improved by getting pages directly in ITER_KVEC
         */
+       if (direct && from->type & ITER_KVEC) {
+               cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
+               direct = false;
+       }
 
        rc = generic_write_checks(iocb, from);
        if (rc <= 0)
@@ -2756,10 +2862,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 
        ctx->pos = iocb->ki_pos;
 
-       rc = setup_aio_ctx_iter(ctx, from, WRITE);
-       if (rc) {
-               kref_put(&ctx->refcount, cifs_aio_ctx_release);
-               return rc;
+       if (direct) {
+               ctx->direct_io = true;
+               ctx->iter = *from;
+               ctx->len = len;
+       } else {
+               rc = setup_aio_ctx_iter(ctx, from, WRITE);
+               if (rc) {
+                       kref_put(&ctx->refcount, cifs_aio_ctx_release);
+                       return rc;
+               }
        }
 
        /* grab a lock here due to read response handlers can access ctx */
@@ -2809,6 +2921,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
        return total_written;
 }
 
+ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
+{ /* Direct I/O write; ITER_KVEC iterators fall back to non-direct. */
+       return __cifs_writev(iocb, from, true); /* direct = true */
+}
+
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+{ /* Non-direct write through the same shared __cifs_writev() helper. */
+       return __cifs_writev(iocb, from, false); /* direct = false */
+}
+
 static ssize_t
 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
 {