btrfs: split btrfs_direct_IO to read and write
author Goldwyn Rodrigues <rgoldwyn@suse.com>
Thu, 24 Sep 2020 16:39:12 +0000 (11:39 -0500)
committer David Sterba <dsterba@suse.com>
Tue, 8 Dec 2020 14:53:45 +0000 (15:53 +0100)
The read and write DIO don't have anything in common except for the
call to iomap_dio_rw. Extract the write call into a new function to get
rid of conditional statements for direct write.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/file.c
fs/btrfs/inode.c

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 39e4c35e965ae0a35d7b035c6f0bf7ef1364c18a..5244400d5c238b5d26920941474358f797f792c0 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
 #include <linux/dynamic_debug.h>
 #include <linux/refcount.h>
 #include <linux/crc32c.h>
+#include <linux/iomap.h>
 #include "extent-io-tree.h"
 #include "extent_io.h"
 #include "extent_map.h"
@@ -3065,7 +3066,9 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
 void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
                                          u64 end, int uptodate);
 extern const struct dentry_operations btrfs_dentry_operations;
-ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
+extern const struct iomap_ops btrfs_dio_iomap_ops;
+extern const struct iomap_dio_ops btrfs_dio_ops;
+extern const struct iomap_dio_ops btrfs_sync_dops;
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5e2b0592abeaf9797cb5e248e03d50b50fe85f9e..6c12f5703faa7d028776f5c67cc01e38d7fa1417 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1782,21 +1782,67 @@ again:
        return num_written ? num_written : ret;
 }
 
-static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
+                              const struct iov_iter *iter, loff_t offset)
+{
+       const u32 blocksize_mask = fs_info->sectorsize - 1;
+
+       if (offset & blocksize_mask)
+               return -EINVAL;
+
+       if (iov_iter_alignment(iter) & blocksize_mask)
+               return -EINVAL;
+
+       return 0;
+}
+
+static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
-       loff_t pos;
-       ssize_t written;
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       loff_t pos = iocb->ki_pos;
+       ssize_t written = 0;
+       bool relock = false;
        ssize_t written_buffered;
        loff_t endbyte;
        int err;
 
-       written = btrfs_direct_IO(iocb, from);
+       if (check_direct_IO(fs_info, from, pos))
+               goto buffered;
+
+       /*
+        * If the write DIO is beyond EOF, we need to update the isize, but it
+        * is protected by inode lock. So we cannot unlock it here.
+        */
+       if (pos + iov_iter_count(from) <= inode->i_size) {
+               inode_unlock(inode);
+               relock = true;
+       }
+       down_read(&BTRFS_I(inode)->dio_sem);
+
+       /*
+        * This is actually a sync iocb, so we need our fancy endio to know if
+        * we need to sync.
+        */
+       if (current->journal_info)
+               written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops,
+                                      &btrfs_sync_dops, is_sync_kiocb(iocb));
+       else
+               written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops,
+                                      &btrfs_dio_ops, is_sync_kiocb(iocb));
+
+       if (written == -ENOTBLK)
+               written = 0;
+
+       up_read(&BTRFS_I(inode)->dio_sem);
+       if (relock)
+               inode_lock(inode);
 
        if (written < 0 || !iov_iter_count(from))
                return written;
 
+buffered:
        pos = iocb->ki_pos;
        written_buffered = btrfs_buffered_write(iocb, from);
        if (written_buffered < 0) {
@@ -1970,7 +2016,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                        iocb->ki_flags &= ~IOCB_DSYNC;
                        current->journal_info = BTRFS_DIO_SYNC_STUB;
                }
-               num_written = __btrfs_direct_write(iocb, from);
+               num_written = btrfs_direct_write(iocb, from);
 
                /*
                 * As stated above, we cleared journal_info, so we need to do
@@ -3545,16 +3591,47 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
        return generic_file_open(inode, filp);
 }
 
+static int check_direct_read(struct btrfs_fs_info *fs_info,
+                            const struct iov_iter *iter, loff_t offset)
+{
+       int ret;
+       int i, seg;
+
+       ret = check_direct_IO(fs_info, iter, offset);
+       if (ret < 0)
+               return ret;
+
+       if (!iter_is_iovec(iter))
+               return 0;
+
+       for (seg = 0; seg < iter->nr_segs; seg++)
+               for (i = seg + 1; i < iter->nr_segs; i++)
+                       if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
+                               return -EINVAL;
+       return 0;
+}
+
+static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       ssize_t ret;
+
+       if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
+               return 0;
+
+       inode_lock_shared(inode);
+       ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+                          is_sync_kiocb(iocb));
+       inode_unlock_shared(inode);
+       return ret;
+}
+
 static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        ssize_t ret = 0;
 
        if (iocb->ki_flags & IOCB_DIRECT) {
-               struct inode *inode = file_inode(iocb->ki_filp);
-
-               inode_lock_shared(inode);
-               ret = btrfs_direct_IO(iocb, to);
-               inode_unlock_shared(inode);
+               ret = btrfs_direct_read(iocb, to);
                if (ret < 0 || !iov_iter_count(to) ||
                    iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
                        return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 21a354dad6f21a669da4a9ba666c4d5d0b09d011..6fd561ac98667bc530e87d62ad6eb4936e6b82a7 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7954,39 +7954,6 @@ out_err:
        return BLK_QC_T_NONE;
 }
 
-static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
-                              const struct iov_iter *iter, loff_t offset)
-{
-       int seg;
-       int i;
-       unsigned int blocksize_mask = fs_info->sectorsize - 1;
-       ssize_t retval = -EINVAL;
-
-       if (offset & blocksize_mask)
-               goto out;
-
-       if (iov_iter_alignment(iter) & blocksize_mask)
-               goto out;
-
-       /* If this is a write we don't need to check anymore */
-       if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
-               return 0;
-       /*
-        * Check to make sure we don't have duplicate iov_base's in this
-        * iovec, if so return EINVAL, otherwise we'll get csum errors
-        * when reading back.
-        */
-       for (seg = 0; seg < iter->nr_segs; seg++) {
-               for (i = seg + 1; i < iter->nr_segs; i++) {
-                       if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
-                               goto out;
-               }
-       }
-       retval = 0;
-out:
-       return retval;
-}
-
 static inline int btrfs_maybe_fsync_end_io(struct kiocb *iocb, ssize_t size,
                                           int error, unsigned flags)
 {
@@ -8011,72 +7978,20 @@ static inline int btrfs_maybe_fsync_end_io(struct kiocb *iocb, ssize_t size,
        return 0;
 }
 
-static const struct iomap_ops btrfs_dio_iomap_ops = {
+const struct iomap_ops btrfs_dio_iomap_ops = {
        .iomap_begin            = btrfs_dio_iomap_begin,
        .iomap_end              = btrfs_dio_iomap_end,
 };
 
-static const struct iomap_dio_ops btrfs_dio_ops = {
+const struct iomap_dio_ops btrfs_dio_ops = {
        .submit_io              = btrfs_submit_direct,
 };
 
-static const struct iomap_dio_ops btrfs_sync_dops = {
+const struct iomap_dio_ops btrfs_sync_dops = {
        .submit_io              = btrfs_submit_direct,
        .end_io                 = btrfs_maybe_fsync_end_io,
 };
 
-ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-       struct file *file = iocb->ki_filp;
-       struct inode *inode = file->f_mapping->host;
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct extent_changeset *data_reserved = NULL;
-       loff_t offset = iocb->ki_pos;
-       size_t count = 0;
-       bool relock = false;
-       ssize_t ret;
-
-       if (check_direct_IO(fs_info, iter, offset))
-               return 0;
-
-       count = iov_iter_count(iter);
-       if (iov_iter_rw(iter) == WRITE) {
-               /*
-                * If the write DIO is beyond the EOF, we need update
-                * the isize, but it is protected by i_mutex. So we can
-                * not unlock the i_mutex at this case.
-                */
-               if (offset + count <= inode->i_size) {
-                       inode_unlock(inode);
-                       relock = true;
-               }
-               down_read(&BTRFS_I(inode)->dio_sem);
-       }
-
-       /*
-        * We have are actually a sync iocb, so we need our fancy endio to know
-        * if we need to sync.
-        */
-       if (current->journal_info)
-               ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops,
-                                  &btrfs_sync_dops, is_sync_kiocb(iocb));
-       else
-               ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops,
-                                  &btrfs_dio_ops, is_sync_kiocb(iocb));
-
-       if (ret == -ENOTBLK)
-               ret = 0;
-
-       if (iov_iter_rw(iter) == WRITE)
-               up_read(&BTRFS_I(inode)->dio_sem);
-
-       if (relock)
-               inode_lock(inode);
-
-       extent_changeset_free(data_reserved);
-       return ret;
-}
-
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        u64 start, u64 len)
 {