dax: Remove zeroing from dax_io()
author Jan Kara <jack@suse.cz>
Wed, 11 May 2016 09:58:51 +0000 (11:58 +0200)
committer Vishal Verma <vishal.l.verma@intel.com>
Tue, 17 May 2016 06:44:09 +0000 (00:44 -0600)
All the filesystems are now zeroing blocks themselves for DAX IO to avoid
races between dax_io() and dax_fault(). Remove the zeroing code from
dax_io() and add a warning to catch the case when somebody unexpectedly
returns a new or unwritten buffer.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
fs/dax.c

index ccb8bc3..7c0036d 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -119,18 +119,6 @@ int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
 }
 EXPORT_SYMBOL_GPL(dax_clear_sectors);
 
-/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
-static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
-               loff_t pos, loff_t end)
-{
-       loff_t final = end - pos + first; /* The final byte of the buffer */
-
-       if (first > 0)
-               clear_pmem(addr, first);
-       if (final < size)
-               clear_pmem(addr + final, size - final);
-}
-
 static bool buffer_written(struct buffer_head *bh)
 {
        return buffer_mapped(bh) && !buffer_unwritten(bh);
@@ -169,6 +157,9 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
        struct blk_dax_ctl dax = {
                .addr = (void __pmem *) ERR_PTR(-EIO),
        };
+       unsigned blkbits = inode->i_blkbits;
+       sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
+                                                               >> blkbits;
 
        if (rw == READ)
                end = min(end, i_size_read(inode));
@@ -176,7 +167,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
        while (pos < end) {
                size_t len;
                if (pos == max) {
-                       unsigned blkbits = inode->i_blkbits;
                        long page = pos >> PAGE_SHIFT;
                        sector_t block = page << (PAGE_SHIFT - blkbits);
                        unsigned first = pos - (block << blkbits);
@@ -192,6 +182,13 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                                        bh->b_size = 1 << blkbits;
                                bh_max = pos - first + bh->b_size;
                                bdev = bh->b_bdev;
+                               /*
+                                * We allow uninitialized buffers for writes
+                                * beyond EOF as those cannot race with faults
+                                */
+                               WARN_ON_ONCE(
+                                       (buffer_new(bh) && block < file_blks) ||
+                                       (rw == WRITE && buffer_unwritten(bh)));
                        } else {
                                unsigned done = bh->b_size -
                                                (bh_max - (pos - first));
@@ -211,11 +208,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                                        rc = map_len;
                                        break;
                                }
-                               if (buffer_unwritten(bh) || buffer_new(bh)) {
-                                       dax_new_buf(dax.addr, map_len, first,
-                                                       pos, end);
-                                       need_wmb = true;
-                               }
                                dax.addr += first;
                                size = map_len - first;
                        }