ext4: Optimize ext4 DIO overwrites
author Jan Kara <jack@suse.cz>
Wed, 18 Dec 2019 17:44:33 +0000 (18:44 +0100)
committer Theodore Ts'o <tytso@mit.edu>
Thu, 26 Dec 2019 16:57:18 +0000 (11:57 -0500)
Currently we start transaction for mapping every extent for writing
using direct IO. This is unnecessary when we know we are overwriting
already allocated blocks and the overhead of starting a transaction can
be significant especially for multithreaded workloads doing small writes.
Use iomap operations that avoid starting a transaction for direct IO
overwrites.

This improves throughput of 4k random writes - fio jobfile:
[global]
rw=randrw
norandommap=1
invalidate=0
bs=4k
numjobs=16
time_based=1
ramp_time=30
runtime=120
group_reporting=1
ioengine=psync
direct=1
size=16G
filename=file1.0.0:file1.0.1:file1.0.2:file1.0.3:file1.0.4:file1.0.5:file1.0.6:file1.0.7:file1.0.8:file1.0.9:file1.0.10:file1.0.11:file1.0.12:file1.0.13:file1.0.14:file1.0.15:file1.0.16:file1.0.17:file1.0.18:file1.0.19:file1.0.20:file1.0.21:file1.0.22:file1.0.23:file1.0.24:file1.0.25:file1.0.26:file1.0.27:file1.0.28:file1.0.29:file1.0.30:file1.0.31
file_service_type=random
nrfiles=32

from 3018MB/s to 4059MB/s in my test VM running test against simulated
pmem device (note that before iomap conversion, this workload was able
to achieve 3708MB/s because old direct IO path avoided transaction start
for overwrites as well). For dax, the win is even larger improving
throughput from 3042MB/s to 4311MB/s.

Reported-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20191218174433.19380-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/file.c
fs/ext4/inode.c

index 5edc16d..791e544 100644 (file)
@@ -3455,6 +3455,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 }
 
 extern const struct iomap_ops ext4_iomap_ops;
+extern const struct iomap_ops ext4_iomap_overwrite_ops;
 extern const struct iomap_ops ext4_iomap_report_ops;
 
 static inline int ext4_buffer_uptodate(struct buffer_head *bh)
index 9c2711b..5f22588 100644 (file)
@@ -447,6 +447,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct inode *inode = file_inode(iocb->ki_filp);
        loff_t offset = iocb->ki_pos;
        size_t count = iov_iter_count(from);
+       const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
        bool extend = false, unaligned_io = false;
        bool ilock_shared = true;
 
@@ -526,7 +527,9 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
                ext4_journal_stop(handle);
        }
 
-       ret = iomap_dio_rw(iocb, from, &ext4_iomap_ops, &ext4_dio_write_ops,
+       if (ilock_shared)
+               iomap_ops = &ext4_iomap_overwrite_ops;
+       ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
                           is_sync_kiocb(iocb) || unaligned_io || extend);
 
        if (extend)
index c3270aa..d035aca 100644 (file)
@@ -3451,6 +3451,22 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
        return 0;
 }
 
+static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
+               loff_t length, unsigned flags, struct iomap *iomap,
+               struct iomap *srcmap)
+{
+       int ret;
+
+       /*
+        * Overwrites never allocate blocks, so pretend we are reading to skip
+        * the transaction start. Only a successful lookup must be MAPPED.
+        */
+       flags &= ~IOMAP_WRITE;
+       ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap);
+       WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
+       return ret;
+}
+
 static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
                          ssize_t written, unsigned flags, struct iomap *iomap)
 {
@@ -3472,6 +3488,11 @@ const struct iomap_ops ext4_iomap_ops = {
        .iomap_end              = ext4_iomap_end,
 };
 
+const struct iomap_ops ext4_iomap_overwrite_ops = {
+       .iomap_begin            = ext4_iomap_overwrite_begin, /* no transaction */
+       .iomap_end              = ext4_iomap_end, /* same as ext4_iomap_ops */
+};
+
 static bool ext4_iomap_is_delalloc(struct inode *inode,
                                   struct ext4_map_blocks *map)
 {