btrfs: avoid blocking nowait dio when locking file range
author: Filipe Manana <fdmanana@suse.com>
Wed, 23 Mar 2022 16:19:24 +0000 (16:19 +0000)
committer: David Sterba <dsterba@suse.com>
Mon, 16 May 2022 15:03:09 +0000 (17:03 +0200)
If we are doing a NOWAIT direct IO read/write, we can block when locking
the file range at btrfs_dio_iomap_begin(), as it's possible the range (or
a part of it) is already locked by another task (mmap writes, another
direct IO read/write racing with us, fiemap, etc.). We also wait for the
completion of any ordered extent we find in the range, which can also
block us for a significant amount of time.

There's also the incorrect fallback to buffered IO (returning -ENOTBLK)
when we are dealing with a NOWAIT request and we can't proceed. Falling
back to buffered IO can result in blocking for many different reasons,
so in this case we should return -EAGAIN instead, allowing the caller to
delegate a retry to a context where blocking is more acceptable.

Fix these cases by:

1) Doing a try lock on the file range and failing with -EAGAIN if we
   cannot lock it right away;

2) Failing with -EAGAIN if we find an ordered extent;

3) Returning -EAGAIN instead of -ENOTBLK when we need to fall back to
   buffered IO and we have a NOWAIT request.

This will also allow us to avoid a duplicated check that verifies whether
we are able to do a NOCOW write for NOWAIT direct IO writes; that check
is removed in the next patch.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/inode.c

index a240b7f..fa5dd20 100644 (file)
@@ -7250,14 +7250,22 @@ out:
 }
 
 static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
-                             struct extent_state **cached_state, bool writing)
+                             struct extent_state **cached_state,
+                             unsigned int iomap_flags)
 {
+       const bool writing = (iomap_flags & IOMAP_WRITE);
+       const bool nowait = (iomap_flags & IOMAP_NOWAIT);
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_ordered_extent *ordered;
        int ret = 0;
 
        while (1) {
-               lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                cached_state);
+               if (nowait) {
+                       if (!try_lock_extent(io_tree, lockstart, lockend))
+                               return -EAGAIN;
+               } else {
+                       lock_extent_bits(io_tree, lockstart, lockend, cached_state);
+               }
                /*
                 * We're concerned with the entire range that we're going to be
                 * doing DIO to, so we need to make sure there's no ordered
@@ -7278,10 +7286,14 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
                                                         lockstart, lockend)))
                        break;
 
-               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                    cached_state);
+               unlock_extent_cached(io_tree, lockstart, lockend, cached_state);
 
                if (ordered) {
+                       if (nowait) {
+                               btrfs_put_ordered_extent(ordered);
+                               ret = -EAGAIN;
+                               break;
+                       }
                        /*
                         * If we are doing a DIO read and the ordered extent we
                         * found is for a buffered write, we can not wait for it
@@ -7301,7 +7313,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
                            test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
                                btrfs_start_ordered_extent(ordered, 1);
                        else
-                               ret = -ENOTBLK;
+                               ret = nowait ? -EAGAIN : -ENOTBLK;
                        btrfs_put_ordered_extent(ordered);
                } else {
                        /*
@@ -7317,7 +7329,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
                         * ordered extent to complete while holding a lock on
                         * that page.
                         */
-                       ret = -ENOTBLK;
+                       ret = nowait ? -EAGAIN : -ENOTBLK;
                }
 
                if (ret)
@@ -7572,12 +7584,12 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 
        /*
         * If this errors out it's because we couldn't invalidate pagecache for
-        * this range and we need to fallback to buffered.
+        * this range and we need to fallback to buffered IO, or we are doing a
+        * NOWAIT read/write and we need to block.
         */
-       if (lock_extent_direct(inode, lockstart, lockend, &cached_state, write)) {
-               ret = -ENOTBLK;
+       ret = lock_extent_direct(inode, lockstart, lockend, &cached_state, flags);
+       if (ret < 0)
                goto err;
-       }
 
        em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
        if (IS_ERR(em)) {