Merge tag 'for-6.1-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Nov 2022 18:12:48 +0000 (11:12 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Nov 2022 18:12:48 +0000 (11:12 -0700)
Pull btrfs fixes from David Sterba:
 "A batch of error handling fixes for resource leaks, fixes for nowait
  mode in combination with direct and buffered IO:

   - direct IO + dsync + nowait could miss a sync of the file after
     write, add handling for this combination

   - buffered IO + nowait should not fail with ENOSPC, only blocking IO
     could determine that

   - error handling fixes:
      - fix inode reserve space leak due to nowait buffered write
      - check the correct variable after allocation (direct IO submit)
      - fix inode list leak during backref walking
      - fix ulist freeing in self tests"

* tag 'for-6.1-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix inode reserve space leak due to nowait buffered write
  btrfs: fix nowait buffered write returning -ENOSPC
  btrfs: remove pointless and double ulist frees in error paths of qgroup tests
  btrfs: fix ulist leaks in error paths of qgroup self tests
  btrfs: fix inode list leak during backref walking at find_parent_nodes()
  btrfs: fix inode list leak during backref walking at resolve_indirect_refs()
  btrfs: fix lost file sync on direct IO write with nowait and dsync iocb
  btrfs: fix a memory allocation failure test in btrfs_submit_direct

fs/btrfs/backref.c
fs/btrfs/ctree.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/tests/qgroup-tests.c

index 4ec18ce..18374a6 100644 (file)
@@ -289,8 +289,10 @@ static void prelim_release(struct preftree *preftree)
        struct prelim_ref *ref, *next_ref;
 
        rbtree_postorder_for_each_entry_safe(ref, next_ref,
-                                            &preftree->root.rb_root, rbnode)
+                                            &preftree->root.rb_root, rbnode) {
+               free_inode_elem_list(ref->inode_list);
                free_pref(ref);
+       }
 
        preftree->root = RB_ROOT_CACHED;
        preftree->count = 0;
@@ -648,6 +650,18 @@ unode_aux_to_inode_list(struct ulist_node *node)
        return (struct extent_inode_elem *)(uintptr_t)node->aux;
 }
 
+static void free_leaf_list(struct ulist *ulist)
+{
+       struct ulist_node *node;
+       struct ulist_iterator uiter;
+
+       ULIST_ITER_INIT(&uiter);
+       while ((node = ulist_next(ulist, &uiter)))
+               free_inode_elem_list(unode_aux_to_inode_list(node));
+
+       ulist_free(ulist);
+}
+
 /*
  * We maintain three separate rbtrees: one for direct refs, one for
  * indirect refs which have a key, and one for indirect refs which do not
@@ -762,7 +776,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
                cond_resched();
        }
 out:
-       ulist_free(parents);
+       /*
+        * We may have inode lists attached to refs in the parents ulist, so we
+        * must free them before freeing the ulist and its refs.
+        */
+       free_leaf_list(parents);
        return ret;
 }
 
@@ -1368,6 +1386,12 @@ again:
                                if (ret < 0)
                                        goto out;
                                ref->inode_list = eie;
+                               /*
+                                * We transferred the list ownership to the ref,
+                                * so set to NULL to avoid a double free in case
+                                * an error happens after this.
+                                */
+                               eie = NULL;
                        }
                        ret = ulist_add_merge_ptr(refs, ref->parent,
                                                  ref->inode_list,
@@ -1393,6 +1417,14 @@ again:
                                eie->next = ref->inode_list;
                        }
                        eie = NULL;
+                       /*
+                        * We have transferred the inode list ownership from
+                        * this ref to the ref we added to the 'refs' ulist.
+                        * So set this ref's inode list to NULL to avoid
+                        * use-after-free when our caller uses it or double
+                        * frees in case an error happens before we return.
+                        */
+                       ref->inode_list = NULL;
                }
                cond_resched();
        }
@@ -1409,24 +1441,6 @@ out:
        return ret;
 }
 
-static void free_leaf_list(struct ulist *blocks)
-{
-       struct ulist_node *node = NULL;
-       struct extent_inode_elem *eie;
-       struct ulist_iterator uiter;
-
-       ULIST_ITER_INIT(&uiter);
-       while ((node = ulist_next(blocks, &uiter))) {
-               if (!node->aux)
-                       continue;
-               eie = unode_aux_to_inode_list(node);
-               free_inode_elem_list(eie);
-               node->aux = 0;
-       }
-
-       ulist_free(blocks);
-}
-
 /*
  * Finds all leafs with a reference to the specified combination of bytenr and
  * offset. key_list_head will point to a list of corresponding keys (caller must
index 727595e..f677b49 100644 (file)
@@ -3462,7 +3462,10 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
 ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
                             const struct btrfs_ioctl_encoded_io_args *encoded);
 
-ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before);
+ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
+                      size_t done_before);
+struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
+                                 size_t done_before);
 
 extern const struct dentry_operations btrfs_dentry_operations;
 
index 176b432..d01631d 100644 (file)
@@ -1598,14 +1598,19 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                                write_bytes);
                        else
                                btrfs_check_nocow_unlock(BTRFS_I(inode));
+
+                       if (nowait && ret == -ENOSPC)
+                               ret = -EAGAIN;
                        break;
                }
 
                release_bytes = reserve_bytes;
 again:
                ret = balance_dirty_pages_ratelimited_flags(inode->i_mapping, bdp_flags);
-               if (ret)
+               if (ret) {
+                       btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
                        break;
+               }
 
                /*
                 * This is going to setup the pages array with the number of
@@ -1765,6 +1770,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
        loff_t endbyte;
        ssize_t err;
        unsigned int ilock_flags = 0;
+       struct iomap_dio *dio;
 
        if (iocb->ki_flags & IOCB_NOWAIT)
                ilock_flags |= BTRFS_ILOCK_TRY;
@@ -1825,11 +1831,22 @@ relock:
         * So here we disable page faults in the iov_iter and then retry if we
         * got -EFAULT, faulting in the pages before the retry.
         */
-again:
        from->nofault = true;
-       err = btrfs_dio_rw(iocb, from, written);
+       dio = btrfs_dio_write(iocb, from, written);
        from->nofault = false;
 
+       /*
+        * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync
+        * iocb, and that needs to lock the inode. So unlock it before calling
+        * iomap_dio_complete() to avoid a deadlock.
+        */
+       btrfs_inode_unlock(inode, ilock_flags);
+
+       if (IS_ERR_OR_NULL(dio))
+               err = PTR_ERR_OR_ZERO(dio);
+       else
+               err = iomap_dio_complete(dio);
+
        /* No increment (+=) because iomap returns a cumulative value. */
        if (err > 0)
                written = err;
@@ -1855,12 +1872,10 @@ again:
                } else {
                        fault_in_iov_iter_readable(from, left);
                        prev_left = left;
-                       goto again;
+                       goto relock;
                }
        }
 
-       btrfs_inode_unlock(inode, ilock_flags);
-
        /*
         * If 'err' is -ENOTBLK or we have not written all data, then it means
         * we must fallback to buffered IO.
@@ -4035,7 +4050,7 @@ again:
         */
        pagefault_disable();
        to->nofault = true;
-       ret = btrfs_dio_rw(iocb, to, read);
+       ret = btrfs_dio_read(iocb, to, read);
        to->nofault = false;
        pagefault_enable();
 
index b0807c5..0e516ae 100644 (file)
@@ -7980,7 +7980,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
                 */
                status = BLK_STS_RESOURCE;
                dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS);
-               if (!dip)
+               if (!dip->csums)
                        goto out_err;
 
                status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
@@ -8078,13 +8078,21 @@ static const struct iomap_dio_ops btrfs_dio_ops = {
        .bio_set                = &btrfs_dio_bioset,
 };
 
-ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
+ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
 {
        struct btrfs_dio_data data;
 
        return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
-                           IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC,
-                           &data, done_before);
+                           IOMAP_DIO_PARTIAL, &data, done_before);
+}
+
+struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
+                                 size_t done_before)
+{
+       struct btrfs_dio_data data;
+
+       return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+                           IOMAP_DIO_PARTIAL, &data, done_before);
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
index eee1e44..63676ea 100644 (file)
@@ -225,20 +225,20 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
         */
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
        if (ret) {
-               ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
 
        ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
                                BTRFS_FS_TREE_OBJECTID);
-       if (ret)
+       if (ret) {
+               ulist_free(old_roots);
                return ret;
+       }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
        if (ret) {
                ulist_free(old_roots);
-               ulist_free(new_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
@@ -250,29 +250,31 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                return ret;
        }
 
+       /* btrfs_qgroup_account_extent() always frees the ulists passed to it. */
+       old_roots = NULL;
+       new_roots = NULL;
+
        if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
                                nodesize, nodesize)) {
                test_err("qgroup counts didn't match expected values");
                return -EINVAL;
        }
-       old_roots = NULL;
-       new_roots = NULL;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
        if (ret) {
-               ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
 
        ret = remove_extent_item(root, nodesize, nodesize);
-       if (ret)
+       if (ret) {
+               ulist_free(old_roots);
                return -EINVAL;
+       }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
        if (ret) {
                ulist_free(old_roots);
-               ulist_free(new_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
@@ -322,20 +324,20 @@ static int test_multiple_refs(struct btrfs_root *root,
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
        if (ret) {
-               ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
 
        ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
                                BTRFS_FS_TREE_OBJECTID);
-       if (ret)
+       if (ret) {
+               ulist_free(old_roots);
                return ret;
+       }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
        if (ret) {
                ulist_free(old_roots);
-               ulist_free(new_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
@@ -355,20 +357,20 @@ static int test_multiple_refs(struct btrfs_root *root,
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
        if (ret) {
-               ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
 
        ret = add_tree_ref(root, nodesize, nodesize, 0,
                        BTRFS_FIRST_FREE_OBJECTID);
-       if (ret)
+       if (ret) {
+               ulist_free(old_roots);
                return ret;
+       }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
        if (ret) {
                ulist_free(old_roots);
-               ulist_free(new_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
@@ -394,20 +396,20 @@ static int test_multiple_refs(struct btrfs_root *root,
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
        if (ret) {
-               ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }
 
        ret = remove_extent_ref(root, nodesize, nodesize, 0,
                                BTRFS_FIRST_FREE_OBJECTID);
-       if (ret)
+       if (ret) {
+               ulist_free(old_roots);
                return ret;
+       }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
        if (ret) {
                ulist_free(old_roots);
-               ulist_free(new_roots);
                test_err("couldn't find old roots: %d", ret);
                return ret;
        }