Btrfs: scrub, move setup of nofs contexts higher in the stack
author Filipe Manana <fdmanana@suse.com>
Fri, 7 Dec 2018 13:23:32 +0000 (13:23 +0000)
committer David Sterba <dsterba@suse.com>
Mon, 17 Dec 2018 13:51:48 +0000 (14:51 +0100)
Since scrub workers only do memory allocations with GFP_KERNEL when they
need to perform repair, we can move the recently added setup of the nofs
context up to scrub_handle_errored_block(), instead of setting it up further
down the call chain at insert_full_stripe_lock() and
scrub_add_page_to_wr_bio(), removing duplicated code and comments. So the
only paths for which a scrub worker can do memory allocations using
GFP_KERNEL are the following (see the sketch after the call chains below):

 scrub_bio_end_io_worker()
   scrub_block_complete()
     scrub_handle_errored_block()
       lock_full_stripe()
         insert_full_stripe_lock()
           -> kmalloc with GFP_KERNEL

 scrub_bio_end_io_worker()
   scrub_block_complete()
     scrub_handle_errored_block()
       scrub_write_page_to_dev_replace()
         scrub_add_page_to_wr_bio()
           -> kzalloc with GFP_KERNEL
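
For reference, here is a short sketch of the save/restore pattern this
change relies on (the helper below is illustrative only, it is not part
of the patch): any GFP_KERNEL allocation made while the nofs context is
active, whether directly or in a callee, is implicitly treated as
GFP_NOFS, which is why a single save/restore at the top of
scrub_handle_errored_block() can cover both call chains above.

  #include <linux/sched/mm.h>
  #include <linux/slab.h>

  /* Illustrative helper, not part of the patch. */
  static void *alloc_in_nofs_context(size_t size)
  {
          unsigned int nofs_flag;
          void *p;

          /*
           * While the nofs context is active, GFP_KERNEL allocations
           * done here and in any callee behave as if they were GFP_NOFS.
           */
          nofs_flag = memalloc_nofs_save();
          p = kmalloc(size, GFP_KERNEL);
          memalloc_nofs_restore(nofs_flag);

          return p;
  }

Because the context is now saved once at the top of
scrub_handle_errored_block(), it must also be restored on the early
return path taken when lock_full_stripe() fails, which the patch does.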

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/scrub.c

index 367ab09..099eb3c 100644
@@ -322,7 +322,6 @@ static struct full_stripe_lock *insert_full_stripe_lock(
        struct rb_node *parent = NULL;
        struct full_stripe_lock *entry;
        struct full_stripe_lock *ret;
-       unsigned int nofs_flag;
 
        lockdep_assert_held(&locks_root->lock);
 
@@ -342,15 +341,8 @@ static struct full_stripe_lock *insert_full_stripe_lock(
 
        /*
         * Insert new lock.
-        *
-        * We must use GFP_NOFS because the scrub task might be waiting for a
-        * worker task executing this function and in turn a transaction commit
-        * might be waiting the scrub task to pause (which needs to wait for all
-        * the worker tasks to complete before pausing).
         */
-       nofs_flag = memalloc_nofs_save();
        ret = kmalloc(sizeof(*ret), GFP_KERNEL);
-       memalloc_nofs_restore(nofs_flag);
        if (!ret)
                return ERR_PTR(-ENOMEM);
        ret->logical = fstripe_logical;
@@ -841,6 +833,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
        int page_num;
        int success;
        bool full_stripe_locked;
+       unsigned int nofs_flag;
        static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
 
@@ -866,6 +859,16 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
        dev = sblock_to_check->pagev[0]->dev;
 
        /*
+        * We must use GFP_NOFS because the scrub task might be waiting for a
+        * worker task executing this function and in turn a transaction commit
+        * might be waiting for the scrub task to pause (which needs to wait
+        * for all the worker tasks to complete before pausing).
+        * We do allocations in the workers through insert_full_stripe_lock()
+        * and scrub_add_page_to_wr_bio(), which happen down the call chain of
+        * this function.
+        */
+       nofs_flag = memalloc_nofs_save();
+       /*
         * For RAID5/6, race can happen for a different device scrub thread.
         * For data corruption, Parity and Data threads will both try
         * to recovery the data.
@@ -874,6 +877,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
         */
        ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
        if (ret < 0) {
+               memalloc_nofs_restore(nofs_flag);
                spin_lock(&sctx->stat_lock);
                if (ret == -ENOMEM)
                        sctx->stat.malloc_errors++;
@@ -913,7 +917,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
         */
 
        sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
-                                     sizeof(*sblocks_for_recheck), GFP_NOFS);
+                                     sizeof(*sblocks_for_recheck), GFP_KERNEL);
        if (!sblocks_for_recheck) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.malloc_errors++;
@@ -1211,6 +1215,7 @@ out:
        }
 
        ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
+       memalloc_nofs_restore(nofs_flag);
        if (ret < 0)
                return ret;
        return 0;
@@ -1629,19 +1634,8 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
        mutex_lock(&sctx->wr_lock);
 again:
        if (!sctx->wr_curr_bio) {
-               unsigned int nofs_flag;
-
-               /*
-                * We must use GFP_NOFS because the scrub task might be waiting
-                * for a worker task executing this function and in turn a
-                * transaction commit might be waiting the scrub task to pause
-                * (which needs to wait for all the worker tasks to complete
-                * before pausing).
-                */
-               nofs_flag = memalloc_nofs_save();
                sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
                                              GFP_KERNEL);
-               memalloc_nofs_restore(nofs_flag);
                if (!sctx->wr_curr_bio) {
                        mutex_unlock(&sctx->wr_lock);
                        return -ENOMEM;