md: be extra careful not to take a reference to a Faulty device.
authorNeilBrown <neilb@suse.com>
Thu, 2 Jun 2016 06:19:53 +0000 (16:19 +1000)
committerShaohua Li <shli@fb.com>
Mon, 13 Jun 2016 18:54:21 +0000 (11:54 -0700)
It is important that we never increment rdev->nr_pending on a Faulty
device as ->hot_remove_disk() assumes that once the Faulty flag is visible
no code will take a new reference.

Some places take a new reference after only check In_sync.  This should
be safe as the two are changed together.  However to make the code more
obviously safe, add checks for 'Faulty' as well.

Note: the actual rule is:
  Never increment nr_pending if  Faulty is set and Blocked is clear,
  never clear Faulty, and never set Blocked without holding a reference
  through nr_pending.

fix build error (Shaohua)
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
drivers/md/multipath.c
drivers/md/raid10.c
drivers/md/raid5.c

index 69244de..7eb9972 100644 (file)
@@ -43,7 +43,8 @@ static int multipath_map (struct mpconf *conf)
        rcu_read_lock();
        for (i = 0; i < disks; i++) {
                struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
-               if (rdev && test_bit(In_sync, &rdev->flags)) {
+               if (rdev && test_bit(In_sync, &rdev->flags) &&
+                   !test_bit(Faulty, &rdev->flags)) {
                        atomic_inc(&rdev->nr_pending);
                        rcu_read_unlock();
                        return i;
index 526c1d8..34facda 100644 (file)
@@ -2287,6 +2287,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (rdev &&
                            test_bit(In_sync, &rdev->flags) &&
+                           !test_bit(Faulty, &rdev->flags) &&
                            is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
                                        &first_bad, &bad_sectors) == 0) {
                                atomic_inc(&rdev->nr_pending);
@@ -2339,6 +2340,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (!rdev ||
+                           test_bit(Faulty, &rdev->flags) ||
                            !test_bit(In_sync, &rdev->flags))
                                continue;
 
@@ -2378,6 +2380,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (!rdev ||
+                           test_bit(Faulty, &rdev->flags) ||
                            !test_bit(In_sync, &rdev->flags))
                                continue;
 
@@ -2953,6 +2956,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                        mreplace = rcu_dereference(mirror->replacement);
 
                        if ((mrdev == NULL ||
+                            test_bit(Faulty, &mrdev->flags) ||
                             test_bit(In_sync, &mrdev->flags)) &&
                            (mreplace == NULL ||
                             test_bit(Faulty, &mreplace->flags))) {
@@ -2971,6 +2975,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                rcu_read_unlock();
                                continue;
                        }
+                       if (mreplace && test_bit(Faulty, &mreplace->flags))
+                               mreplace = NULL;
                        /* Unless we are doing a full sync, or a replacement
                         * we only need to recover the block if it is set in
                         * the bitmap
index c07b22e..f6a191a 100644 (file)
@@ -3080,7 +3080,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                        struct md_rdev *rdev;
                        rcu_read_lock();
                        rdev = rcu_dereference(conf->disks[i].rdev);
-                       if (rdev && test_bit(In_sync, &rdev->flags))
+                       if (rdev && test_bit(In_sync, &rdev->flags) &&
+                           !test_bit(Faulty, &rdev->flags))
                                atomic_inc(&rdev->nr_pending);
                        else
                                rdev = NULL;