Revert "block: don't call into the driver for BLKROSET"
[platform/kernel/linux-rpi.git] / drivers / md / raid5.c
index 02ed53b..d5b330c 100644 (file)
@@ -36,6 +36,7 @@
  */
 
 #include <linux/blkdev.h>
+#include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
@@ -686,17 +687,17 @@ int raid5_calc_degraded(struct r5conf *conf)
        return degraded;
 }
 
-static int has_failed(struct r5conf *conf)
+static bool has_failed(struct r5conf *conf)
 {
-       int degraded;
+       int degraded = conf->mddev->degraded;
 
-       if (conf->mddev->reshape_position == MaxSector)
-               return conf->mddev->degraded > conf->max_degraded;
+       if (test_bit(MD_BROKEN, &conf->mddev->flags))
+               return true;
 
-       degraded = raid5_calc_degraded(conf);
-       if (degraded > conf->max_degraded)
-               return 1;
-       return 0;
+       if (conf->mddev->reshape_position != MaxSector)
+               degraded = raid5_calc_degraded(conf);
+
+       return degraded > conf->max_degraded;
 }
 
 struct stripe_head *
@@ -2217,8 +2218,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
        struct raid5_percpu *percpu;
        unsigned long cpu;
 
-       cpu = get_cpu();
+       cpu = get_cpu_light();
        percpu = per_cpu_ptr(conf->percpu, cpu);
+       spin_lock(&percpu->lock);
        if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
                ops_run_biofill(sh);
                overlap_clear++;
@@ -2277,7 +2279,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
                        if (test_and_clear_bit(R5_Overlap, &dev->flags))
                                wake_up(&sh->raid_conf->wait_for_overlap);
                }
-       put_cpu();
+       spin_unlock(&percpu->lock);
+       put_cpu_light();
 }
 
 static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
@@ -2864,10 +2867,10 @@ static void raid5_end_write_request(struct bio *bi)
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
                clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
-       raid5_release_stripe(sh);
 
        if (sh->batch_head && sh != sh->batch_head)
                raid5_release_stripe(sh->batch_head);
+       raid5_release_stripe(sh);
 }
 
 static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
@@ -2877,34 +2880,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
        unsigned long flags;
        pr_debug("raid456: error called\n");
 
+       pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
+               mdname(mddev), bdevname(rdev->bdev, b));
+
        spin_lock_irqsave(&conf->device_lock, flags);
+       set_bit(Faulty, &rdev->flags);
+       clear_bit(In_sync, &rdev->flags);
+       mddev->degraded = raid5_calc_degraded(conf);
 
-       if (test_bit(In_sync, &rdev->flags) &&
-           mddev->degraded == conf->max_degraded) {
-               /*
-                * Don't allow to achieve failed state
-                * Don't try to recover this device
-                */
+       if (has_failed(conf)) {
+               set_bit(MD_BROKEN, &conf->mddev->flags);
                conf->recovery_disabled = mddev->recovery_disabled;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               return;
+
+               pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
+                       mdname(mddev), mddev->degraded, conf->raid_disks);
+       } else {
+               pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
+                       mdname(mddev), conf->raid_disks - mddev->degraded);
        }
 
-       set_bit(Faulty, &rdev->flags);
-       clear_bit(In_sync, &rdev->flags);
-       mddev->degraded = raid5_calc_degraded(conf);
        spin_unlock_irqrestore(&conf->device_lock, flags);
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
        set_bit(Blocked, &rdev->flags);
        set_mask_bits(&mddev->sb_flags, 0,
                      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
-       pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
-               "md/raid:%s: Operation continuing on %d devices.\n",
-               mdname(mddev),
-               bdevname(rdev->bdev, b),
-               mdname(mddev),
-               conf->raid_disks - mddev->degraded);
        r5c_update_on_rdev_error(mddev, rdev);
 }
 
@@ -3939,7 +3939,7 @@ static void handle_stripe_fill(struct stripe_head *sh,
                 * back cache (prexor with orig_page, and then xor with
                 * page) in the read path
                 */
-               if (s->injournal && s->failed) {
+               if (s->to_read && s->injournal && s->failed) {
                        if (test_bit(STRIPE_R5C_CACHING, &sh->state))
                                r5c_make_stripe_write_out(sh);
                        goto out;
@@ -5433,7 +5433,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 
        if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
                        &bad_sectors)) {
-               bio_put(raid_bio);
                rdev_dec_pending(rdev, mddev);
                return 0;
        }
@@ -6525,7 +6524,18 @@ static void raid5d(struct md_thread *thread)
                        spin_unlock_irq(&conf->device_lock);
                        md_check_recovery(mddev);
                        spin_lock_irq(&conf->device_lock);
+
+                       /*
+                        * Waiting on MD_SB_CHANGE_PENDING below may deadlock,
+                        * because md_check_recovery() is needed to clear
+                        * the flag when using mdmon.
+                        */
+                       continue;
                }
+
+               wait_event_lock_irq(mddev->sb_wait,
+                       !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
+                       conf->device_lock);
        }
        pr_debug("%d stripes handled\n", handled);
 
@@ -7102,6 +7112,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
                        __func__, cpu);
                return -ENOMEM;
        }
+       spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
        return 0;
 }
 
@@ -7446,12 +7457,19 @@ static int raid5_run(struct mddev *mddev)
        struct md_rdev *rdev;
        struct md_rdev *journal_dev = NULL;
        sector_t reshape_offset = 0;
-       int i;
+       int i, ret = 0;
        long long min_offset_diff = 0;
        int first = 1;
 
-       if (mddev_init_writes_pending(mddev) < 0)
+       if (acct_bioset_init(mddev)) {
+               pr_err("md/raid456:%s: alloc acct bioset failed.\n", mdname(mddev));
                return -ENOMEM;
+       }
+
+       if (mddev_init_writes_pending(mddev) < 0) {
+               ret = -ENOMEM;
+               goto exit_acct_set;
+       }
 
        if (mddev->recovery_cp != MaxSector)
                pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
@@ -7482,7 +7500,8 @@ static int raid5_run(struct mddev *mddev)
            (mddev->bitmap_info.offset || mddev->bitmap_info.file)) {
                pr_notice("md/raid:%s: array cannot have both journal and bitmap\n",
                          mdname(mddev));
-               return -EINVAL;
+               ret = -EINVAL;
+               goto exit_acct_set;
        }
 
        if (mddev->reshape_position != MaxSector) {
@@ -7507,13 +7526,15 @@ static int raid5_run(struct mddev *mddev)
                if (journal_dev) {
                        pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n",
                                mdname(mddev));
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto exit_acct_set;
                }
 
                if (mddev->new_level != mddev->level) {
                        pr_warn("md/raid:%s: unsupported reshape required - aborting.\n",
                                mdname(mddev));
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto exit_acct_set;
                }
                old_disks = mddev->raid_disks - mddev->delta_disks;
                /* reshape_position must be on a new-stripe boundary, and one
@@ -7529,7 +7550,8 @@ static int raid5_run(struct mddev *mddev)
                if (sector_div(here_new, chunk_sectors * new_data_disks)) {
                        pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n",
                                mdname(mddev));
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto exit_acct_set;
                }
                reshape_offset = here_new * chunk_sectors;
                /* here_new is the stripe we will write to */
@@ -7551,7 +7573,8 @@ static int raid5_run(struct mddev *mddev)
                        else if (mddev->ro == 0) {
                                pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n",
                                        mdname(mddev));
-                               return -EINVAL;
+                               ret = -EINVAL;
+                               goto exit_acct_set;
                        }
                } else if (mddev->reshape_backwards
                    ? (here_new * chunk_sectors + min_offset_diff <=
@@ -7561,7 +7584,8 @@ static int raid5_run(struct mddev *mddev)
                        /* Reading from the same stripe as writing to - bad */
                        pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n",
                                mdname(mddev));
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto exit_acct_set;
                }
                pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev));
                /* OK, we should be able to continue; */
@@ -7585,8 +7609,10 @@ static int raid5_run(struct mddev *mddev)
        else
                conf = mddev->private;
 
-       if (IS_ERR(conf))
-               return PTR_ERR(conf);
+       if (IS_ERR(conf)) {
+               ret = PTR_ERR(conf);
+               goto exit_acct_set;
+       }
 
        if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
                if (!journal_dev) {
@@ -7786,7 +7812,10 @@ abort:
        free_conf(conf);
        mddev->private = NULL;
        pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
-       return -EIO;
+       ret = -EIO;
+exit_acct_set:
+       acct_bioset_exit(mddev);
+       return ret;
 }
 
 static void raid5_free(struct mddev *mddev, void *priv)
@@ -7794,6 +7823,7 @@ static void raid5_free(struct mddev *mddev, void *priv)
        struct r5conf *conf = priv;
 
        free_conf(conf);
+       acct_bioset_exit(mddev);
        mddev->to_remove = &raid5_attrs_group;
 }
 
@@ -8010,6 +8040,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
         */
        if (rdev->saved_raid_disk >= 0 &&
            rdev->saved_raid_disk >= first &&
+           rdev->saved_raid_disk <= last &&
            conf->disks[rdev->saved_raid_disk].rdev == NULL)
                first = rdev->saved_raid_disk;