Merge branch 'for-3.10/core' of git://git.kernel.dk/linux-block
[platform/adaptation/renesas_rcar/renesas_kernel.git] / drivers / md / md.c
index fcb878f..681d109 100644 (file)
@@ -72,6 +72,9 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 static struct workqueue_struct *md_wq;
 static struct workqueue_struct *md_misc_wq;
 
+static int remove_and_add_spares(struct mddev *mddev,
+                                struct md_rdev *this);
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 /*
@@ -194,21 +197,12 @@ void md_trim_bio(struct bio *bio, int offset, int size)
        if (offset == 0 && size == bio->bi_size)
                return;
 
-       bio->bi_sector += offset;
-       bio->bi_size = size;
-       offset <<= 9;
        clear_bit(BIO_SEG_VALID, &bio->bi_flags);
 
-       while (bio->bi_idx < bio->bi_vcnt &&
-              bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
-               /* remove this whole bio_vec */
-               offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
-               bio->bi_idx++;
-       }
-       if (bio->bi_idx < bio->bi_vcnt) {
-               bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
-               bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
-       }
+       bio_advance(bio, offset << 9);
+
+       bio->bi_size = size;
+
        /* avoid any complications with bi_idx being non-zero*/
        if (bio->bi_idx) {
                memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
@@ -1564,8 +1558,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
                                             sector, count, 1) == 0)
                                return -EINVAL;
                }
-       } else if (sb->bblog_offset == 0)
-               rdev->badblocks.shift = -1;
+       } else if (sb->bblog_offset != 0)
+               rdev->badblocks.shift = 0;
 
        if (!refdev) {
                ret = 1;
@@ -2411,6 +2405,11 @@ static void md_update_sb(struct mddev * mddev, int force_change)
        int nospares = 0;
        int any_badblocks_changed = 0;
 
+       if (mddev->ro) {
+               if (force_change)
+                       set_bit(MD_CHANGE_DEVS, &mddev->flags);
+               return;
+       }
 repeat:
        /* First make sure individual recovery_offsets are correct */
        rdev_for_each(rdev, mddev) {
@@ -2800,12 +2799,10 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
                /* personality does all needed checks */
                if (rdev->mddev->pers->hot_remove_disk == NULL)
                        return -EINVAL;
-               err = rdev->mddev->pers->
-                       hot_remove_disk(rdev->mddev, rdev);
-               if (err)
-                       return err;
-               sysfs_unlink_rdev(rdev->mddev, rdev);
-               rdev->raid_disk = -1;
+               clear_bit(Blocked, &rdev->flags);
+               remove_and_add_spares(rdev->mddev, rdev);
+               if (rdev->raid_disk >= 0)
+                       return -EBUSY;
                set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
                md_wakeup_thread(rdev->mddev->thread);
        } else if (rdev->mddev->pers) {
@@ -3221,7 +3218,7 @@ int md_rdev_init(struct md_rdev *rdev)
         * be used - I wonder if that matters
         */
        rdev->badblocks.count = 0;
-       rdev->badblocks.shift = 0;
+       rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
        rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
        seqlock_init(&rdev->badblocks.lock);
        if (rdev->badblocks.page == NULL)
@@ -3293,9 +3290,6 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
                        goto abort_free;
                }
        }
-       if (super_format == -1)
-               /* hot-add for 0.90, or non-persistent: so no badblocks */
-               rdev->badblocks.shift = -1;
 
        return rdev;
 
@@ -4225,8 +4219,6 @@ action_show(struct mddev *mddev, char *page)
        return sprintf(page, "%s\n", type);
 }
 
-static void reap_sync_thread(struct mddev *mddev);
-
 static ssize_t
 action_store(struct mddev *mddev, const char *page, size_t len)
 {
@@ -4241,7 +4233,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
        if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
                if (mddev->sync_thread) {
                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                       reap_sync_thread(mddev);
+                       md_reap_sync_thread(mddev);
                }
        } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
                   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -5279,7 +5271,7 @@ static void __md_stop_writes(struct mddev *mddev)
        if (mddev->sync_thread) {
                set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-               reap_sync_thread(mddev);
+               md_reap_sync_thread(mddev);
        }
 
        del_timer_sync(&mddev->safemode_timer);
@@ -5287,7 +5279,8 @@ static void __md_stop_writes(struct mddev *mddev)
        bitmap_flush(mddev);
        md_super_wait(mddev);
 
-       if (!mddev->in_sync || mddev->flags) {
+       if (mddev->ro == 0 &&
+           (!mddev->in_sync || mddev->flags)) {
                /* mark array as shutdown cleanly */
                mddev->in_sync = 1;
                md_update_sb(mddev, 1);
@@ -5810,7 +5803,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
                else
                        sysfs_notify_dirent_safe(rdev->sysfs_state);
 
-               md_update_sb(mddev, 1);
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
                if (mddev->degraded)
                        set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5877,6 +5870,9 @@ static int hot_remove_disk(struct mddev * mddev, dev_t dev)
        if (!rdev)
                return -ENXIO;
 
+       clear_bit(Blocked, &rdev->flags);
+       remove_and_add_spares(mddev, rdev);
+
        if (rdev->raid_disk >= 0)
                goto busy;
 
@@ -6490,6 +6486,28 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                err = md_set_readonly(mddev, bdev);
                goto done_unlock;
 
+       case HOT_REMOVE_DISK:
+               err = hot_remove_disk(mddev, new_decode_dev(arg));
+               goto done_unlock;
+
+       case ADD_NEW_DISK:
+               /* We can support ADD_NEW_DISK on read-only arrays
+                * only if we are re-adding a preexisting device.
+                * So require mddev->pers and MD_DISK_SYNC.
+                */
+               if (mddev->pers) {
+                       mdu_disk_info_t info;
+                       if (copy_from_user(&info, argp, sizeof(info)))
+                               err = -EFAULT;
+                       else if (!(info.state & (1<<MD_DISK_SYNC)))
+                               /* Need to clear read-only for this */
+                               break;
+                       else
+                               err = add_new_disk(mddev, &info);
+                       goto done_unlock;
+               }
+               break;
+
        case BLKROSET:
                if (get_user(ro, (int __user *)(arg))) {
                        err = -EFAULT;
@@ -6560,10 +6578,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                goto done_unlock;
        }
 
-       case HOT_REMOVE_DISK:
-               err = hot_remove_disk(mddev, new_decode_dev(arg));
-               goto done_unlock;
-
        case HOT_ADD_DISK:
                err = hot_add_disk(mddev, new_decode_dev(arg));
                goto done_unlock;
@@ -6651,15 +6665,13 @@ static int md_open(struct block_device *bdev, fmode_t mode)
        return err;
 }
 
-static int md_release(struct gendisk *disk, fmode_t mode)
+static void md_release(struct gendisk *disk, fmode_t mode)
 {
        struct mddev *mddev = disk->private_data;
 
        BUG_ON(!mddev);
        atomic_dec(&mddev->openers);
        mddev_put(mddev);
-
-       return 0;
 }
 
 static int md_media_changed(struct gendisk *disk)
@@ -7644,14 +7656,16 @@ void md_do_sync(struct md_thread *thread)
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
-static int remove_and_add_spares(struct mddev *mddev)
+static int remove_and_add_spares(struct mddev *mddev,
+                                struct md_rdev *this)
 {
        struct md_rdev *rdev;
        int spares = 0;
        int removed = 0;
 
        rdev_for_each(rdev, mddev)
-               if (rdev->raid_disk >= 0 &&
+               if ((this == NULL || rdev == this) &&
+                   rdev->raid_disk >= 0 &&
                    !test_bit(Blocked, &rdev->flags) &&
                    (test_bit(Faulty, &rdev->flags) ||
                     ! test_bit(In_sync, &rdev->flags)) &&
@@ -7663,79 +7677,55 @@ static int remove_and_add_spares(struct mddev *mddev)
                                removed++;
                        }
                }
-       if (removed)
-               sysfs_notify(&mddev->kobj, NULL,
-                            "degraded");
+       if (removed && mddev->kobj.sd)
+               sysfs_notify(&mddev->kobj, NULL, "degraded");
 
+       if (this)
+               goto no_add;
 
        rdev_for_each(rdev, mddev) {
                if (rdev->raid_disk >= 0 &&
                    !test_bit(In_sync, &rdev->flags) &&
                    !test_bit(Faulty, &rdev->flags))
                        spares++;
-               if (rdev->raid_disk < 0
-                   && !test_bit(Faulty, &rdev->flags)) {
-                       rdev->recovery_offset = 0;
-                       if (mddev->pers->
-                           hot_add_disk(mddev, rdev) == 0) {
-                               if (sysfs_link_rdev(mddev, rdev))
-                                       /* failure here is OK */;
-                               spares++;
-                               md_new_event(mddev);
-                               set_bit(MD_CHANGE_DEVS, &mddev->flags);
-                       }
+               if (rdev->raid_disk >= 0)
+                       continue;
+               if (test_bit(Faulty, &rdev->flags))
+                       continue;
+               if (mddev->ro &&
+                   rdev->saved_raid_disk < 0)
+                       continue;
+
+               rdev->recovery_offset = 0;
+               if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
+                       spin_lock_irq(&mddev->write_lock);
+                       if (mddev->in_sync)
+                               /* OK, this device, which is in_sync,
+                                * will definitely be noticed before
+                                * the next write, so recovery isn't
+                                * needed.
+                                */
+                               rdev->recovery_offset = mddev->recovery_cp;
+                       spin_unlock_irq(&mddev->write_lock);
+               }
+               if (mddev->ro && rdev->recovery_offset != MaxSector)
+                       /* not safe to add this disk now */
+                       continue;
+               if (mddev->pers->
+                   hot_add_disk(mddev, rdev) == 0) {
+                       if (sysfs_link_rdev(mddev, rdev))
+                               /* failure here is OK */;
+                       spares++;
+                       md_new_event(mddev);
+                       set_bit(MD_CHANGE_DEVS, &mddev->flags);
                }
        }
+no_add:
        if (removed)
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
        return spares;
 }
 
-static void reap_sync_thread(struct mddev *mddev)
-{
-       struct md_rdev *rdev;
-
-       /* resync has finished, collect result */
-       md_unregister_thread(&mddev->sync_thread);
-       if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-           !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
-               /* success...*/
-               /* activate any spares */
-               if (mddev->pers->spare_active(mddev)) {
-                       sysfs_notify(&mddev->kobj, NULL,
-                                    "degraded");
-                       set_bit(MD_CHANGE_DEVS, &mddev->flags);
-               }
-       }
-       if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-           mddev->pers->finish_reshape)
-               mddev->pers->finish_reshape(mddev);
-
-       /* If array is no-longer degraded, then any saved_raid_disk
-        * information must be scrapped.  Also if any device is now
-        * In_sync we must scrape the saved_raid_disk for that device
-        * do the superblock for an incrementally recovered device
-        * written out.
-        */
-       rdev_for_each(rdev, mddev)
-               if (!mddev->degraded ||
-                   test_bit(In_sync, &rdev->flags))
-                       rdev->saved_raid_disk = -1;
-
-       md_update_sb(mddev, 1);
-       clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-       clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
-       clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-       clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
-       clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
-       /* flag recovery needed just to double check */
-       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-       sysfs_notify_dirent_safe(mddev->sysfs_action);
-       md_new_event(mddev);
-       if (mddev->event_work.func)
-               queue_work(md_misc_wq, &mddev->event_work);
-}
-
 /*
  * This routine is regularly called by all per-raid-array threads to
  * deal with generic issues like resync and super-block update.
@@ -7791,22 +7781,16 @@ void md_check_recovery(struct mddev *mddev)
                int spares = 0;
 
                if (mddev->ro) {
-                       /* Only thing we do on a ro array is remove
-                        * failed devices.
+                       /* On a read-only array we can:
+                        * - remove failed devices
+                        * - add already-in_sync devices if the array itself
+                        *   is in-sync.
+                        * As we only add devices that are already in-sync,
+                        * we can activate the spares immediately.
                         */
-                       struct md_rdev *rdev;
-                       rdev_for_each(rdev, mddev)
-                               if (rdev->raid_disk >= 0 &&
-                                   !test_bit(Blocked, &rdev->flags) &&
-                                   test_bit(Faulty, &rdev->flags) &&
-                                   atomic_read(&rdev->nr_pending)==0) {
-                                       if (mddev->pers->hot_remove_disk(
-                                                   mddev, rdev) == 0) {
-                                               sysfs_unlink_rdev(mddev, rdev);
-                                               rdev->raid_disk = -1;
-                                       }
-                               }
                        clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                       remove_and_add_spares(mddev, NULL);
+                       mddev->pers->spare_active(mddev);
                        goto unlock;
                }
 
@@ -7838,7 +7822,7 @@ void md_check_recovery(struct mddev *mddev)
                        goto unlock;
                }
                if (mddev->sync_thread) {
-                       reap_sync_thread(mddev);
+                       md_reap_sync_thread(mddev);
                        goto unlock;
                }
                /* Set RUNNING before clearing NEEDED to avoid
@@ -7869,7 +7853,7 @@ void md_check_recovery(struct mddev *mddev)
                                goto unlock;
                        set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
                        clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-               } else if ((spares = remove_and_add_spares(mddev))) {
+               } else if ((spares = remove_and_add_spares(mddev, NULL))) {
                        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                        clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
@@ -7919,6 +7903,51 @@ void md_check_recovery(struct mddev *mddev)
        }
 }
 
+void md_reap_sync_thread(struct mddev *mddev)
+{
+       struct md_rdev *rdev;
+
+       /* resync has finished, collect result */
+       md_unregister_thread(&mddev->sync_thread);
+       if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+           !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+               /* success...*/
+               /* activate any spares */
+               if (mddev->pers->spare_active(mddev)) {
+                       sysfs_notify(&mddev->kobj, NULL,
+                                    "degraded");
+                       set_bit(MD_CHANGE_DEVS, &mddev->flags);
+               }
+       }
+       if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+           mddev->pers->finish_reshape)
+               mddev->pers->finish_reshape(mddev);
+
+       /* If array is no-longer degraded, then any saved_raid_disk
+        * information must be scrapped.  Also if any device is now
+        * In_sync we must clear the saved_raid_disk for that device
+        * so that the superblock for an incrementally recovered device
+        * gets written out.
+        */
+       rdev_for_each(rdev, mddev)
+               if (!mddev->degraded ||
+                   test_bit(In_sync, &rdev->flags))
+                       rdev->saved_raid_disk = -1;
+
+       md_update_sb(mddev, 1);
+       clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+       clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+       clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+       clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+       clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+       /* flag recovery needed just to double check */
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       sysfs_notify_dirent_safe(mddev->sysfs_action);
+       md_new_event(mddev);
+       if (mddev->event_work.func)
+               queue_work(md_misc_wq, &mddev->event_work);
+}
+
 void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
 {
        sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -8644,6 +8673,7 @@ EXPORT_SYMBOL(md_register_thread);
 EXPORT_SYMBOL(md_unregister_thread);
 EXPORT_SYMBOL(md_wakeup_thread);
 EXPORT_SYMBOL(md_check_recovery);
+EXPORT_SYMBOL(md_reap_sync_thread);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("MD RAID framework");
 MODULE_ALIAS("md");