md: only delete entries from all_mddevs when the disk is freed
author Christoph Hellwig <hch@lst.de>
Tue, 19 Jul 2022 09:18:23 +0000 (11:18 +0200)
committer Jens Axboe <axboe@kernel.dk>
Tue, 2 Aug 2022 23:22:44 +0000 (17:22 -0600)
This ensures device names don't get prematurely reused.  Instead add a
deleted flag to skip already deleted devices in mddev_get and other
places that only want to see live mddevs.
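
For illustration only, the lifetime rule the patch moves to can be sketched in
plain userspace C. The names here (demo_dev, demo_get, demo_put, demo_free,
demo_find, all_devs_lock) are invented for the sketch and are not part of md,
which uses struct mddev, the MD_DELETED flag, the all_mddevs list and
all_mddevs_lock: the last reference only marks the entry deleted, lookups
treat marked entries as absent, and the list entry, and with it the device
name, is released only when the object itself is freed.

/*
 * Illustrative userspace sketch only -- not the md code.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_dev {
        struct demo_dev *next;  /* entry in the global "all devices" list */
        int refcount;           /* analogue of mddev->active */
        bool deleted;           /* analogue of the MD_DELETED flag bit */
        int unit;               /* analogue of the device name/minor */
};

static struct demo_dev *all_devs;
static pthread_mutex_t all_devs_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller holds all_devs_lock; dying entries look like they are gone. */
static struct demo_dev *demo_get(struct demo_dev *dev)
{
        if (dev->deleted)
                return NULL;
        dev->refcount++;
        return dev;
}

/* Dropping the last reference only marks the entry deleted... */
static void demo_put(struct demo_dev *dev)
{
        pthread_mutex_lock(&all_devs_lock);
        if (--dev->refcount == 0)
                dev->deleted = true;    /* still listed: unit stays reserved */
        pthread_mutex_unlock(&all_devs_lock);
}

/* ...and only the final free (md_free_disk in the patch) unlinks it. */
static void demo_free(struct demo_dev *dev)
{
        struct demo_dev **p;

        pthread_mutex_lock(&all_devs_lock);
        for (p = &all_devs; *p; p = &(*p)->next) {
                if (*p == dev) {
                        *p = dev->next;
                        break;
                }
        }
        pthread_mutex_unlock(&all_devs_lock);
        free(dev);
}

/* Lookups skip deleted entries, so the unit cannot be grabbed twice. */
static struct demo_dev *demo_find(int unit)
{
        struct demo_dev *dev, *found = NULL;

        pthread_mutex_lock(&all_devs_lock);
        for (dev = all_devs; dev; dev = dev->next) {
                if (dev->unit == unit) {
                        found = demo_get(dev);  /* NULL once deleted */
                        break;
                }
        }
        pthread_mutex_unlock(&all_devs_lock);
        return found;
}

int main(void)
{
        struct demo_dev *dev = calloc(1, sizeof(*dev));

        dev->unit = 3;
        dev->refcount = 1;
        dev->next = all_devs;
        all_devs = dev;

        demo_put(dev);                  /* marked deleted, still on the list */
        printf("lookup after put: %p\n", (void *)demo_find(3)); /* (nil) */
        demo_free(dev);                 /* list entry gone, unit reusable */
        return 0;
}

List walkers in the diff below follow the same rule: they skip entries where
MD_DELETED is set, or where mddev_get() returns NULL.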

Reported-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/md/md.c
drivers/md/md.h

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 709df90..2e9ed44 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -625,6 +625,10 @@ EXPORT_SYMBOL(md_flush_request);
 
 static inline struct mddev *mddev_get(struct mddev *mddev)
 {
+       lockdep_assert_held(&all_mddevs_lock);
+
+       if (test_bit(MD_DELETED, &mddev->flags))
+               return NULL;
        atomic_inc(&mddev->active);
        return mddev;
 }
@@ -639,7 +643,7 @@ static void mddev_put(struct mddev *mddev)
            mddev->ctime == 0 && !mddev->hold_active) {
                /* Array is not configured at all, and not held active,
                 * so destroy it */
-               list_del_init(&mddev->all_mddevs);
+               set_bit(MD_DELETED, &mddev->flags);
 
                /*
                 * Call queue_work inside the spinlock so that
@@ -719,8 +723,8 @@ static struct mddev *mddev_find(dev_t unit)
 
        spin_lock(&all_mddevs_lock);
        mddev = mddev_find_locked(unit);
-       if (mddev)
-               mddev_get(mddev);
+       if (mddev && !mddev_get(mddev))
+               mddev = NULL;
        spin_unlock(&all_mddevs_lock);
 
        return mddev;
@@ -3338,6 +3342,8 @@ static bool md_rdev_overlaps(struct md_rdev *rdev)
 
        spin_lock(&all_mddevs_lock);
        list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
+               if (test_bit(MD_DELETED, &mddev->flags))
+                       continue;
                rdev_for_each(rdev2, mddev) {
                        if (rdev != rdev2 && rdev->bdev == rdev2->bdev &&
                            md_rdevs_overlap(rdev, rdev2)) {
@@ -5525,11 +5531,10 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
        if (!entry->show)
                return -EIO;
        spin_lock(&all_mddevs_lock);
-       if (list_empty(&mddev->all_mddevs)) {
+       if (!mddev_get(mddev)) {
                spin_unlock(&all_mddevs_lock);
                return -EBUSY;
        }
-       mddev_get(mddev);
        spin_unlock(&all_mddevs_lock);
 
        rv = entry->show(mddev, page);
@@ -5550,11 +5555,10 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        spin_lock(&all_mddevs_lock);
-       if (list_empty(&mddev->all_mddevs)) {
+       if (!mddev_get(mddev)) {
                spin_unlock(&all_mddevs_lock);
                return -EBUSY;
        }
-       mddev_get(mddev);
        spin_unlock(&all_mddevs_lock);
        rv = entry->store(mddev, page, length);
        mddev_put(mddev);
@@ -7849,7 +7853,7 @@ static void md_free_disk(struct gendisk *disk)
        bioset_exit(&mddev->bio_set);
        bioset_exit(&mddev->sync_set);
 
-       kfree(mddev);
+       mddev_free(mddev);
 }
 
 const struct block_device_operations md_fops =
@@ -8171,6 +8175,7 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos)
                if (!l--) {
                        mddev = list_entry(tmp, struct mddev, all_mddevs);
-                       mddev_get(mddev);
+                       if (!mddev_get(mddev))
+                               continue;
                        spin_unlock(&all_mddevs_lock);
                        return mddev;
                }
@@ -8184,25 +8190,35 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        struct list_head *tmp;
        struct mddev *next_mddev, *mddev = v;
+       struct mddev *to_put = NULL;
 
        ++*pos;
        if (v == (void*)2)
                return NULL;
 
        spin_lock(&all_mddevs_lock);
-       if (v == (void*)1)
+       if (v == (void*)1) {
                tmp = all_mddevs.next;
-       else
+       } else {
+               to_put = mddev;
                tmp = mddev->all_mddevs.next;
-       if (tmp != &all_mddevs)
-               next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
-       else {
-               next_mddev = (void*)2;
-               *pos = 0x10000;
        }
+
+       for (;;) {
+               if (tmp == &all_mddevs) {
+                       next_mddev = (void*)2;
+                       *pos = 0x10000;
+                       break;
+               }
+               next_mddev = list_entry(tmp, struct mddev, all_mddevs);
+               if (mddev_get(next_mddev))
+                       break;
+               mddev = next_mddev;
+               tmp = mddev->all_mddevs.next;
+       }
        spin_unlock(&all_mddevs_lock);
 
-       if (v != (void*)1)
+       if (to_put)
                mddev_put(mddev);
        return next_mddev;
 
@@ -8766,6 +8782,8 @@ void md_do_sync(struct md_thread *thread)
                        goto skip;
                spin_lock(&all_mddevs_lock);
                list_for_each_entry(mddev2, &all_mddevs, all_mddevs) {
+                       if (test_bit(MD_DELETED, &mddev2->flags))
+                               continue;
                        if (mddev2 == mddev)
                                continue;
                        if (!mddev->parallel_resync
@@ -9568,7 +9586,8 @@ static int md_notify_reboot(struct notifier_block *this,
 
        spin_lock(&all_mddevs_lock);
        list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
-               mddev_get(mddev);
+               if (!mddev_get(mddev))
+                       continue;
                spin_unlock(&all_mddevs_lock);
                if (mddev_trylock(mddev)) {
                        if (mddev->pers)
@@ -9923,7 +9942,8 @@ static __exit void md_exit(void)
 
        spin_lock(&all_mddevs_lock);
        list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
-               mddev_get(mddev);
+               if (!mddev_get(mddev))
+                       continue;
                spin_unlock(&all_mddevs_lock);
                export_array(mddev);
                mddev->ctime = 0;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 861088b..f6ab73c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -254,6 +254,7 @@ struct md_cluster_info;
  * @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
  *                array is ready yet.
  * @MD_BROKEN: This is used to stop writes and mark array as failed.
+ * @MD_DELETED: This device is being deleted
  *
  * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
  */
@@ -270,6 +271,7 @@ enum mddev_flags {
        MD_UPDATING_SB,
        MD_NOT_READY,
        MD_BROKEN,
+       MD_DELETED,
 };
 
 enum mddev_sb_flags {