[ Upstream commit fa2bbff7b0b4e211fec5e5686ef96350690597b5 ]
Currently, RCU is used to protect iterating rdevs in submit_flushes(), but
that is not enough; the following race is still possible:
submit_flushes                          remove_and_add_spares
                                         synchronize_rcu
                                         pers->hot_remove_disk()
 rcu_read_lock()
 rdev_for_each_rcu
  if (rdev->raid_disk >= 0)
                                         rdev->raid_disk = -1;
   atomic_inc(&rdev->nr_pending)
   rcu_read_unlock()
   bi = bio_alloc_bioset()
   bi->bi_end_io = md_end_flush
   bi->private = rdev
   submit_bio
   // issue io for removed rdev
Fix this problem by grabbing 'active_io' before iterating rdev, to make sure
that remove_and_add_spares() won't run concurrently with submit_flushes().
Fixes: a2826aa92e2e ("md: support barrier requests on all personalities.")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20231129020234.1586910-1-yukuai1@huaweicloud.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&mddev->flush_pending)) {
rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&mddev->flush_pending)) {
+ /* The pair is percpu_ref_get() from md_flush_request() */
+ percpu_ref_put(&mddev->active_io);
+
/* The pre-request flush has finished */
queue_work(md_wq, &mddev->flush_work);
}
/* The pre-request flush has finished */
queue_work(md_wq, &mddev->flush_work);
}
rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags)) {
rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags)) {
- /* Take two references, one is dropped
- * when request finishes, one after
- * we reclaim rcu_read_lock
- */
- atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
bi = bio_alloc_bioset(rdev->bdev, 0,
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
bi = bio_alloc_bioset(rdev->bdev, 0,
atomic_inc(&mddev->flush_pending);
submit_bio(bi);
rcu_read_lock();
atomic_inc(&mddev->flush_pending);
submit_bio(bi);
rcu_read_lock();
- rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
if (atomic_dec_and_test(&mddev->flush_pending))
}
rcu_read_unlock();
if (atomic_dec_and_test(&mddev->flush_pending))
/* new request after previous flush is completed */
if (ktime_after(req_start, mddev->prev_flush_start)) {
WARN_ON(mddev->flush_bio);
/* new request after previous flush is completed */
if (ktime_after(req_start, mddev->prev_flush_start)) {
WARN_ON(mddev->flush_bio);
+ /*
+ * Grab a reference to make sure mddev_suspend() will wait for
+ * this flush to be done.
+ *
+ * md_flush_request() is called under md_handle_request() and
+ * 'active_io' is already grabbed, hence percpu_ref_is_zero()
+ * won't pass, percpu_ref_tryget_live() can't be used because
+ * percpu_ref_kill() can be called by mddev_suspend()
+ * concurrently.
+ */
+ WARN_ON(percpu_ref_is_zero(&mddev->active_io));
+ percpu_ref_get(&mddev->active_io);
mddev->flush_bio = bio;
bio = NULL;
}
mddev->flush_bio = bio;
bio = NULL;
}