From 970fbde1f1ebae0c85bbaed3de83684a58d60fad Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:11:38 +0200 Subject: [PATCH] drbd: flush drbd work queue before invalidate/invalidate remote If you do back to back wait-sync/invalidate on a Primary in a tight loop, during application IO load, you could trigger a race: kernel: block drbd6: FIXME going to queue 'set_n_write from StartingSync' but 'write from resync_finished' still pending? Fix this by changing the order of the drbd_queue_work() and the wake_up() in dec_ap_pending(), and adding the additional drbd_flush_workqueue() before requesting the full sync. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 ++++++---- drivers/block/drbd/drbd_nl.c | 8 ++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e010aff..22adfc7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2251,15 +2251,17 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); D_ASSERT(ap_bio >= 0); + + if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { + if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) + drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w); + } + /* this currently does wake_up for every dec_ap_bio! * maybe rather introduce some type of hysteresis? * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ if (ap_bio < mxb) wake_up(&mdev->misc_wait); - if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { - if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w); - } } static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a2925de..4afd626 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2408,9 +2408,11 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; /* If there is still bitmap IO pending, probably because of a previous - * resync just being finished, wait for it before requesting a new resync. */ + * resync just being finished, wait for it before requesting a new resync. + * Also wait for it's after_state_ch(). */ drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + drbd_flush_workqueue(mdev); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); @@ -2475,9 +2477,11 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; /* If there is still bitmap IO pending, probably because of a previous - * resync just being finished, wait for it before requesting a new resync. */ + * resync just being finished, wait for it before requesting a new resync. + * Also wait for it's after_state_ch(). */ drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + drbd_flush_workqueue(mdev); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); if (retcode < SS_SUCCESS) { -- 2.7.4