drbd: fix refcount error during detach of an already failed disk
author Lars Ellenberg <lars.ellenberg@linbit.com>
Wed, 25 Feb 2015 18:37:28 +0000 (19:37 +0100)
committer Jens Axboe <axboe@fb.com>
Wed, 25 Nov 2015 16:22:01 +0000 (09:22 -0700)
A D_FAILED disk transitions as quickly as possible to
D_DISKLESS. But in the "unresponsive local disk" case,
there remains a time window where an administrative detach command could
find the disk already failed, while some internal meta data IO against the
unresponsive local disk is still pending.

In that case, drbd_md_get_buffer() will return NULL.
Don't unconditionally call drbd_md_put_buffer(), or it will cause
refcount imbalance, and prevent any further re-attach on this volume
(until it is deleted and re-created).

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
drivers/block/drbd/drbd_nl.c

index 331b378..79dc3d4 100644 (file)
@@ -1915,6 +1915,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 static int adm_detach(struct drbd_device *device, int force)
 {
        enum drbd_state_rv retcode;
+       void *buffer;
        int ret;
 
        if (force) {
@@ -1925,9 +1926,12 @@ static int adm_detach(struct drbd_device *device, int force)
        }
 
        drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
-       drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
-       retcode = drbd_request_state(device, NS(disk, D_FAILED));
-       drbd_md_put_buffer(device);
+       buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
+       if (buffer) {
+               retcode = drbd_request_state(device, NS(disk, D_FAILED));
+               drbd_md_put_buffer(device);
+       } else /* already <= D_FAILED */
+               retcode = SS_NOTHING_TO_DO;
        /* D_FAILED will transition to DISKLESS. */
        drbd_resume_io(device);
        ret = wait_event_interruptible(device->misc_wait,