drbd: Break a deadlock between concurrent fencing and connection establishment
author Philipp Reisner <philipp.reisner@linbit.com>
Mon, 28 Apr 2014 16:43:12 +0000 (18:43 +0200)
committer Jens Axboe <axboe@fb.com>
Wed, 30 Apr 2014 19:46:53 +0000 (13:46 -0600)
When we need to outdate the peer while being promoted to primary,
and the connection gets established at the same time, we deadlock
in drbd_try_outdate_peer() when trying to clear the susp_fen
bit.

Fix this by setting the STATE_SENT bit while holding the mutex.

Using drbd_change_state(.. , CS_HARD, ..), which does not block
until STATE_SENT is cleared, is only for clarity. It does
not contribute anything to the fix.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
drivers/block/drbd/drbd_receiver.c

index 68e3992..125c9e8 100644 (file)
@@ -1026,24 +1026,27 @@ randomize:
        if (drbd_send_protocol(connection) == -EOPNOTSUPP)
                return -1;
 
+       /* Prevent a race between resync-handshake and
+        * being promoted to Primary.
+        *
+        * Grab and release the state mutex, so we know that any current
+        * drbd_set_role() is finished, and any incoming drbd_set_role
+        * will see the STATE_SENT flag, and wait for it to be cleared.
+        */
+       idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+               mutex_lock(peer_device->device->state_mutex);
+
        set_bit(STATE_SENT, &connection->flags);
 
+       idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+               mutex_unlock(peer_device->device->state_mutex);
+
        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                kref_get(&device->kref);
                rcu_read_unlock();
 
-               /* Prevent a race between resync-handshake and
-                * being promoted to Primary.
-                *
-                * Grab and release the state mutex, so we know that any current
-                * drbd_set_role() is finished, and any incoming drbd_set_role
-                * will see the STATE_SENT flag, and wait for it to be cleared.
-                */
-               mutex_lock(device->state_mutex);
-               mutex_unlock(device->state_mutex);
-
                if (discard_my_data)
                        set_bit(DISCARD_MY_DATA, &device->flags);
                else