wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
- struct completion ls_members_done;
- int ls_members_result;
+ struct completion ls_recovery_done;
+ int ls_recovery_result;
struct miscdevice ls_device;
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
- init_completion(&ls->ls_members_done);
- ls->ls_members_result = -1;
+ init_completion(&ls->ls_recovery_done);
+ ls->ls_recovery_result = -1;
mutex_init(&ls->ls_cb_mutex);
INIT_LIST_HEAD(&ls->ls_cb_delay);
if (error)
goto out_recoverd;
- wait_for_completion(&ls->ls_members_done);
- error = ls->ls_members_result;
+ /* wait until recovery is successful or failed */
+ wait_for_completion(&ls->ls_recovery_done);
+ error = ls->ls_recovery_result;
if (error)
goto out_members;
*neg_out = neg;
error = ping_members(ls);
- /* error -EINTR means that a new recovery action is triggered.
- * We ignore this recovery action and let run the new one which might
- * have new member configuration.
- */
- if (error == -EINTR)
- error = 0;
-
- /* new_lockspace() may be waiting to know if the config
- * is good or bad
- */
- ls->ls_members_result = error;
- complete(&ls->ls_members_done);
-
log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
return error;
}
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
+ ls->ls_recovery_result = 0;
+ complete(&ls->ls_recovery_done);
+
dlm_lsop_recover_done(ls);
return 0;
log_rinfo(ls, "dlm_recover %llu error %d",
(unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
+
+ /* let new_lockspace() get aware of critical error if recovery
+ * was interrupted -EINTR we wait for the next ls_recover()
+ * iteration until it succeeds.
+ */
+ if (error != -EINTR) {
+ ls->ls_recovery_result = error;
+ complete(&ls->ls_recovery_done);
+ }
+
return error;
}