void dlm_print_lkb(struct dlm_lkb *lkb)
{
printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x "
- "sts %d rq %d gr %d wait_type %d wait_nodeid %d\n",
+ "sts %d rq %d gr %d wait_type %d wait_nodeid %d seq %llu\n",
lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags,
lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode,
- lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
+ lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_wait_nodeid,
+ (unsigned long long)lkb->lkb_recover_seq);
}
static void dlm_print_rsb(struct dlm_rsb *r)
static inline int is_master_copy(struct dlm_lkb *lkb)
{
- if (lkb->lkb_flags & DLM_IFL_MSTCPY)
- DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb););
return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 1 : 0;
}
}
lkb->lkb_rqmode = DLM_LOCK_IV;
+ lkb->lkb_highbast = 0;
}
static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
set_lvb_lock(r, lkb);
_grant_lock(r, lkb);
- lkb->lkb_highbast = 0;
}
static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
/* Returns the highest requested mode of all blocked conversions; sets
cw if there's a blocked conversion to DLM_LOCK_CW. */
-static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw)
+static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw,
+ unsigned int *count)
{
struct dlm_lkb *lkb, *s;
int hi, demoted, quit, grant_restart, demote_restart;
if (can_be_granted(r, lkb, 0, &deadlk)) {
grant_lock_pending(r, lkb);
grant_restart = 1;
+ if (count)
+ (*count)++;
continue;
}
return max_t(int, high, hi);
}
-static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw)
+static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw,
+ unsigned int *count)
{
struct dlm_lkb *lkb, *s;
list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
- if (can_be_granted(r, lkb, 0, NULL))
+ if (can_be_granted(r, lkb, 0, NULL)) {
grant_lock_pending(r, lkb);
- else {
+ if (count)
+ (*count)++;
+ } else {
high = max_t(int, lkb->lkb_rqmode, high);
if (lkb->lkb_rqmode == DLM_LOCK_CW)
*cw = 1;
return 0;
}
-static void grant_pending_locks(struct dlm_rsb *r)
+static void grant_pending_locks(struct dlm_rsb *r, unsigned int *count)
{
struct dlm_lkb *lkb, *s;
int high = DLM_LOCK_IV;
int cw = 0;
- DLM_ASSERT(is_master(r), dlm_dump_rsb(r););
+ if (!is_master(r)) {
+ log_print("grant_pending_locks r nodeid %d", r->res_nodeid);
+ dlm_dump_rsb(r);
+ return;
+ }
- high = grant_pending_convert(r, high, &cw);
- high = grant_pending_wait(r, high, &cw);
+ high = grant_pending_convert(r, high, &cw, count);
+ high = grant_pending_wait(r, high, &cw, count);
if (high == DLM_LOCK_IV)
return;
before we try again to grant this one. */
if (is_demoted(lkb)) {
- grant_pending_convert(r, DLM_LOCK_IV, NULL);
+ grant_pending_convert(r, DLM_LOCK_IV, NULL, NULL);
if (_can_be_granted(r, lkb, 1)) {
grant_lock(r, lkb);
queue_cast(r, lkb, 0);
{
switch (error) {
case 0:
- grant_pending_locks(r);
+ grant_pending_locks(r, NULL);
/* grant_pending_locks also sends basts */
break;
case -EAGAIN:
static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
int error)
{
- grant_pending_locks(r);
+ grant_pending_locks(r, NULL);
}
/* returns: 0 did nothing, -DLM_ECANCEL canceled lock */
int error)
{
if (error)
- grant_pending_locks(r);
+ grant_pending_locks(r, NULL);
}
/*
goto fail;
if (lkb->lkb_remid != ms->m_lkid) {
- log_error(ls, "receive_convert %x remid %x remote %d %x",
- lkb->lkb_id, lkb->lkb_remid,
+ log_error(ls, "receive_convert %x remid %x recover_seq %llu "
+ "remote %d %x", lkb->lkb_id, lkb->lkb_remid,
+ (unsigned long long)lkb->lkb_recover_seq,
ms->m_header.h_nodeid, ms->m_lkid);
error = -ENOENT;
goto fail;
goto out;
queue_bast(r, lkb, ms->m_bastmode);
+ lkb->lkb_highbast = ms->m_bastmode;
out:
unlock_rsb(r);
put_rsb(r);
mstype = lkb->lkb_wait_type;
error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
- if (error)
+ if (error) {
+ log_error(ls, "receive_request_reply %x remote %d %x result %d",
+ lkb->lkb_id, ms->m_header.h_nodeid, ms->m_lkid,
+ ms->m_result);
+ dlm_dump_rsb(r);
goto out;
+ }
/* Optimization: the dir node was also the master, so it took our
lookup as a request and sent request reply instead of lookup reply */
* happen in normal usage for the async messages and cancel, so
* only use log_debug for them.
*
- * Other errors are expected and normal.
+ * Some errors are expected and normal.
*/
if (error == -ENOENT && noent) {
- log_debug(ls, "receive %d no %x remote %d %x seq %u",
+ log_debug(ls, "receive %d no %x remote %d %x saved_seq %u",
ms->m_type, ms->m_remid, ms->m_header.h_nodeid,
ms->m_lkid, saved_seq);
} else if (error == -ENOENT) {
- log_error(ls, "receive %d no %x remote %d %x seq %u",
+ log_error(ls, "receive %d no %x remote %d %x saved_seq %u",
ms->m_type, ms->m_remid, ms->m_header.h_nodeid,
ms->m_lkid, saved_seq);
if (ms->m_type == DLM_MSG_CONVERT)
dlm_dump_rsb_hash(ls, ms->m_hash);
}
+
+ if (error == -EINVAL) {
+ log_error(ls, "receive %d inval from %d lkid %x remid %x "
+ "saved_seq %u",
+ ms->m_type, ms->m_header.h_nodeid,
+ ms->m_lkid, ms->m_remid, saved_seq);
+ }
}
/* If the lockspace is in recovery mode (locking stopped), then normal
ls = dlm_find_lockspace_global(hd->h_lockspace);
if (!ls) {
- if (dlm_config.ci_log_debug)
- log_print("invalid lockspace %x from %d cmd %d type %d",
- hd->h_lockspace, nodeid, hd->h_cmd, type);
+ if (dlm_config.ci_log_debug) {
+ printk_ratelimited(KERN_DEBUG "dlm: invalid lockspace "
+ "%u from %d cmd %d type %d\n",
+ hd->h_lockspace, nodeid, hd->h_cmd, type);
+ }
if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
dlm_send_ls_not_ready(nodeid, &p->rcom);
static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb,
int dir_nodeid)
{
- if (dlm_is_removed(ls, lkb->lkb_wait_nodeid))
- return 1;
-
- if (!dlm_no_directory(ls))
- return 0;
-
- if (dir_nodeid == dlm_our_nodeid())
+ if (dlm_no_directory(ls))
return 1;
- if (dir_nodeid != lkb->lkb_wait_nodeid)
+ if (dlm_is_removed(ls, lkb->lkb_wait_nodeid))
return 1;
return 0;
return error;
}
-static void purge_queue(struct dlm_rsb *r, struct list_head *queue,
- int (*test)(struct dlm_ls *ls, struct dlm_lkb *lkb))
+static void purge_mstcpy_list(struct dlm_ls *ls, struct dlm_rsb *r,
+ struct list_head *list)
{
- struct dlm_ls *ls = r->res_ls;
struct dlm_lkb *lkb, *safe;
- list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) {
- if (test(ls, lkb)) {
- rsb_set_flag(r, RSB_LOCKS_PURGED);
- del_lkb(r, lkb);
- /* this put should free the lkb */
- if (!dlm_put_lkb(lkb))
- log_error(ls, "purged lkb not released");
- }
+ list_for_each_entry_safe(lkb, safe, list, lkb_statequeue) {
+ if (!is_master_copy(lkb))
+ continue;
+
+ /* don't purge lkbs we've added in recover_master_copy for
+ the current recovery seq */
+
+ if (lkb->lkb_recover_seq == ls->ls_recover_seq)
+ continue;
+
+ del_lkb(r, lkb);
+
+ /* this put should free the lkb */
+ if (!dlm_put_lkb(lkb))
+ log_error(ls, "purged mstcpy lkb not released");
}
}
-static int purge_dead_test(struct dlm_ls *ls, struct dlm_lkb *lkb)
+void dlm_purge_mstcpy_locks(struct dlm_rsb *r)
{
- return (is_master_copy(lkb) && dlm_is_removed(ls, lkb->lkb_nodeid));
-}
+ struct dlm_ls *ls = r->res_ls;
-static int purge_mstcpy_test(struct dlm_ls *ls, struct dlm_lkb *lkb)
-{
- return is_master_copy(lkb);
+ purge_mstcpy_list(ls, r, &r->res_grantqueue);
+ purge_mstcpy_list(ls, r, &r->res_convertqueue);
+ purge_mstcpy_list(ls, r, &r->res_waitqueue);
}
-static void purge_dead_locks(struct dlm_rsb *r)
+static void purge_dead_list(struct dlm_ls *ls, struct dlm_rsb *r,
+ struct list_head *list,
+ int nodeid_gone, unsigned int *count)
{
- purge_queue(r, &r->res_grantqueue, &purge_dead_test);
- purge_queue(r, &r->res_convertqueue, &purge_dead_test);
- purge_queue(r, &r->res_waitqueue, &purge_dead_test);
-}
+ struct dlm_lkb *lkb, *safe;
-void dlm_purge_mstcpy_locks(struct dlm_rsb *r)
-{
- purge_queue(r, &r->res_grantqueue, &purge_mstcpy_test);
- purge_queue(r, &r->res_convertqueue, &purge_mstcpy_test);
- purge_queue(r, &r->res_waitqueue, &purge_mstcpy_test);
+ list_for_each_entry_safe(lkb, safe, list, lkb_statequeue) {
+ if (!is_master_copy(lkb))
+ continue;
+
+ if ((lkb->lkb_nodeid == nodeid_gone) ||
+ dlm_is_removed(ls, lkb->lkb_nodeid)) {
+
+ del_lkb(r, lkb);
+
+ /* this put should free the lkb */
+ if (!dlm_put_lkb(lkb))
+ log_error(ls, "purged dead lkb not released");
+
+ rsb_set_flag(r, RSB_RECOVER_GRANT);
+
+ (*count)++;
+ }
+ }
}
/* Get rid of locks held by nodes that are gone. */
-int dlm_purge_locks(struct dlm_ls *ls)
+void dlm_recover_purge(struct dlm_ls *ls)
{
struct dlm_rsb *r;
+ struct dlm_member *memb;
+ int nodes_count = 0;
+ int nodeid_gone = 0;
+ unsigned int lkb_count = 0;
+
+ /* cache one removed nodeid to optimize the common
+ case of a single node removed */
+
+ list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
+ nodes_count++;
+ nodeid_gone = memb->nodeid;
+ }
- log_debug(ls, "dlm_purge_locks");
+ if (!nodes_count)
+ return;
down_write(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
hold_rsb(r);
lock_rsb(r);
- if (is_master(r))
- purge_dead_locks(r);
+ if (is_master(r)) {
+ purge_dead_list(ls, r, &r->res_grantqueue,
+ nodeid_gone, &lkb_count);
+ purge_dead_list(ls, r, &r->res_convertqueue,
+ nodeid_gone, &lkb_count);
+ purge_dead_list(ls, r, &r->res_waitqueue,
+ nodeid_gone, &lkb_count);
+ }
unlock_rsb(r);
unhold_rsb(r);
-
- schedule();
+ cond_resched();
}
up_write(&ls->ls_root_sem);
- return 0;
+ if (lkb_count)
+ log_debug(ls, "dlm_recover_purge %u locks for %u nodes",
+ lkb_count, nodes_count);
}
-static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket)
+static struct dlm_rsb *find_grant_rsb(struct dlm_ls *ls, int bucket)
{
struct rb_node *n;
- struct dlm_rsb *r, *r_ret = NULL;
+ struct dlm_rsb *r;
spin_lock(&ls->ls_rsbtbl[bucket].lock);
for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) {
r = rb_entry(n, struct dlm_rsb, res_hashnode);
- if (!rsb_flag(r, RSB_LOCKS_PURGED))
+
+ if (!rsb_flag(r, RSB_RECOVER_GRANT))
+ continue;
+ rsb_clear_flag(r, RSB_RECOVER_GRANT);
+ if (!is_master(r))
continue;
hold_rsb(r);
- rsb_clear_flag(r, RSB_LOCKS_PURGED);
- r_ret = r;
- break;
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+ return r;
}
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
- return r_ret;
+ return NULL;
}
-void dlm_grant_after_purge(struct dlm_ls *ls)
+/*
+ * Attempt to grant locks on resources that we are the master of.
+ * Locks may have become grantable during recovery because locks
+ * from departed nodes have been purged (or not rebuilt), allowing
+ * previously blocked locks to now be granted. The subset of rsb's
+ * we are interested in are those with lkb's on either the convert or
+ * waiting queues.
+ *
+ * Simplest would be to go through each master rsb and check for non-empty
+ * convert or waiting queues, and attempt to grant on those rsbs.
+ * Checking the queues requires lock_rsb, though, for which we'd need
+ * to release the rsbtbl lock. This would make iterating through all
+ * rsb's very inefficient. So, we rely on earlier recovery routines
+ * to set RECOVER_GRANT on any rsb's that we should attempt to grant
+ * locks for.
+ */
+
+void dlm_recover_grant(struct dlm_ls *ls)
{
struct dlm_rsb *r;
int bucket = 0;
+ unsigned int count = 0;
+ unsigned int rsb_count = 0;
+ unsigned int lkb_count = 0;
while (1) {
- r = find_purged_rsb(ls, bucket);
+ r = find_grant_rsb(ls, bucket);
if (!r) {
if (bucket == ls->ls_rsbtbl_size - 1)
break;
bucket++;
continue;
}
+ rsb_count++;
+ count = 0;
lock_rsb(r);
- if (is_master(r)) {
- grant_pending_locks(r);
- confirm_master(r, 0);
- }
+ grant_pending_locks(r, &count);
+ lkb_count += count;
+ confirm_master(r, 0);
unlock_rsb(r);
put_rsb(r);
- schedule();
+ cond_resched();
}
+
+ if (lkb_count)
+ log_debug(ls, "dlm_recover_grant %u locks on %u resources",
+ lkb_count, rsb_count);
}
static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid,
remid = le32_to_cpu(rl->rl_lkid);
- error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen),
- R_MASTER, &r);
+ /* In general we expect the rsb returned to be R_MASTER, but we don't
+ have to require it. Recovery of masters on one node can overlap
+ recovery of locks on another node, so one node can send us MSTCPY
+ locks before we've made ourselves master of this rsb. We can still
+ add new MSTCPY locks that we receive here without any harm; when
+ we make ourselves master, dlm_recover_masters() won't touch the
+ MSTCPY locks we've received early. */
+
+ error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen), 0, &r);
if (error)
goto out;
+ if (dlm_no_directory(ls) && (dlm_dir_nodeid(r) != dlm_our_nodeid())) {
+ log_error(ls, "dlm_recover_master_copy remote %d %x not dir",
+ rc->rc_header.h_nodeid, remid);
+ error = -EBADR;
+ put_rsb(r);
+ goto out;
+ }
+
lock_rsb(r);
lkb = search_remid(r, rc->rc_header.h_nodeid, remid);
attach_lkb(r, lkb);
add_lkb(r, lkb, rl->rl_status);
error = 0;
+ ls->ls_recover_locks_in++;
+
+ if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
+ rsb_set_flag(r, RSB_RECOVER_GRANT);
out_remid:
/* this is the new value returned to the lock holder for
saving in its process-copy lkb */
rl->rl_remid = cpu_to_le32(lkb->lkb_id);
+ lkb->lkb_recover_seq = ls->ls_recover_seq;
+
out_unlock:
unlock_rsb(r);
put_rsb(r);
return error;
}
+ r = lkb->lkb_resource;
+ hold_rsb(r);
+ lock_rsb(r);
+
if (!is_process_copy(lkb)) {
log_error(ls, "dlm_recover_process_copy bad %x remote %d %x %d",
lkid, rc->rc_header.h_nodeid, remid, result);
- dlm_print_lkb(lkb);
+ dlm_dump_rsb(r);
+ unlock_rsb(r);
+ put_rsb(r);
+ dlm_put_lkb(lkb);
return -EINVAL;
}
- r = lkb->lkb_resource;
- hold_rsb(r);
- lock_rsb(r);
-
switch (result) {
case -EBADR:
/* There's a chance the new master received our lock before
{
struct dlm_lkb *lkb;
- list_for_each_entry(lkb, queue, lkb_statequeue)
- if (!(lkb->lkb_flags & DLM_IFL_MSTCPY))
+ list_for_each_entry(lkb, queue, lkb_statequeue) {
+ if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) {
lkb->lkb_nodeid = nodeid;
+ lkb->lkb_remid = 0;
+ }
+ }
}
static void set_master_lkbs(struct dlm_rsb *r)
/*
* Propagate the new master nodeid to locks
* The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
- * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
+ * The NEW_MASTER2 flag tells recover_lvb() and recover_grant() which
* rsb's to consider.
*/
static void set_new_master(struct dlm_rsb *r, int nodeid)
{
- lock_rsb(r);
r->res_nodeid = nodeid;
set_master_lkbs(r);
rsb_set_flag(r, RSB_NEW_MASTER);
rsb_set_flag(r, RSB_NEW_MASTER2);
- unlock_rsb(r);
}
/*
static int recover_master(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
- int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
-
- dir_nodeid = dlm_dir_nodeid(r);
+ int error, ret_nodeid;
+ int our_nodeid = dlm_our_nodeid();
+ int dir_nodeid = dlm_dir_nodeid(r);
if (dir_nodeid == our_nodeid) {
error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
if (ret_nodeid == our_nodeid)
ret_nodeid = 0;
+ lock_rsb(r);
set_new_master(r, ret_nodeid);
+ unlock_rsb(r);
} else {
recover_list_add(r);
error = dlm_send_rcom_lookup(r, dir_nodeid);
}
/*
- * When not using a directory, most resource names will hash to a new static
- * master nodeid and the resource will need to be remastered.
+ * All MSTCPY locks are purged and rebuilt, even if the master stayed the same.
+ * This is necessary because recovery can be started, aborted and restarted,
+ * causing the master nodeid to briefly change during the aborted recovery, and
+ * change back to the original value in the second recovery. The MSTCPY locks
+ * may or may not have been purged during the aborted recovery. Another node
+ * with an outstanding request in waiters list and a request reply saved in the
+ * requestqueue, cannot know whether it should ignore the reply and resend the
+ * request, or accept the reply and complete the request. It must do the
+ * former if the remote node purged MSTCPY locks, and it must do the later if
+ * the remote node did not. This is solved by always purging MSTCPY locks, in
+ * which case, the request reply would always be ignored and the request
+ * resent.
*/
static int recover_master_static(struct dlm_rsb *r)
{
- int master = dlm_dir_nodeid(r);
+ int dir_nodeid = dlm_dir_nodeid(r);
+ int new_master = dir_nodeid;
- if (master == dlm_our_nodeid())
- master = 0;
+ if (dir_nodeid == dlm_our_nodeid())
+ new_master = 0;
- if (r->res_nodeid != master) {
- if (is_master(r))
- dlm_purge_mstcpy_locks(r);
- set_new_master(r, master);
- return 1;
- }
- return 0;
+ lock_rsb(r);
+ dlm_purge_mstcpy_locks(r);
+ set_new_master(r, new_master);
+ unlock_rsb(r);
+ return 1;
}
/*
if (nodeid == dlm_our_nodeid())
nodeid = 0;
+ lock_rsb(r);
set_new_master(r, nodeid);
+ unlock_rsb(r);
recover_list_del(r);
if (recover_list_empty(ls))
struct dlm_rsb *r;
int error, count = 0;
- log_debug(ls, "dlm_recover_locks");
-
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
if (is_master(r)) {
}
up_read(&ls->ls_root_sem);
- log_debug(ls, "dlm_recover_locks %d locks", count);
+ log_debug(ls, "dlm_recover_locks %d out", count);
error = dlm_wait_function(ls, &recover_list_empty);
out:
}
/* We've become the new master for this rsb and waiting/converting locks may
- need to be granted in dlm_grant_after_purge() due to locks that may have
+ need to be granted in dlm_recover_grant() due to locks that may have
existed from a removed node. */
-static void set_locks_purged(struct dlm_rsb *r)
+static void recover_grant(struct dlm_rsb *r)
{
if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
- rsb_set_flag(r, RSB_LOCKS_PURGED);
+ rsb_set_flag(r, RSB_RECOVER_GRANT);
}
void dlm_recover_rsbs(struct dlm_ls *ls)
{
struct dlm_rsb *r;
- int count = 0;
-
- log_debug(ls, "dlm_recover_rsbs");
+ unsigned int count = 0;
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
if (rsb_flag(r, RSB_RECOVER_CONVERT))
recover_conversion(r);
if (rsb_flag(r, RSB_NEW_MASTER2))
- set_locks_purged(r);
+ recover_grant(r);
recover_lvb(r);
count++;
}
}
up_read(&ls->ls_root_sem);
- log_debug(ls, "dlm_recover_rsbs %d rsbs", count);
+ if (count)
+ log_debug(ls, "dlm_recover_rsbs %d done", count);
}
/* Create a single list of all root rsb's to be used during recovery */