From 813472ced7fac734157fe5be1137ce2bac942902 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 3 May 2011 16:47:02 +0200 Subject: [PATCH] drbd: RCU for rs_plan_s This removes the issue with using peer_seq_lock out of different contexts. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 18 +++++----- drivers/block/drbd/drbd_receiver.c | 71 +++++++++++++++++++++++--------------- drivers/block/drbd/drbd_worker.c | 41 +++++++++++++--------- 4 files changed, 77 insertions(+), 55 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2ecee6c..3f377e2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -998,7 +998,7 @@ struct drbd_conf { int rs_last_events; /* counter of read or write "events" (unit sectors) * on the lower level device when we last looked. */ int c_sync_rate; /* current resync rate after syncer throttle magic */ - struct fifo_buffer *rs_plan_s; /* correction values of resync planer */ + struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, tconn->conn_update) */ int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ int peer_max_bio_size; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 812e91f..9af0974 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1115,7 +1115,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) enum drbd_ret_code retcode; struct drbd_conf *mdev; struct disk_conf *new_disk_conf, *old_disk_conf; - struct fifo_buffer *rs_plan_s = NULL; + struct fifo_buffer *old_plan = NULL, *new_plan = NULL; int err, fifo_size; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -1158,8 +1158,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s->size) { - rs_plan_s = fifo_alloc(fifo_size); - if (!rs_plan_s) { + new_plan = fifo_alloc(fifo_size); + if (!new_plan) { dev_err(DEV, "kmalloc of fifo_buffer failed"); retcode = ERR_NOMEM; goto fail_unlock; @@ -1188,13 +1188,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail_unlock; - spin_lock(&mdev->peer_seq_lock); - if (rs_plan_s) { - kfree(mdev->rs_plan_s); - mdev->rs_plan_s = rs_plan_s; - rs_plan_s = NULL; + if (new_plan) { + old_plan = mdev->rs_plan_s; + rcu_assign_pointer(mdev->rs_plan_s, new_plan); } - spin_unlock(&mdev->peer_seq_lock); drbd_md_sync(mdev); @@ -1204,13 +1201,14 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&mdev->tconn->conf_update); synchronize_rcu(); kfree(old_disk_conf); + kfree(old_plan); goto success; fail_unlock: mutex_unlock(&mdev->tconn->conf_update); fail: kfree(new_disk_conf); - kfree(rs_plan_s); + kfree(new_plan); success: put_ldev(mdev); out: diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 19b421f..83d3985 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3157,9 +3157,9 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; struct net_conf *old_net_conf, *new_net_conf = NULL; - struct disk_conf *old_disk_conf, *new_disk_conf = NULL; + struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; const int apv = tconn->agreed_pro_version; - struct fifo_buffer *rs_plan_s = NULL; + struct fifo_buffer *old_plan = NULL, *new_plan = NULL; int fifo_size = 0; int err; @@ -3200,18 +3200,22 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) if (err) return err; - new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); - if (!new_disk_conf) { - dev_err(DEV, "Allocation of new disk_conf failed\n"); - return -ENOMEM; - } - mutex_lock(&mdev->tconn->conf_update); old_net_conf = mdev->tconn->net_conf; - old_disk_conf = mdev->ldev->disk_conf; - *new_disk_conf = *old_disk_conf; + if (get_ldev(mdev)) { + new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + put_ldev(mdev); + mutex_unlock(&mdev->tconn->conf_update); + dev_err(DEV, "Allocation of new disk_conf failed\n"); + return -ENOMEM; + } - new_disk_conf->resync_rate = be32_to_cpu(p->rate); + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; + + new_disk_conf->resync_rate = be32_to_cpu(p->rate); + } if (apv >= 88) { if (apv == 88) { @@ -3219,15 +3223,13 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) dev_err(DEV, "verify-alg too long, " "peer wants %u, accepting only %u byte\n", data_size, SHARED_SECRET_MAX); - mutex_unlock(&mdev->tconn->conf_update); - return -EIO; + err = -EIO; + goto reconnect; } err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size); - if (err) { - mutex_unlock(&mdev->tconn->conf_update); - return err; - } + if (err) + goto reconnect; /* we expect NUL terminated string */ /* but just in case someone tries to be evil */ D_ASSERT(p->verify_alg[data_size-1] == 0); @@ -3270,7 +3272,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } } - if (apv > 94) { + if (apv > 94 && new_disk_conf) { new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead); new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target); new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target); @@ -3278,8 +3280,8 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s->size) { - rs_plan_s = fifo_alloc(fifo_size); - if (!rs_plan_s) { + new_plan = fifo_alloc(fifo_size); + if (!new_plan) { dev_err(DEV, "kmalloc of fifo_buffer failed"); put_ldev(mdev); goto disconnect; @@ -3314,24 +3316,39 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } } - rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - spin_lock(&mdev->peer_seq_lock); - if (rs_plan_s) { - kfree(mdev->rs_plan_s); - mdev->rs_plan_s = rs_plan_s; + if (new_disk_conf) { + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + put_ldev(mdev); + } + + if (new_plan) { + old_plan = mdev->rs_plan_s; + rcu_assign_pointer(mdev->rs_plan_s, new_plan); } - spin_unlock(&mdev->peer_seq_lock); mutex_unlock(&mdev->tconn->conf_update); synchronize_rcu(); if (new_net_conf) kfree(old_net_conf); kfree(old_disk_conf); + kfree(old_plan); return 0; +reconnect: + if (new_disk_conf) { + put_ldev(mdev); + kfree(new_disk_conf); + } + mutex_unlock(&mdev->tconn->conf_update); + return -EIO; + disconnect: - kfree(rs_plan_s); + kfree(new_plan); + if (new_disk_conf) { + put_ldev(mdev); + kfree(new_disk_conf); + } mutex_unlock(&mdev->tconn->conf_update); /* just for completeness: actually not needed, * as this is not reached if csums_tfm was ok. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 131887b..e37c42d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -460,15 +460,15 @@ static int drbd_rs_controller(struct drbd_conf *mdev) int steps; /* Number of time steps to plan ahead */ int curr_corr; int max_sect; + struct fifo_buffer *plan; sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */ mdev->rs_in_flight -= sect_in; - spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ - rcu_read_lock(); dc = rcu_dereference(mdev->ldev->disk_conf); + plan = rcu_dereference(mdev->rs_plan_s); - steps = mdev->rs_plan_s->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ + steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; @@ -477,16 +477,16 @@ static int drbd_rs_controller(struct drbd_conf *mdev) sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); } - correction = want - mdev->rs_in_flight - mdev->rs_plan_s->total; + correction = want - mdev->rs_in_flight - plan->total; /* Plan ahead */ cps = correction / steps; - fifo_add_val(mdev->rs_plan_s, cps); - mdev->rs_plan_s->total += cps * steps; + fifo_add_val(plan, cps); + plan->total += cps * steps; /* What we do in this step */ - curr_corr = fifo_push(mdev->rs_plan_s, 0); - mdev->rs_plan_s->total -= curr_corr; + curr_corr = fifo_push(plan, 0); + plan->total -= curr_corr; req_sect = sect_in + curr_corr; if (req_sect < 0) @@ -501,8 +501,6 @@ static int drbd_rs_controller(struct drbd_conf *mdev) sect_in, mdev->rs_in_flight, want, correction, steps, cps, mdev->rs_planed, curr_corr, req_sect); */ - rcu_read_unlock(); - spin_unlock(&mdev->peer_seq_lock); return req_sect; } @@ -510,15 +508,16 @@ static int drbd_rs_controller(struct drbd_conf *mdev) static int drbd_rs_number_requests(struct drbd_conf *mdev) { int number; - if (mdev->rs_plan_s->size) { /* rcu_dereference(mdev->ldev->disk_conf)->c_plan_ahead */ + + rcu_read_lock(); + if (rcu_dereference(mdev->rs_plan_s)->size) { number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { - rcu_read_lock(); mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate; - rcu_read_unlock(); number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } + rcu_read_unlock(); /* ignore the amount of pending requests, the resync controller should * throttle down to incoming reply rate soon enough anyways. */ @@ -1468,13 +1467,21 @@ void drbd_sync_after_changed(struct drbd_conf *mdev) void drbd_rs_controller_reset(struct drbd_conf *mdev) { + struct fifo_buffer *plan; + atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); mdev->rs_in_flight = 0; - mdev->rs_plan_s->total = 0; - spin_lock(&mdev->peer_seq_lock); - fifo_set(mdev->rs_plan_s, 0); - spin_unlock(&mdev->peer_seq_lock); + + /* Updating the RCU protected object in place is necessary since + this function gets called from atomic context. + It is valid since all other updates also lead to an completely + empty fifo */ + rcu_read_lock(); + plan = rcu_dereference(mdev->rs_plan_s); + plan->total = 0; + fifo_set(plan, 0); + rcu_read_unlock(); } void start_resync_timer_fn(unsigned long data) -- 2.7.4