nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()
author: Trond Myklebust <trondmy@gmail.com>
Wed, 23 Oct 2019 21:43:18 +0000 (17:43 -0400)
committer: J. Bruce Fields <bfields@redhat.com>
Fri, 8 Nov 2019 17:32:59 +0000 (12:32 -0500)
When we're destroying the client lease, and we call
nfsd4_shutdown_callback(), we must ensure that we do not return
before all outstanding callbacks have terminated and have
released their payloads.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
fs/nfsd/nfs4callback.c
fs/nfsd/state.h

index 1542e1d..67d24a5 100644 (file)
@@ -826,6 +826,31 @@ static int max_cb_time(struct net *net)
        return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
 }
 
+static struct workqueue_struct *callback_wq;
+
+static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
+{
+       return queue_work(callback_wq, &cb->cb_work);
+}
+
+static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
+{
+       atomic_inc(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
+{
+
+       if (atomic_dec_and_test(&clp->cl_cb_inflight))
+               wake_up_var(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
+{
+       wait_var_event(&clp->cl_cb_inflight,
+                       !atomic_read(&clp->cl_cb_inflight));
+}
+
 static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
 {
        if (clp->cl_minorversion == 0) {
@@ -937,14 +962,21 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
                clp->cl_cb_state = NFSD4_CB_UP;
 }
 
+static void nfsd4_cb_probe_release(void *calldata)
+{
+       struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
+
+       nfsd41_cb_inflight_end(clp);
+
+}
+
 static const struct rpc_call_ops nfsd4_cb_probe_ops = {
        /* XXX: release method to ensure we set the cb channel down if
         * necessary on early failure? */
        .rpc_call_done = nfsd4_cb_probe_done,
+       .rpc_release = nfsd4_cb_probe_release,
 };
 
-static struct workqueue_struct *callback_wq;
-
 /*
  * Poke the callback thread to process any updates to the callback
  * parameters, and send a null probe.
@@ -1004,6 +1036,16 @@ static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
        }
 }
 
+static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
+{
+       struct nfs4_client *clp = cb->cb_clp;
+
+       nfsd41_cb_release_slot(cb);
+       if (cb->cb_ops && cb->cb_ops->release)
+               cb->cb_ops->release(cb);
+       nfsd41_cb_inflight_end(clp);
+}
+
 /*
  * TODO: cb_sequence should support referring call lists, cachethis, multiple
  * slots, and mark callback channel down on communication errors.
@@ -1101,8 +1143,10 @@ retry_nowait:
                ret = false;
        goto out;
 need_restart:
-       task->tk_status = 0;
-       cb->cb_need_restart = true;
+       if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+               task->tk_status = 0;
+               cb->cb_need_restart = true;
+       }
        return false;
 }
 
@@ -1144,9 +1188,9 @@ static void nfsd4_cb_release(void *calldata)
        struct nfsd4_callback *cb = calldata;
 
        if (cb->cb_need_restart)
-               nfsd4_run_cb(cb);
+               nfsd4_queue_cb(cb);
        else
-               cb->cb_ops->release(cb);
+               nfsd41_destroy_cb(cb);
 
 }
 
@@ -1180,6 +1224,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
         */
        nfsd4_run_cb(&clp->cl_cb_null);
        flush_workqueue(callback_wq);
+       nfsd41_cb_inflight_wait_complete(clp);
 }
 
 /* requires cl_lock: */
@@ -1265,8 +1310,7 @@ nfsd4_run_cb_work(struct work_struct *work)
        clnt = clp->cl_cb_client;
        if (!clnt) {
                /* Callback channel broken, or client killed; give up: */
-               if (cb->cb_ops && cb->cb_ops->release)
-                       cb->cb_ops->release(cb);
+               nfsd41_destroy_cb(cb);
                return;
        }
 
@@ -1275,6 +1319,7 @@ nfsd4_run_cb_work(struct work_struct *work)
         */
        if (!cb->cb_ops && clp->cl_minorversion) {
                clp->cl_cb_state = NFSD4_CB_UP;
+               nfsd41_destroy_cb(cb);
                return;
        }
 
@@ -1300,5 +1345,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 
 void nfsd4_run_cb(struct nfsd4_callback *cb)
 {
-       queue_work(callback_wq, &cb->cb_work);
+       struct nfs4_client *clp = cb->cb_clp;
+
+       nfsd41_cb_inflight_begin(clp);
+       if (!nfsd4_queue_cb(cb))
+               nfsd41_cb_inflight_end(clp);
 }
index 46f56af..d61b83b 100644 (file)
@@ -367,6 +367,7 @@ struct nfs4_client {
        struct net              *net;
        struct list_head        async_copies;   /* list of async copies */
        spinlock_t              async_lock;     /* lock for async copies */
+       atomic_t                cl_cb_inflight; /* Outstanding callbacks */
 };
 
 /* struct nfs4_client_reset