NFSD: Trace boot verifier resets
authorChuck Lever <chuck.lever@oracle.com>
Tue, 28 Dec 2021 19:27:56 +0000 (14:27 -0500)
committerChuck Lever <chuck.lever@oracle.com>
Sat, 8 Jan 2022 19:42:03 +0000 (14:42 -0500)
According to commit bbf2f098838a ("nfsd: Reset the boot verifier on
all write I/O errors"), the Linux NFS server forces all clients to
resend pending unstable writes if any server-side write or commit
operation encounters an error (say, ENOSPC). This is a rare and
quite exceptional event that could require administrative recovery
action, so it should be made trace-able. Example trace event:

nfsd-938   [002]  7174.945558: nfsd_writeverf_reset: boot_time=        61cc920d xid=0xdcd62036 error=-28 new verifier=0x08aecc6142515904

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
fs/nfsd/trace.h
fs/nfsd/vfs.c

index a0b2b8d..c4cf563 100644 (file)
@@ -574,6 +574,34 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
 DEFINE_NET_EVENT(grace_start);
 DEFINE_NET_EVENT(grace_complete);
 
+TRACE_EVENT(nfsd_writeverf_reset,
+       TP_PROTO(
+               const struct nfsd_net *nn,
+               const struct svc_rqst *rqstp,
+               int error
+       ),
+       TP_ARGS(nn, rqstp, error),
+       TP_STRUCT__entry(
+               __field(unsigned long long, boot_time)
+               __field(u32, xid)
+               __field(int, error)
+               __array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
+       ),
+       TP_fast_assign(
+               __entry->boot_time = nn->boot_time;
+               __entry->xid = be32_to_cpu(rqstp->rq_xid);
+               __entry->error = error;
+
+               /* avoid seqlock inside TP_fast_assign */
+               memcpy(__entry->verifier, nn->writeverf,
+                      NFS4_VERIFIER_SIZE);
+       ),
+       TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s",
+               __entry->boot_time, __entry->xid, __entry->error,
+               __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+       )
+);
+
 TRACE_EVENT(nfsd_clid_cred_mismatch,
        TP_PROTO(
                const struct nfs4_client *clp,
index 4956445..e4e59e1 100644 (file)
@@ -553,14 +553,17 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
                if (!status)
                        status = commit_inode_metadata(file_inode(src));
                if (status < 0) {
+                       struct nfsd_net *nn = net_generic(nf_dst->nf_net,
+                                                         nfsd_net_id);
+
                        trace_nfsd_clone_file_range_err(rqstp,
                                        &nfsd4_get_cstate(rqstp)->save_fh,
                                        src_pos,
                                        &nfsd4_get_cstate(rqstp)->current_fh,
                                        dst_pos,
                                        count, status);
-                       nfsd_reset_write_verifier(net_generic(nf_dst->nf_net,
-                                                 nfsd_net_id));
+                       nfsd_reset_write_verifier(nn);
+                       trace_nfsd_writeverf_reset(nn, rqstp, status);
                        ret = nfserrno(status);
                }
        }
@@ -1017,6 +1020,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
        host_err = vfs_iter_write(file, &iter, &pos, flags);
        if (host_err < 0) {
                nfsd_reset_write_verifier(nn);
+               trace_nfsd_writeverf_reset(nn, rqstp, host_err);
                goto out_nfserr;
        }
        *cnt = host_err;
@@ -1028,8 +1032,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 
        if (stable && use_wgather) {
                host_err = wait_for_concurrent_writes(file);
-               if (host_err < 0)
+               if (host_err < 0) {
                        nfsd_reset_write_verifier(nn);
+                       trace_nfsd_writeverf_reset(nn, rqstp, host_err);
+               }
        }
 
 out_nfserr:
@@ -1151,6 +1157,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                        break;
                default:
                        nfsd_reset_write_verifier(nn);
+                       trace_nfsd_writeverf_reset(nn, rqstp, err2);
                }
                err = nfserrno(err2);
        } else