NFSD: Replace nfsd_prune_bucket()
author    Chuck Lever <chuck.lever@oracle.com>
          Sun, 9 Jul 2023 15:45:29 +0000 (11:45 -0400)
committer Chuck Lever <chuck.lever@oracle.com>
          Tue, 29 Aug 2023 21:45:22 +0000 (17:45 -0400)
Enable nfsd_prune_bucket() to drop the bucket lock while calling
kfree(). Use the same pattern that Jeff recently introduced in the
NFSD filecache.

A few percpu operations are moved outside the lock since they
temporarily disable local IRQs, which is expensive and need not
be done while the lock is held.
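
The resulting caller pattern, roughly (a condensed sketch of the
nfsd_cache_lookup() hunk below; the cache-insert logic is omitted):

	LIST_HEAD(dispose);

	spin_lock(&b->cache_lock);
	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
	spin_unlock(&b->cache_lock);

	/* kfree() and the percpu statistics now run unlocked */
	freed = nfsd_cacherep_dispose(&dispose);
	trace_nfsd_drc_gc(nn, freed);
	nfsd_stats_rc_misses_inc();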

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
fs/nfsd/nfscache.c
fs/nfsd/trace.h

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 02259d280f51cf3da2b7fb4eed13410c802421ae..787d15b623369ad3b9eb0a97eaf3defc148893e9 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -117,6 +117,21 @@ static void nfsd_cacherep_free(struct svc_cacherep *rp)
        kmem_cache_free(drc_slab, rp);
 }
 
+static unsigned long
+nfsd_cacherep_dispose(struct list_head *dispose)
+{
+       struct svc_cacherep *rp;
+       unsigned long freed = 0;
+
+       while (!list_empty(dispose)) {
+               rp = list_first_entry(dispose, struct svc_cacherep, c_lru);
+               list_del(&rp->c_lru);
+               nfsd_cacherep_free(rp);
+               freed++;
+       }
+       return freed;
+}
+
 static void
 nfsd_cacherep_unlink_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
                            struct svc_cacherep *rp)
@@ -260,6 +275,41 @@ nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
        return &nn->drc_hashtbl[hash];
 }
 
+/*
+ * Remove no more than @max expired entries from bucket @b and
+ * return them on the @dispose list. If @max is zero, do not limit
+ * the number of removed entries.
+ */
+static void
+nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+                        unsigned int max, struct list_head *dispose)
+{
+       unsigned long expiry = jiffies - RC_EXPIRE;
+       struct svc_cacherep *rp, *tmp;
+       unsigned int freed = 0;
+
+       lockdep_assert_held(&b->cache_lock);
+
+       /* The bucket LRU is ordered oldest-first. */
+       list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
+               /*
+                * Don't free entries attached to calls that are still
+                * in-progress, but do keep scanning the list.
+                */
+               if (rp->c_state == RC_INPROG)
+                       continue;
+
+               if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
+                   time_before(expiry, rp->c_timestamp))
+                       break;
+
+               nfsd_cacherep_unlink_locked(nn, b, rp);
+               list_add(&rp->c_lru, dispose);
+
+               if (max && ++freed > max)
+                       break;
+       }
+}
+
 static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
                         unsigned int max)
 {
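
For the @max == 0 case documented above, a full sweep of a bucket
would look like the following. This is an illustrative sketch only;
no such caller is added by this patch:

	LIST_HEAD(dispose);

	spin_lock(&b->cache_lock);
	/* @max == 0: move every prunable entry to @dispose */
	nfsd_prune_bucket_locked(nn, b, 0, &dispose);
	spin_unlock(&b->cache_lock);

	nfsd_cacherep_dispose(&dispose);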
@@ -283,11 +333,6 @@ static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
        return freed;
 }
 
-static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
-{
-       return prune_bucket(b, nn, 3);
-}
-
 /*
  * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
  * Also prune the oldest ones when the total exceeds the max number of entries.
@@ -443,6 +488,8 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
        __wsum                  csum;
        struct nfsd_drc_bucket  *b;
        int type = rqstp->rq_cachetype;
+       unsigned long freed;
+       LIST_HEAD(dispose);
        int rtn = RC_DOIT;
 
        rqstp->rq_cacherep = NULL;
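
(LIST_HEAD() declares an initialized on-stack list head, equivalent to

	struct list_head dispose = LIST_HEAD_INIT(dispose);

so deferring the frees requires no additional allocation.)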
@@ -467,20 +514,18 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
        found = nfsd_cache_insert(b, rp, nn);
        if (found != rp)
                goto found_entry;
-
-       nfsd_stats_rc_misses_inc();
        rqstp->rq_cacherep = rp;
        rp->c_state = RC_INPROG;
+       nfsd_prune_bucket_locked(nn, b, 3, &dispose);
+       spin_unlock(&b->cache_lock);
 
+       freed = nfsd_cacherep_dispose(&dispose);
+       trace_nfsd_drc_gc(nn, freed);
+
+       nfsd_stats_rc_misses_inc();
        atomic_inc(&nn->num_drc_entries);
        nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
-
-       nfsd_prune_bucket(b, nn);
-
-out_unlock:
-       spin_unlock(&b->cache_lock);
-out:
-       return rtn;
+       goto out;
 
 found_entry:
        /* We found a matching entry which is either in progress or done. */
@@ -518,7 +563,10 @@ found_entry:
 
 out_trace:
        trace_nfsd_drc_found(nn, rqstp, rtn);
-       goto out_unlock;
+out_unlock:
+       spin_unlock(&b->cache_lock);
+out:
+       return rtn;
 }
 
 /**
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 693fe6d465aa6dd6d287f21d450139e35262bbb8..c48419c0a58a5b42ec8f2e86c318a318a077f817 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1262,6 +1262,28 @@ TRACE_EVENT(nfsd_drc_mismatch,
                __entry->ingress)
 );
 
+TRACE_EVENT_CONDITION(nfsd_drc_gc,
+       TP_PROTO(
+               const struct nfsd_net *nn,
+               unsigned long freed
+       ),
+       TP_ARGS(nn, freed),
+       TP_CONDITION(freed > 0),
+       TP_STRUCT__entry(
+               __field(unsigned long long, boot_time)
+               __field(unsigned long, freed)
+               __field(int, total)
+       ),
+       TP_fast_assign(
+               __entry->boot_time = nn->boot_time;
+               __entry->freed = freed;
+               __entry->total = atomic_read(&nn->num_drc_entries);
+       ),
+       TP_printk("boot_time=%16llx total=%d freed=%lu",
+               __entry->boot_time, __entry->total, __entry->freed
+       )
+);
+
 TRACE_EVENT(nfsd_cb_args,
        TP_PROTO(
                const struct nfs4_client *clp,
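
Note: because of TP_CONDITION(freed > 0), nfsd_drc_gc records nothing
on the common path where pruning finds no victims, so callers can fire
it unconditionally after every dispose:

	freed = nfsd_cacherep_dispose(&dispose);
	/* no trace record is written when freed == 0 */
	trace_nfsd_drc_gc(nn, freed);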