#include <linux/list.h>
static void lu_object_free(const struct lu_env *env, struct lu_object *o);
+static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
/**
* Decrease reference counter on object. If last reference is freed, return
LASSERT(list_empty(&top->loh_lru));
list_add_tail(&top->loh_lru, &bkt->lsb_lru);
bkt->lsb_lru_len++;
+ lprocfs_counter_incr(site->ls_stats, LU_SS_LRU_LEN);
+ CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p, lru_len: %ld\n",
+ o, site->ls_obj_hash, bkt, bkt->lsb_lru_len);
cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
return;
}
top = o->lo_header;
set_bit(LU_OBJECT_HEARD_BANSHEE, &top->loh_flags);
if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags)) {
- struct cfs_hash *obj_hash = o->lo_dev->ld_site->ls_obj_hash;
+ struct lu_site *site = o->lo_dev->ld_site;
+ struct cfs_hash *obj_hash = site->ls_obj_hash;
struct cfs_hash_bd bd;
cfs_hash_bd_get_and_lock(obj_hash, &top->loh_fid, &bd, 1);
if (!list_empty(&top->loh_lru)) {
struct lu_site_bkt_data *bkt;
- list_del_init(&top->loh_lru);
+ list_del_init(&top->loh_lru);
bkt = cfs_hash_bd_extra_get(obj_hash, &bd);
bkt->lsb_lru_len--;
+ lprocfs_counter_decr(site->ls_stats, LU_SS_LRU_LEN);
}
cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
cfs_hash_bd_unlock(obj_hash, &bd, 1);
&bd2, &h->loh_hash);
list_move(&h->loh_lru, &dispose);
bkt->lsb_lru_len--;
+ lprocfs_counter_decr(s->ls_stats, LU_SS_LRU_LEN);
if (did_sth == 0)
did_sth = 1;
cfs_hash_get(s->ls_obj_hash, hnode);
lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
if (!list_empty(&h->loh_lru)) {
- list_del_init(&h->loh_lru);
+ list_del_init(&h->loh_lru);
bkt->lsb_lru_len--;
+ lprocfs_counter_decr(s->ls_stats, LU_SS_LRU_LEN);
}
return lu_object_top(h);
}
0, "cache_death_race", "cache_death_race");
lprocfs_counter_init(s->ls_stats, LU_SS_LRU_PURGED,
0, "lru_purged", "lru_purged");
+ /*
+ * Unlike the other counters, lru_len can be decremented, so we
+ * need lc_sum instead of just lc_count.
+ */
+ lprocfs_counter_init(s->ls_stats, LU_SS_LRU_LEN,
+ LPROCFS_CNTR_AVGMINMAX, "lru_len", "lru_len");
INIT_LIST_HEAD(&s->ls_linkage);
s->ls_top_dev = top;
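The lru_len counter registered just above is the reason for the lc_sum handling later in ls_stats_read(): unlike the hit/miss counters it is both incremented and decremented, so the number of recorded events (lc_count) says nothing about the current LRU length. A minimal sketch of the distinction, using hypothetical toy_* names and plain C rather than the lprocfs machinery:

struct toy_counter {
	long events;	/* lc_count analogue: one per incr or decr call */
	long sum;	/* lc_sum analogue: signed net value */
};

static void toy_incr(struct toy_counter *c) { c->events++; c->sum++; }
static void toy_decr(struct toy_counter *c) { c->events++; c->sum--; }

/*
 * After five additions to the lru and three removals, events == 8 while
 * sum == 2; only the latter is the lru length worth reporting.
 */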
/*
- * There exists a potential lock inversion deadlock scenario when using
- * Lustre on top of ZFS. This occurs between one of ZFS's
- * buf_hash_table.ht_lock's, and Lustre's lu_sites_guard lock. Essentially,
- * thread A will take the lu_sites_guard lock and sleep on the ht_lock,
- * while thread B will take the ht_lock and sleep on the lu_sites_guard
- * lock. Obviously neither thread will wake and drop their respective hold
- * on their lock.
+ * lu_cache_shrink_count() returns the number of cached objects that are
+ * candidates to be freed by shrink_slab(). A counter tracking the number
+ * of items in the site's lru is maintained in the per-CPU stats of each
+ * site. The counter is incremented when an object is added to a site's
+ * lru and decremented when one is removed. The number of freeable
+ * objects is the sum of all per-CPU counters for all sites.
*
- * To prevent this from happening we must ensure the lu_sites_guard lock is
- * not taken while down this code path. ZFS reliably does not set the
- * __GFP_FS bit in its code paths, so this can be used to determine if it
- * is safe to take the lu_sites_guard lock.
- *
- * Ideally we should accurately return the remaining number of cached
- * objects without taking the lu_sites_guard lock, but this is not
- * possible in the current implementation.
+ * Using a per-CPU counter is a compromise solution to concurrent access:
+ * lu_object_put() can update the counter without locking the site, and
+ * lu_cache_shrink_count() can sum the counters without locking each
+ * ls_obj_hash bucket.
*/
static unsigned long lu_cache_shrink_count(struct shrinker *sk,
struct shrink_control *sc)
{
- struct lu_site_stats stats;
struct lu_site *s;
struct lu_site *tmp;
unsigned long cached = 0;
mutex_lock(&lu_sites_guard);
list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
- memset(&stats, 0, sizeof(stats));
- lu_site_stats_get(s->ls_obj_hash, &stats, 0);
- cached += stats.lss_total - stats.lss_busy;
+ cached += ls_stats_read(s->ls_stats, LU_SS_LRU_LEN);
}
mutex_unlock(&lu_sites_guard);
cached = (cached / 100) * sysctl_vfs_cache_pressure;
- CDEBUG(D_INODE, "%ld objects cached\n", cached);
+ CDEBUG(D_INODE, "%ld objects cached, cache pressure %d\n",
+ cached, sysctl_vfs_cache_pressure);
+
return cached;
}
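The comment above describes the per-CPU compromise only in prose; a minimal userspace analogue (hypothetical toy_* names, not the lprocfs implementation) shows the shape of it: each CPU owns one slot, writers touch only their own slot without a shared lock, and the shrinker-side reader sums every slot. It also shows why the summed value can transiently dip below zero, which ls_stats_read() has to clamp.

#define TOY_NR_CPUS 4

static long toy_lru_len[TOY_NR_CPUS];		/* one slot per cpu */

/* object-put path: whichever cpu adds to the lru bumps its own slot */
static void toy_lru_add(int cpu)
{
	toy_lru_len[cpu]++;
}

/* removal/purge path: possibly a different cpu than the one that added */
static void toy_lru_del(int cpu)
{
	toy_lru_len[cpu]--;
}

/* reader: sums all slots without taking any per-bucket lock */
static long toy_lru_len_read(void)
{
	long sum = 0;
	int i;

	for (i = 0; i < TOY_NR_CPUS; i++)
		sum += toy_lru_len[i];

	/* a racing add/del pair can make the sum momentarily negative */
	return sum > 0 ? sum : 0;
}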
struct lprocfs_counter ret;
lprocfs_stats_collect(stats, idx, &ret);
+ if (idx == LU_SS_LRU_LEN)
+ /*
+ * protect against the counter on cpu A being decremented
+ * before the matching increment on cpu B is visible; unlikely
+ */
+ return (__u32)((ret.lc_sum > 0) ? ret.lc_sum : 0);
+
return (__u32)ret.lc_count;
}
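As a concrete (hypothetical) interleaving of the race guarded against above: lprocfs_stats_collect() reads cpu B's slot, then an object is added to the lru on cpu B (+1) and removed on cpu A (-1), and only then is cpu A's slot read. The collected lc_sum is -1, and without the clamp the __u32 cast would turn it into a huge positive lru_len; clamping to 0 keeps the reported value sane.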
memset(&stats, 0, sizeof(stats));
lu_site_stats_get(s->ls_obj_hash, &stats, 1);
- seq_printf(m, "%d/%d %d/%d %d %d %d %d %d %d %d\n",
+ seq_printf(m, "%d/%d %d/%d %d %d %d %d %d %d %d %d\n",
stats.lss_busy,
stats.lss_total,
stats.lss_populated,
ls_stats_read(s->ls_stats, LU_SS_CACHE_MISS),
ls_stats_read(s->ls_stats, LU_SS_CACHE_RACE),
ls_stats_read(s->ls_stats, LU_SS_CACHE_DEATH_RACE),
- ls_stats_read(s->ls_stats, LU_SS_LRU_PURGED));
+ ls_stats_read(s->ls_stats, LU_SS_LRU_PURGED),
+ ls_stats_read(s->ls_stats, LU_SS_LRU_LEN));
return 0;
}
EXPORT_SYMBOL(lu_site_stats_print);