f2fs: avoid race condition for shrinker count
author Jaegeuk Kim <jaegeuk@kernel.org>
Fri, 6 Nov 2020 21:22:05 +0000 (13:22 -0800)
committer Jaegeuk Kim <jaegeuk@kernel.org>
Thu, 3 Dec 2020 08:59:26 +0000 (00:59 -0800)
Light reported that the shrinker sometimes sees nat_cnt < dirty_nat_cnt,
resulting in wrong do_shrinker work. Let's avoid returning an insane overflowed
value by adding a single tracking value, nat_cnt[RECLAIMABLE_NAT].

Reported-by: Light Hsieh <Light.Hsieh@mediatek.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/checkpoint.c
fs/f2fs/debug.c
fs/f2fs/f2fs.h
fs/f2fs/node.c
fs/f2fs/node.h
fs/f2fs/shrinker.c

index 14ba151..617d0f6 100644 (file)
@@ -1619,7 +1619,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                        goto out;
                }
 
-               if (NM_I(sbi)->dirty_nat_cnt == 0 &&
+               if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 &&
                                SIT_I(sbi)->dirty_sentries == 0 &&
                                prefree_segments(sbi) == 0) {
                        f2fs_flush_sit_entries(sbi, cpc);
index a8357fd..197c914 100644 (file)
@@ -145,8 +145,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
                si->node_pages = NODE_MAPPING(sbi)->nrpages;
        if (sbi->meta_inode)
                si->meta_pages = META_MAPPING(sbi)->nrpages;
-       si->nats = NM_I(sbi)->nat_cnt;
-       si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
+       si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT];
+       si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT];
        si->sits = MAIN_SEGS(sbi);
        si->dirty_sits = SIT_I(sbi)->dirty_sentries;
        si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
@@ -278,9 +278,10 @@ get_cache:
        si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
                                NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
                                sizeof(struct free_nid);
-       si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
-       si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
-                                       sizeof(struct nat_entry_set);
+       si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] *
+                               sizeof(struct nat_entry);
+       si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] *
+                               sizeof(struct nat_entry_set);
        si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
        for (i = 0; i < MAX_INO_ENTRY; i++)
                si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
index 7c02b6d..36090cd 100644 (file)
@@ -820,6 +820,13 @@ enum nid_state {
        MAX_NID_STATE,
 };
 
+enum nat_state {
+       TOTAL_NAT,
+       DIRTY_NAT,
+       RECLAIMABLE_NAT,
+       MAX_NAT_STATE,
+};
+
 struct f2fs_nm_info {
        block_t nat_blkaddr;            /* base disk address of NAT */
        nid_t max_nid;                  /* maximum possible node ids */
@@ -835,8 +842,7 @@ struct f2fs_nm_info {
        struct rw_semaphore nat_tree_lock;      /* protect nat_tree_lock */
        struct list_head nat_entries;   /* cached nat entry list (clean) */
        spinlock_t nat_list_lock;       /* protect clean nat entry list */
-       unsigned int nat_cnt;           /* the # of cached nat entries */
-       unsigned int dirty_nat_cnt;     /* total num of nat entries in set */
+       unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
        unsigned int nat_blocks;        /* # of nat blocks */
 
        /* free node ids management */
index 42394de..e65d732 100644 (file)
@@ -62,8 +62,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
                                sizeof(struct free_nid)) >> PAGE_SHIFT;
                res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
        } else if (type == NAT_ENTRIES) {
-               mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
-                                                       PAGE_SHIFT;
+               mem_size = (nm_i->nat_cnt[TOTAL_NAT] *
+                               sizeof(struct nat_entry)) >> PAGE_SHIFT;
                res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
                if (excess_cached_nats(sbi))
                        res = false;
@@ -177,7 +177,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
        list_add_tail(&ne->list, &nm_i->nat_entries);
        spin_unlock(&nm_i->nat_list_lock);
 
-       nm_i->nat_cnt++;
+       nm_i->nat_cnt[TOTAL_NAT]++;
+       nm_i->nat_cnt[RECLAIMABLE_NAT]++;
        return ne;
 }
 
@@ -207,7 +208,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
 {
        radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
-       nm_i->nat_cnt--;
+       nm_i->nat_cnt[TOTAL_NAT]--;
+       nm_i->nat_cnt[RECLAIMABLE_NAT]--;
        __free_nat_entry(e);
 }
 
@@ -253,7 +255,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
        if (get_nat_flag(ne, IS_DIRTY))
                goto refresh_list;
 
-       nm_i->dirty_nat_cnt++;
+       nm_i->nat_cnt[DIRTY_NAT]++;
+       nm_i->nat_cnt[RECLAIMABLE_NAT]--;
        set_nat_flag(ne, IS_DIRTY, true);
 refresh_list:
        spin_lock(&nm_i->nat_list_lock);
@@ -273,7 +276,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 
        set_nat_flag(ne, IS_DIRTY, false);
        set->entry_cnt--;
-       nm_i->dirty_nat_cnt--;
+       nm_i->nat_cnt[DIRTY_NAT]--;
+       nm_i->nat_cnt[RECLAIMABLE_NAT]++;
 }
 
 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
@@ -2944,14 +2948,17 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        LIST_HEAD(sets);
        int err = 0;
 
-       /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+       /*
+        * during unmount, let's flush nat_bits before checking
+        * nat_cnt[DIRTY_NAT].
+        */
        if (enabled_nat_bits(sbi, cpc)) {
                down_write(&nm_i->nat_tree_lock);
                remove_nats_in_journal(sbi);
                up_write(&nm_i->nat_tree_lock);
        }
 
-       if (!nm_i->dirty_nat_cnt)
+       if (!nm_i->nat_cnt[DIRTY_NAT])
                return 0;
 
        down_write(&nm_i->nat_tree_lock);
@@ -2962,7 +2969,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         * into nat entry set.
         */
        if (enabled_nat_bits(sbi, cpc) ||
-               !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+               !__has_cursum_space(journal,
+                       nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
                remove_nats_in_journal(sbi);
 
        while ((found = __gang_lookup_nat_set(nm_i,
@@ -3086,7 +3094,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
                                                F2FS_RESERVED_NODE_NUM;
        nm_i->nid_cnt[FREE_NID] = 0;
        nm_i->nid_cnt[PREALLOC_NID] = 0;
-       nm_i->nat_cnt = 0;
        nm_i->ram_thresh = DEF_RAM_THRESHOLD;
        nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
        nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
@@ -3220,7 +3227,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
                        __del_from_nat_cache(nm_i, natvec[idx]);
                }
        }
-       f2fs_bug_on(sbi, nm_i->nat_cnt);
+       f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]);
 
        /* destroy nat set cache */
        nid = 0;
index 69e5859..f84541b 100644 (file)
@@ -126,13 +126,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
 
 static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
 {
-       return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
+       return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid *
                                        NM_I(sbi)->dirty_nats_ratio / 100;
 }
 
 static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
 {
-       return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
+       return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
 }
 
 static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
index d66de59..dd3c3c7 100644 (file)
@@ -18,9 +18,7 @@ static unsigned int shrinker_run_no;
 
 static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
 {
-       long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
-
-       return count > 0 ? count : 0;
+       return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT];
 }
 
 static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)