f2fs: fix race condition between free nid allocator/initializer
author Chao Yu <yuchao0@huawei.com>
Wed, 22 Mar 2017 06:45:05 +0000 (14:45 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 8 Dec 2018 12:05:13 +0000 (13:05 +0100)
commit 30a61ddf8117c26ac5b295e1233eaa9629a94ca3 upstream.

In the concurrent case below, an allocated nid can be loaded into the free
nid cache and be allocated again.

Thread A                                Thread B
- f2fs_create
 - f2fs_new_inode
  - alloc_nid
   - __insert_nid_to_list(ALLOC_NID_LIST)
                                        - f2fs_balance_fs_bg
                                         - build_free_nids
                                          - __build_free_nids
                                           - scan_nat_page
                                            - add_free_nid
                                             - __lookup_nat_cache
 - f2fs_add_link
  - init_inode_metadata
   - new_inode_page
    - new_node_page
     - set_node_addr
 - alloc_nid_done
  - __remove_nid_from_list(ALLOC_NID_LIST)
                                            - __insert_nid_to_list(FREE_NID_LIST)

This patch makes the nat cache lookup and the free nid list operation atomic
to avoid this race condition.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
[bwh: Backported to 4.9:
 - add_free_nid() returns 0 in case of any error (except low memory)
 - Tree/list addition has not been moved into __insert_nid_to_list()]
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/f2fs/node.c

index addff6a..e5d4803 100644 (file)
@@ -1704,8 +1704,9 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
 static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
-       struct free_nid *i;
+       struct free_nid *i, *e;
        struct nat_entry *ne;
+       int err = -EINVAL;
 
        if (!available_free_memory(sbi, FREE_NIDS))
                return -1;
@@ -1714,35 +1715,58 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
        if (unlikely(nid == 0))
                return 0;
 
-       if (build) {
-               /* do not add allocated nids */
-               ne = __lookup_nat_cache(nm_i, nid);
-               if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
-                               nat_get_blkaddr(ne) != NULL_ADDR))
-                       return 0;
-       }
-
        i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
        i->nid = nid;
        i->state = NID_NEW;
 
-       if (radix_tree_preload(GFP_NOFS)) {
-               kmem_cache_free(free_nid_slab, i);
-               return 0;
-       }
+       if (radix_tree_preload(GFP_NOFS))
+               goto err;
 
        spin_lock(&nm_i->free_nid_list_lock);
-       if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
-               spin_unlock(&nm_i->free_nid_list_lock);
-               radix_tree_preload_end();
-               kmem_cache_free(free_nid_slab, i);
-               return 0;
+
+       if (build) {
+               /*
+                *   Thread A             Thread B
+                *  - f2fs_create
+                *   - f2fs_new_inode
+                *    - alloc_nid
+                *     - __insert_nid_to_list(ALLOC_NID_LIST)
+                *                     - f2fs_balance_fs_bg
+                *                      - build_free_nids
+                *                       - __build_free_nids
+                *                        - scan_nat_page
+                *                         - add_free_nid
+                *                          - __lookup_nat_cache
+                *  - f2fs_add_link
+                *   - init_inode_metadata
+                *    - new_inode_page
+                *     - new_node_page
+                *      - set_node_addr
+                *  - alloc_nid_done
+                *   - __remove_nid_from_list(ALLOC_NID_LIST)
+                *                         - __insert_nid_to_list(FREE_NID_LIST)
+                */
+               ne = __lookup_nat_cache(nm_i, nid);
+               if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+                               nat_get_blkaddr(ne) != NULL_ADDR))
+                       goto err_out;
+
+               e = __lookup_free_nid_list(nm_i, nid);
+               if (e)
+                       goto err_out;
        }
+       if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i))
+               goto err_out;
+       err = 0;
        list_add_tail(&i->list, &nm_i->free_nid_list);
        nm_i->fcnt++;
+err_out:
        spin_unlock(&nm_i->free_nid_list_lock);
        radix_tree_preload_end();
-       return 1;
+err:
+       if (err)
+               kmem_cache_free(free_nid_slab, i);
+       return !err;
 }
 
 static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)