f2fs: fix race between write_checkpoint and write_begin
author: Sheng Yong <shengyong1@huawei.com>
Wed, 14 Nov 2018 11:34:28 +0000 (19:34 +0800)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 12 Feb 2019 18:46:00 +0000 (19:46 +0100)
[ Upstream commit 2866fb16d67992195b0526d19e65acb6640fb87f ]

The following race could lead to an inconsistent SIT bitmap:

Task A                          Task B
======                          ======
f2fs_write_checkpoint
  block_operations
    f2fs_lock_all
      down_write(node_change)
      down_write(node_write)
      ... sync ...
      up_write(node_change)
                                f2fs_file_write_iter
                                  set_inode_flag(FI_NO_PREALLOC)
                                  ......
                                  f2fs_write_begin(index=0, has inline data)
                                    prepare_write_begin
                                      __do_map_lock(AIO) => down_read(node_change)
                                      f2fs_convert_inline_page => update SIT
                                      __do_map_lock(AIO) => up_read(node_change)
  f2fs_flush_sit_entries <= inconsistent SIT
  finish write checkpoint
  sudden-power-off

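If a sudden power-off (SPO) occurs after the checkpoint is finished, the
SIT bitmap will be set incorrectly.

To avoid this, when the write will convert inline data (the inode has
inline data and pos + len exceeds MAX_INLINE_DATA), prepare_write_begin
now locks with F2FS_GET_BLOCK_DEFAULT (f2fs_lock_op) instead of
F2FS_GET_BLOCK_PRE_AIO, so the conversion cannot race with
write_checkpoint.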

Signed-off-by: Sheng Yong <shengyong1@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/f2fs/data.c

index c68b319..3d37124 100644 (file)
@@ -1880,6 +1880,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
        bool locked = false;
        struct extent_info ei = {0,0,0};
        int err = 0;
+       int flag;
 
        /*
         * we already allocated all the blocks, so we don't need to get
@@ -1889,9 +1890,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
                        !is_inode_flag_set(inode, FI_NO_PREALLOC))
                return 0;
 
+       /* f2fs_lock_op avoids race between write CP and convert_inline_page */
+       if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
+               flag = F2FS_GET_BLOCK_DEFAULT;
+       else
+               flag = F2FS_GET_BLOCK_PRE_AIO;
+
        if (f2fs_has_inline_data(inode) ||
                        (pos & PAGE_MASK) >= i_size_read(inode)) {
-               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+               __do_map_lock(sbi, flag, true);
                locked = true;
        }
 restart:
@@ -1929,6 +1936,7 @@ restart:
                                f2fs_put_dnode(&dn);
                                __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
                                                                true);
+                               WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
                                locked = true;
                                goto restart;
                        }
@@ -1942,7 +1950,7 @@ out:
        f2fs_put_dnode(&dn);
 unlock_out:
        if (locked)
-               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+               __do_map_lock(sbi, flag, false);
        return err;
 }