f2fs: rework write preallocations
authorEric Biggers <ebiggers@google.com>
Fri, 16 Jul 2021 14:39:13 +0000 (09:39 -0500)
committerJaegeuk Kim <jaegeuk@kernel.org>
Wed, 17 Nov 2021 19:28:22 +0000 (11:28 -0800)
f2fs_write_begin() assumes that all blocks were preallocated by
default unless FI_NO_PREALLOC is explicitly set.  This invites data
corruption, as there are cases in which not all blocks are preallocated.
Commit 47501f87c61a ("f2fs: preallocate DIO blocks when forcing
buffered_io") fixed one case, but there are others remaining.

Fix up this logic by replacing this flag with FI_PREALLOCATED_ALL, which
only gets set if all blocks for the current write were preallocated.

Also clean up f2fs_preallocate_blocks(), move it to file.c, and make it
handle some of the logic that was previously in write_iter() directly.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c

index d8190e8..3db0f30 100644 (file)
@@ -1384,53 +1384,6 @@ alloc:
        return 0;
 }
 
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
-{
-       struct inode *inode = file_inode(iocb->ki_filp);
-       struct f2fs_map_blocks map;
-       int flag;
-       int err = 0;
-       bool direct_io = iocb->ki_flags & IOCB_DIRECT;
-
-       map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
-       map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
-       if (map.m_len > map.m_lblk)
-               map.m_len -= map.m_lblk;
-       else
-               map.m_len = 0;
-
-       map.m_next_pgofs = NULL;
-       map.m_next_extent = NULL;
-       map.m_seg_type = NO_CHECK_TYPE;
-       map.m_may_create = true;
-
-       if (direct_io) {
-               map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
-               flag = f2fs_force_buffered_io(inode, iocb, from) ?
-                                       F2FS_GET_BLOCK_PRE_AIO :
-                                       F2FS_GET_BLOCK_PRE_DIO;
-               goto map_blocks;
-       }
-       if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
-               err = f2fs_convert_inline_inode(inode);
-               if (err)
-                       return err;
-       }
-       if (f2fs_has_inline_data(inode))
-               return err;
-
-       flag = F2FS_GET_BLOCK_PRE_AIO;
-
-map_blocks:
-       err = f2fs_map_blocks(inode, &map, 1, flag);
-       if (map.m_len > 0 && err == -ENOSPC) {
-               if (!direct_io)
-                       set_inode_flag(inode, FI_NO_PREALLOC);
-               err = 0;
-       }
-       return err;
-}
-
 void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 {
        if (flag == F2FS_GET_BLOCK_PRE_AIO) {
@@ -3340,12 +3293,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
        int flag;
 
        /*
-        * we already allocated all the blocks, so we don't need to get
-        * the block addresses when there is no need to fill the page.
+        * If a whole page is being written and we already preallocated all the
+        * blocks, then there is no need to get a block address now.
         */
-       if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
-           !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
-           !f2fs_verity_in_progress(inode))
+       if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
                return 0;
 
        /* f2fs_lock_op avoids race between write CP and convert_inline_page */
index ff37cdd..6f19662 100644 (file)
@@ -715,7 +715,7 @@ enum {
        FI_INLINE_DOTS,         /* indicate inline dot dentries */
        FI_DO_DEFRAG,           /* indicate defragment is running */
        FI_DIRTY_FILE,          /* indicate regular/symlink has dirty pages */
-       FI_NO_PREALLOC,         /* indicate skipped preallocated blocks */
+       FI_PREALLOCATED_ALL,    /* all blocks for write were preallocated */
        FI_HOT_DATA,            /* indicate file is hot */
        FI_EXTRA_ATTR,          /* indicate file has extra attribute */
        FI_PROJ_INHERIT,        /* indicate file inherits projectid */
@@ -3615,7 +3615,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
 int f2fs_reserve_new_block(struct dnode_of_data *dn);
 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
                        int op_flags, bool for_write);
index 92ec269..fc87d0f 100644 (file)
@@ -4235,10 +4235,77 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
        return ret;
 }
 
+/*
+ * Preallocate blocks for a write request, if it is possible and helpful to do
+ * so.  Returns a positive number if blocks may have been preallocated, 0 if no
+ * blocks were preallocated, or a negative errno value if something went
+ * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
+ * requested blocks (not just some of them) have been allocated.
+ */
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       const loff_t pos = iocb->ki_pos;
+       const size_t count = iov_iter_count(iter);
+       struct f2fs_map_blocks map = {};
+       bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
+                  !f2fs_force_buffered_io(inode, iocb, iter);
+       int flag;
+       int ret;
+
+       /* If it will be an out-of-place direct write, don't bother. */
+       if (dio && f2fs_lfs_mode(sbi))
+               return 0;
+
+       /* No-wait I/O can't allocate blocks. */
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               return 0;
+
+       /* If it will be a short write, don't bother. */
+       if (fault_in_iov_iter_readable(iter, count))
+               return 0;
+
+       if (f2fs_has_inline_data(inode)) {
+               /* If the data will fit inline, don't bother. */
+               if (pos + count <= MAX_INLINE_DATA(inode))
+                       return 0;
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       return ret;
+       }
+
+       /* Do not preallocate blocks that will be written partially in 4KB. */
+       map.m_lblk = F2FS_BLK_ALIGN(pos);
+       map.m_len = F2FS_BYTES_TO_BLK(pos + count);
+       if (map.m_len > map.m_lblk)
+               map.m_len -= map.m_lblk;
+       else
+               map.m_len = 0;
+       map.m_may_create = true;
+       if (dio) {
+               map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+               flag = F2FS_GET_BLOCK_PRE_DIO;
+       } else {
+               map.m_seg_type = NO_CHECK_TYPE;
+               flag = F2FS_GET_BLOCK_PRE_AIO;
+       }
+
+       ret = f2fs_map_blocks(inode, &map, 1, flag);
+       /* -ENOSPC is only a fatal error if no blocks could be allocated. */
+       if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0))
+               return ret;
+       if (ret == 0)
+               set_inode_flag(inode, FI_PREALLOCATED_ALL);
+       return map.m_len;
+}
+
 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
+       loff_t target_size;
+       int preallocated;
        ssize_t ret;
 
        if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4262,84 +4329,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        if (unlikely(IS_IMMUTABLE(inode))) {
                ret = -EPERM;
-               goto unlock;
+               goto out_unlock;
        }
 
        if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
                ret = -EPERM;
-               goto unlock;
+               goto out_unlock;
        }
 
        ret = generic_write_checks(iocb, from);
        if (ret > 0) {
-               bool preallocated = false;
-               size_t target_size = 0;
-               int err;
-
-               if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
-                       set_inode_flag(inode, FI_NO_PREALLOC);
-
-               if ((iocb->ki_flags & IOCB_NOWAIT)) {
+               if (iocb->ki_flags & IOCB_NOWAIT) {
                        if (!f2fs_overwrite_io(inode, iocb->ki_pos,
                                                iov_iter_count(from)) ||
                                f2fs_has_inline_data(inode) ||
                                f2fs_force_buffered_io(inode, iocb, from)) {
-                               clear_inode_flag(inode, FI_NO_PREALLOC);
-                               inode_unlock(inode);
                                ret = -EAGAIN;
-                               goto out;
+                               goto out_unlock;
                        }
-                       goto write;
                }
-
-               if (is_inode_flag_set(inode, FI_NO_PREALLOC))
-                       goto write;
-
                if (iocb->ki_flags & IOCB_DIRECT) {
-                       /*
-                        * Convert inline data for Direct I/O before entering
-                        * f2fs_direct_IO().
-                        */
-                       err = f2fs_convert_inline_inode(inode);
-                       if (err)
-                               goto out_err;
-                       /*
-                        * If force_buffere_io() is true, we have to allocate
-                        * blocks all the time, since f2fs_direct_IO will fall
-                        * back to buffered IO.
-                        */
-                       if (!f2fs_force_buffered_io(inode, iocb, from) &&
-                                       f2fs_lfs_mode(F2FS_I_SB(inode)))
-                               goto write;
+                       ret = f2fs_convert_inline_inode(inode);
+                       if (ret)
+                               goto out_unlock;
                }
-               preallocated = true;
+               /* Possibly preallocate the blocks for the write. */
                target_size = iocb->ki_pos + iov_iter_count(from);
-
-               err = f2fs_preallocate_blocks(iocb, from);
-               if (err) {
-out_err:
-                       clear_inode_flag(inode, FI_NO_PREALLOC);
-                       inode_unlock(inode);
-                       ret = err;
-                       goto out;
+               preallocated = f2fs_preallocate_blocks(iocb, from);
+               if (preallocated < 0) {
+                       ret = preallocated;
+                       goto out_unlock;
                }
-write:
+
                ret = __generic_file_write_iter(iocb, from);
-               clear_inode_flag(inode, FI_NO_PREALLOC);
 
-               /* if we couldn't write data, we should deallocate blocks. */
-               if (preallocated && i_size_read(inode) < target_size) {
+               /* Don't leave any preallocated blocks around past i_size. */
+               if (preallocated > 0 && i_size_read(inode) < target_size) {
                        down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
                        filemap_invalidate_lock(inode->i_mapping);
                        f2fs_truncate(inode);
                        filemap_invalidate_unlock(inode->i_mapping);
                        up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
                }
+               clear_inode_flag(inode, FI_PREALLOCATED_ALL);
 
                if (ret > 0)
                        f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
        }
-unlock:
+out_unlock:
        inode_unlock(inode);
 out:
        trace_f2fs_file_write_iter(inode, iocb->ki_pos,