Merge tag 'f2fs-for-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 5 May 2021 01:03:38 +0000 (18:03 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 5 May 2021 01:03:38 +0000 (18:03 -0700)
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we added a new mount option, "checkpoint_merge", which
  introduces a kernel thread dealing with the f2fs checkpoints. Once we
  start to manage the IO priority along with blk-cgroup, the checkpoint
  operation can be processed in a lower priority under the process
  context. Since the checkpoint holds all the filesystem operations, we
  give a higher priority to the checkpoint thread all the time.

  Enhancements:
   - introduce gc_merge mount option to let the background GC thread
     handle foreground GC requests
   - improve to run discard thread efficiently
   - allow modular compression algorithms
   - expose # of overprovision segments to sysfs
   - expose runtime compression stat to sysfs

  Bug fixes:
   - fix OOB memory access by the node id lookup
   - avoid touching checkpointed data in the checkpoint-disabled mode
   - fix the resizing flow to avoid kernel panic and race conditions
   - fix block allocation issues on pinned files
   - address some swapfile issues
   - fix hungtask problem and kernel panic during atomic write operations
   - don't start checkpoint thread in RO

  And, we've cleaned up some kernel coding style and build warnings. In
  addition, we fixed some minor race conditions and error handling
  routines"

* tag 'f2fs-for-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (48 commits)
  f2fs: drop inplace IO if fs status is abnormal
  f2fs: compress: remove unneed check condition
  f2fs: clean up left deprecated IO trace codes
  f2fs: avoid using native allocate_segment_by_default()
  f2fs: remove unnecessary struct declaration
  f2fs: fix to avoid NULL pointer dereference
  f2fs: avoid duplicated codes for cleanup
  f2fs: document: add description about compressed space handling
  f2fs: clean up build warnings
  f2fs: fix the periodic wakeups of discard thread
  f2fs: fix to avoid accessing invalid fio in f2fs_allocate_data_block()
  f2fs: fix to avoid GC/mmap race with f2fs_truncate()
  f2fs: set checkpoint_merge by default
  f2fs: Fix a hungtask problem in atomic write
  f2fs: fix to restrict mount condition on readonly block device
  f2fs: introduce gc_merge mount option
  f2fs: fix to cover __allocate_new_section() with curseg_lock
  f2fs: fix wrong alloc_type in f2fs_do_replace_block
  f2fs: delete empty compress.h
  f2fs: fix a typo in inode.c
  ...

27 files changed:
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/filesystems/f2fs.rst
fs/f2fs/Kconfig
fs/f2fs/acl.c
fs/f2fs/checkpoint.c
fs/f2fs/compress.c
fs/f2fs/compress.h [deleted file]
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/gc.h
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/node.h
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/verity.c
fs/f2fs/xattr.c
include/linux/f2fs_fs.h

index cbeac1b..4849b8e 100644 (file)
@@ -276,7 +276,7 @@ Date                April 2019
 Contact:       "Daniel Rosenberg" <drosen@google.com>
 Description:   If checkpoint=disable, it displays the number of blocks that
                are unusable.
-               If checkpoint=enable it displays the enumber of blocks that
+               If checkpoint=enable it displays the number of blocks that
                would be unusable if checkpoint=disable were to be set.
 
 What:          /sys/fs/f2fs/<disk>/encoding
@@ -409,3 +409,32 @@ Description:       Give a way to change checkpoint merge daemon's io priority.
                I/O priority "3". We can select the class between "rt" and "be",
                and set the I/O priority within valid range of it. "," delimiter
                is necessary in between I/O class and priority number.
+
+What:          /sys/fs/f2fs/<disk>/ovp_segments
+Date:          March 2021
+Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:   Shows the number of overprovision segments.
+
+What:          /sys/fs/f2fs/<disk>/compr_written_block
+Date:          March 2021
+Contact:       "Daeho Jeong" <daehojeong@google.com>
+Description:   Show the block count written after compression since mount. Note
+               that when the compressed blocks are deleted, this count doesn't
+               decrease. If you write "0" here, you can initialize
+               compr_written_block and compr_saved_block to "0".
+
+What:          /sys/fs/f2fs/<disk>/compr_saved_block
+Date:          March 2021
+Contact:       "Daeho Jeong" <daehojeong@google.com>
+Description:   Show the saved block count with compression since mount. Note
+               that when the compressed blocks are deleted, this count doesn't
+               decrease. If you write "0" here, you can initialize
+               compr_written_block and compr_saved_block to "0".
+
+What:          /sys/fs/f2fs/<disk>/compr_new_inode
+Date:          March 2021
+Contact:       "Daeho Jeong" <daehojeong@google.com>
+Description:   Show the count of inodes newly enabled for compression since mount.
+               Note that when the compression is disabled for the files, this count
+               doesn't decrease. If you write "0" here, you can initialize
+               compr_new_inode to "0".
index 35ed01a..992bf91 100644 (file)
@@ -110,6 +110,12 @@ background_gc=%s    Turn on/off cleaning operations, namely garbage
                         on synchronous garbage collection running in background.
                         Default value for this option is on. So garbage
                         collection is on by default.
+gc_merge                When background_gc is on, this option can be enabled to
+                        let the background GC thread handle foreground GC
+                        requests. This can eliminate the sluggishness caused by
+                        slow foreground GC operation when GC is triggered from a
+                        process with limited I/O and CPU resources.
+nogc_merge              Disable GC merge feature.
 disable_roll_forward    Disable the roll-forward recovery routine
 norecovery              Disable the roll-forward recovery routine, mounted read-
                         only (i.e., -o ro,disable_roll_forward)
@@ -813,6 +819,14 @@ Compression implementation
   * chattr +c file
   * chattr +c dir; touch dir/file
   * mount w/ -o compress_extension=ext; touch file.ext
+  * mount w/ -o compress_extension=*; touch any_file
+
+- At this point, the compression feature doesn't expose compressed space to the
+  user directly, in order to guarantee potential later data updates to that
+  space. Instead, the main goal is to reduce data writes to the flash disk as
+  much as possible, extending disk lifetime as well as relaxing IO
+  congestion. Alternatively, we've added an ioctl interface to reclaim compressed
+  space and show it to the user after setting the immutable bit.
 
 Compress metadata layout::
 
index 62e638a..7669de7 100644 (file)
@@ -7,6 +7,13 @@ config F2FS_FS
        select CRYPTO_CRC32
        select F2FS_FS_XATTR if FS_ENCRYPTION
        select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
+       select LZ4_COMPRESS if F2FS_FS_LZ4
+       select LZ4_DECOMPRESS if F2FS_FS_LZ4
+       select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
+       select LZO_COMPRESS if F2FS_FS_LZO
+       select LZO_DECOMPRESS if F2FS_FS_LZO
+       select ZSTD_COMPRESS if F2FS_FS_ZSTD
+       select ZSTD_DECOMPRESS if F2FS_FS_ZSTD
        help
          F2FS is based on Log-structured File System (LFS), which supports
          versatile "flash-friendly" features. The design has been focused on
@@ -94,8 +101,6 @@ config F2FS_FS_COMPRESSION
 config F2FS_FS_LZO
        bool "LZO compression support"
        depends on F2FS_FS_COMPRESSION
-       select LZO_COMPRESS
-       select LZO_DECOMPRESS
        default y
        help
          Support LZO compress algorithm, if unsure, say Y.
@@ -103,8 +108,6 @@ config F2FS_FS_LZO
 config F2FS_FS_LZ4
        bool "LZ4 compression support"
        depends on F2FS_FS_COMPRESSION
-       select LZ4_COMPRESS
-       select LZ4_DECOMPRESS
        default y
        help
          Support LZ4 compress algorithm, if unsure, say Y.
@@ -113,7 +116,6 @@ config F2FS_FS_LZ4HC
        bool "LZ4HC compression support"
        depends on F2FS_FS_COMPRESSION
        depends on F2FS_FS_LZ4
-       select LZ4HC_COMPRESS
        default y
        help
          Support LZ4HC compress algorithm, LZ4HC has compatible on-disk
@@ -122,8 +124,6 @@ config F2FS_FS_LZ4HC
 config F2FS_FS_ZSTD
        bool "ZSTD compression support"
        depends on F2FS_FS_COMPRESSION
-       select ZSTD_COMPRESS
-       select ZSTD_DECOMPRESS
        default y
        help
          Support ZSTD compress algorithm, if unsure, say Y.
@@ -132,8 +132,6 @@ config F2FS_FS_LZORLE
        bool "LZO-RLE compression support"
        depends on F2FS_FS_COMPRESSION
        depends on F2FS_FS_LZO
-       select LZO_COMPRESS
-       select LZO_DECOMPRESS
        default y
        help
          Support LZO-RLE compress algorithm, if unsure, say Y.
index 965037a..239ad94 100644 (file)
@@ -29,6 +29,7 @@ static inline size_t f2fs_acl_size(int count)
 static inline int f2fs_acl_count(size_t size)
 {
        ssize_t s;
+
        size -= sizeof(struct f2fs_acl_header);
        s = size - 4 * sizeof(struct f2fs_acl_entry_short);
        if (s < 0) {
index be5415a..f795049 100644 (file)
@@ -719,6 +719,7 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
                orphan_blk = (struct f2fs_orphan_block *)page_address(page);
                for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
                        nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
+
                        err = recover_orphan_inode(sbi, ino);
                        if (err) {
                                f2fs_put_page(page, 1);
@@ -1456,7 +1457,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                        orphan_blocks);
 
        if (__remain_node_summaries(cpc->reason))
-               ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
+               ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
                                cp_payload_blks + data_sum_blocks +
                                orphan_blocks + NR_CURSEG_NODE_TYPE);
        else
@@ -1818,7 +1819,11 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
        llist_add(&req.llnode, &cprc->issue_list);
        atomic_inc(&cprc->queued_ckpt);
 
-       /* update issue_list before we wake up issue_checkpoint thread */
+       /*
+        * update issue_list before we wake up issue_checkpoint thread,
+        * this smp_mb() pairs with another barrier in ___wait_event(),
+        * see more details in comments of waitqueue_active().
+        */
        smp_mb();
 
        if (waitqueue_active(&cprc->ckpt_wait_queue))
index 77fa342..53b1378 100644 (file)
@@ -76,12 +76,6 @@ bool f2fs_is_compressed_page(struct page *page)
                return false;
        if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page))
                return false;
-       /*
-        * page->private may be set with pid.
-        * pid_max is enough to check if it is traced.
-        */
-       if (IS_IO_TRACED_PAGE(page))
-               return false;
 
        f2fs_bug_on(F2FS_M_SB(page->mapping),
                *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC);
@@ -896,7 +890,6 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
 
 static bool __cluster_may_compress(struct compress_ctx *cc)
 {
-       struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
        loff_t i_size = i_size_read(cc->inode);
        unsigned nr_pages = DIV_ROUND_UP(i_size, PAGE_SIZE);
        int i;
@@ -904,12 +897,7 @@ static bool __cluster_may_compress(struct compress_ctx *cc)
        for (i = 0; i < cc->cluster_size; i++) {
                struct page *page = cc->rpages[i];
 
-               f2fs_bug_on(sbi, !page);
-
-               if (unlikely(f2fs_cp_error(sbi)))
-                       return false;
-               if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
-                       return false;
+               f2fs_bug_on(F2FS_I_SB(cc->inode), !page);
 
                /* beyond EOF */
                if (page->index >= nr_pages)
@@ -1353,6 +1341,7 @@ unlock_continue:
        if (fio.compr_blocks)
                f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
        f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
+       add_compr_block_stat(inode, cc->nr_cpages);
 
        set_inode_flag(cc->inode, FI_APPEND_WRITE);
        if (cc->cluster_idx == 0)
diff --git a/fs/f2fs/compress.h b/fs/f2fs/compress.h
deleted file mode 100644 (file)
index e69de29..0000000
index 4e5257c..96f1a35 100644 (file)
@@ -1086,6 +1086,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
 
        for (; count > 0; dn->ofs_in_node++) {
                block_t blkaddr = f2fs_data_blkaddr(dn);
+
                if (blkaddr == NULL_ADDR) {
                        dn->data_blkaddr = NEW_ADDR;
                        __set_data_blkaddr(dn);
@@ -1722,7 +1723,7 @@ static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
        return __get_data_block(inode, iblock, bh_result, create,
                                F2FS_GET_BLOCK_DIO, NULL,
                                f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-                               IS_SWAPFILE(inode) ? false : true);
+                               true);
 }
 
 static int get_data_block_dio(struct inode *inode, sector_t iblock,
@@ -1837,6 +1838,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        int ret = 0;
        bool compr_cluster = false;
        unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
+       loff_t maxbytes;
 
        if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
                ret = f2fs_precache_extents(inode);
@@ -1850,6 +1852,15 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
        inode_lock(inode);
 
+       maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
+       if (start > maxbytes) {
+               ret = -EFBIG;
+               goto out;
+       }
+
+       if (len > maxbytes || (maxbytes - len) < start)
+               len = maxbytes - start;
+
        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
                ret = f2fs_xattr_fiemap(inode, fieinfo);
                goto out;
@@ -3755,6 +3766,7 @@ int f2fs_migrate_page(struct address_space *mapping,
 
        if (atomic_written) {
                struct inmem_pages *cur;
+
                list_for_each_entry(cur, &fi->inmem_pages, list)
                        if (cur->page == page) {
                                cur->page = newpage;
@@ -3780,11 +3792,64 @@ int f2fs_migrate_page(struct address_space *mapping,
 #endif
 
 #ifdef CONFIG_SWAP
+static int f2fs_is_file_aligned(struct inode *inode)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       block_t main_blkaddr = SM_I(sbi)->main_blkaddr;
+       block_t cur_lblock;
+       block_t last_lblock;
+       block_t pblock;
+       unsigned long nr_pblocks;
+       unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+       int ret = 0;
+
+       cur_lblock = 0;
+       last_lblock = bytes_to_blks(inode, i_size_read(inode));
+
+       while (cur_lblock < last_lblock) {
+               struct f2fs_map_blocks map;
+
+               memset(&map, 0, sizeof(map));
+               map.m_lblk = cur_lblock;
+               map.m_len = last_lblock - cur_lblock;
+               map.m_next_pgofs = NULL;
+               map.m_next_extent = NULL;
+               map.m_seg_type = NO_CHECK_TYPE;
+               map.m_may_create = false;
+
+               ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
+               if (ret)
+                       goto out;
+
+               /* hole */
+               if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+                       f2fs_err(sbi, "Swapfile has holes\n");
+                       ret = -ENOENT;
+                       goto out;
+               }
+
+               pblock = map.m_pblk;
+               nr_pblocks = map.m_len;
+
+               if ((pblock - main_blkaddr) & (blocks_per_sec - 1) ||
+                       nr_pblocks & (blocks_per_sec - 1)) {
+                       f2fs_err(sbi, "Swapfile does not align to section");
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               cur_lblock += nr_pblocks;
+       }
+out:
+       return ret;
+}
+
 static int check_swap_activate_fast(struct swap_info_struct *sis,
                                struct file *swap_file, sector_t *span)
 {
        struct address_space *mapping = swap_file->f_mapping;
        struct inode *inode = mapping->host;
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        sector_t cur_lblock;
        sector_t last_lblock;
        sector_t pblock;
@@ -3792,8 +3857,8 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
        sector_t highest_pblock = 0;
        int nr_extents = 0;
        unsigned long nr_pblocks;
-       u64 len;
-       int ret;
+       unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+       int ret = 0;
 
        /*
         * Map all the blocks into the extent list.  This code doesn't try
@@ -3801,31 +3866,41 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
         */
        cur_lblock = 0;
        last_lblock = bytes_to_blks(inode, i_size_read(inode));
-       len = i_size_read(inode);
 
-       while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
+       while (cur_lblock < last_lblock && cur_lblock < sis->max) {
                struct f2fs_map_blocks map;
-               pgoff_t next_pgofs;
 
                cond_resched();
 
                memset(&map, 0, sizeof(map));
                map.m_lblk = cur_lblock;
-               map.m_len = bytes_to_blks(inode, len) - cur_lblock;
-               map.m_next_pgofs = &next_pgofs;
+               map.m_len = last_lblock - cur_lblock;
+               map.m_next_pgofs = NULL;
+               map.m_next_extent = NULL;
                map.m_seg_type = NO_CHECK_TYPE;
+               map.m_may_create = false;
 
                ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
                if (ret)
-                       goto err_out;
+                       goto out;
 
                /* hole */
-               if (!(map.m_flags & F2FS_MAP_FLAGS))
-                       goto err_out;
+               if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+                       f2fs_err(sbi, "Swapfile has holes\n");
+                       ret = -ENOENT;
+                       goto out;
+               }
 
                pblock = map.m_pblk;
                nr_pblocks = map.m_len;
 
+               if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) ||
+                               nr_pblocks & (blocks_per_sec - 1)) {
+                       f2fs_err(sbi, "Swapfile does not align to section");
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                if (cur_lblock + nr_pblocks >= sis->max)
                        nr_pblocks = sis->max - cur_lblock;
 
@@ -3854,9 +3929,6 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
        sis->highest_bit = cur_lblock - 1;
 out:
        return ret;
-err_out:
-       pr_err("swapon: swapfile has holes\n");
-       return -EINVAL;
 }
 
 /* Copied from generic_swapfile_activate() to check any holes */
@@ -3865,6 +3937,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
 {
        struct address_space *mapping = swap_file->f_mapping;
        struct inode *inode = mapping->host;
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        unsigned blocks_per_page;
        unsigned long page_no;
        sector_t probe_block;
@@ -3872,11 +3945,15 @@ static int check_swap_activate(struct swap_info_struct *sis,
        sector_t lowest_block = -1;
        sector_t highest_block = 0;
        int nr_extents = 0;
-       int ret;
+       int ret = 0;
 
        if (PAGE_SIZE == F2FS_BLKSIZE)
                return check_swap_activate_fast(sis, swap_file, span);
 
+       ret = f2fs_is_file_aligned(inode);
+       if (ret)
+               goto out;
+
        blocks_per_page = bytes_to_blks(inode, PAGE_SIZE);
 
        /*
@@ -3891,13 +3968,14 @@ static int check_swap_activate(struct swap_info_struct *sis,
                unsigned block_in_page;
                sector_t first_block;
                sector_t block = 0;
-               int      err = 0;
 
                cond_resched();
 
                block = probe_block;
-               err = bmap(inode, &block);
-               if (err || !block)
+               ret = bmap(inode, &block);
+               if (ret)
+                       goto out;
+               if (!block)
                        goto bad_bmap;
                first_block = block;
 
@@ -3913,9 +3991,10 @@ static int check_swap_activate(struct swap_info_struct *sis,
                                        block_in_page++) {
 
                        block = probe_block + block_in_page;
-                       err = bmap(inode, &block);
-
-                       if (err || !block)
+                       ret = bmap(inode, &block);
+                       if (ret)
+                               goto out;
+                       if (!block)
                                goto bad_bmap;
 
                        if (block != first_block + block_in_page) {
@@ -3955,8 +4034,8 @@ reprobe:
 out:
        return ret;
 bad_bmap:
-       pr_err("swapon: swapfile has holes\n");
-       return -EINVAL;
+       f2fs_err(sbi, "Swapfile has holes\n");
+       return -ENOENT;
 }
 
 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
index 91855d5..c03949a 100644 (file)
@@ -173,6 +173,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        si->util_invalid = 50 - si->util_free - si->util_valid;
        for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
                struct curseg_info *curseg = CURSEG_I(sbi, i);
+
                si->curseg[i] = curseg->segno;
                si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
                si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
@@ -300,10 +301,12 @@ get_cache:
        si->page_mem = 0;
        if (sbi->node_inode) {
                unsigned npages = NODE_MAPPING(sbi)->nrpages;
+
                si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
        }
        if (sbi->meta_inode) {
                unsigned npages = META_MAPPING(sbi)->nrpages;
+
                si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
        }
 }
index e211a1b..dc7ce79 100644 (file)
@@ -471,6 +471,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
                struct page *page, struct inode *inode)
 {
        enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
+
        lock_page(page);
        f2fs_wait_on_page_writeback(page, type, true, true);
        de->ino = cpu_to_le32(inode->i_ino);
index 11a20dc..0448788 100644 (file)
@@ -97,6 +97,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_NORECOVERY          0x04000000
 #define F2FS_MOUNT_ATGC                        0x08000000
 #define F2FS_MOUNT_MERGE_CHECKPOINT    0x10000000
+#define        F2FS_MOUNT_GC_MERGE             0x20000000
 
 #define F2FS_OPTION(sbi)       ((sbi)->mount_opt)
 #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -637,21 +638,26 @@ enum {
 #define FADVISE_MODIFIABLE_BITS        (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
 
 #define file_is_cold(inode)    is_file(inode, FADVISE_COLD_BIT)
-#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
 #define file_set_cold(inode)   set_file(inode, FADVISE_COLD_BIT)
-#define file_lost_pino(inode)  set_file(inode, FADVISE_LOST_PINO_BIT)
 #define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT)
+
+#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_lost_pino(inode)  set_file(inode, FADVISE_LOST_PINO_BIT)
 #define file_got_pino(inode)   clear_file(inode, FADVISE_LOST_PINO_BIT)
+
 #define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT)
 #define file_set_encrypt(inode)        set_file(inode, FADVISE_ENCRYPT_BIT)
-#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT)
+
 #define file_enc_name(inode)   is_file(inode, FADVISE_ENC_NAME_BIT)
 #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
+
 #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT)
 #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT)
+
 #define file_is_hot(inode)     is_file(inode, FADVISE_HOT_BIT)
 #define file_set_hot(inode)    set_file(inode, FADVISE_HOT_BIT)
 #define file_clear_hot(inode)  clear_file(inode, FADVISE_HOT_BIT)
+
 #define file_is_verity(inode)  is_file(inode, FADVISE_VERITY_BIT)
 #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
 
@@ -860,7 +866,7 @@ struct f2fs_nm_info {
        /* NAT cache management */
        struct radix_tree_root nat_root;/* root of the nat entry cache */
        struct radix_tree_root nat_set_root;/* root of the nat set cache */
-       struct rw_semaphore nat_tree_lock;      /* protect nat_tree_lock */
+       struct rw_semaphore nat_tree_lock;      /* protect nat entry tree */
        struct list_head nat_entries;   /* cached nat entry list (clean) */
        spinlock_t nat_list_lock;       /* protect clean nat entry list */
        unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
@@ -1297,14 +1303,6 @@ enum {
 #define IS_DUMMY_WRITTEN_PAGE(page)                    \
                (page_private(page) == DUMMY_WRITTEN_PAGE)
 
-#ifdef CONFIG_F2FS_IO_TRACE
-#define IS_IO_TRACED_PAGE(page)                        \
-               (page_private(page) > 0 &&              \
-                page_private(page) < (unsigned long)PID_MAX_LIMIT)
-#else
-#define IS_IO_TRACED_PAGE(page) (0)
-#endif
-
 /* For compression */
 enum compress_algorithm_type {
        COMPRESS_LZO,
@@ -1623,6 +1621,11 @@ struct f2fs_sb_info {
 #ifdef CONFIG_F2FS_FS_COMPRESSION
        struct kmem_cache *page_array_slab;     /* page array entry */
        unsigned int page_array_slab_size;      /* default page array slab size */
+
+       /* For runtime compression statistics */
+       u64 compr_written_block;
+       u64 compr_saved_block;
+       u32 compr_new_inode;
 #endif
 };
 
@@ -2215,6 +2218,7 @@ static inline block_t __cp_payload(struct f2fs_sb_info *sbi)
 static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 {
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+       void *tmp_ptr = &ckpt->sit_nat_version_bitmap;
        int offset;
 
        if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) {
@@ -2224,7 +2228,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
                 * if large_nat_bitmap feature is enabled, leave checksum
                 * protection for all nat/sit bitmaps.
                 */
-               return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32);
+               return tmp_ptr + offset + sizeof(__le32);
        }
 
        if (__cp_payload(sbi) > 0) {
@@ -2235,7 +2239,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
        } else {
                offset = (flag == NAT_BITMAP) ?
                        le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
-               return &ckpt->sit_nat_version_bitmap + offset;
+               return tmp_ptr + offset;
        }
 }
 
@@ -3302,7 +3306,6 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname);
 /*
  * node.c
  */
-struct dnode_of_data;
 struct node_info;
 
 int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
@@ -3379,6 +3382,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno);
 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
@@ -3386,7 +3390,7 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
                        unsigned int *newseg, bool new_sec, int dir);
 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
                                        unsigned int start, unsigned int end);
-void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
+void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3550,7 +3554,7 @@ void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
 int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
 void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
 block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force,
                        unsigned int segno);
 void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
 int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
@@ -3958,6 +3962,18 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
 void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
 int __init f2fs_init_compress_cache(void);
 void f2fs_destroy_compress_cache(void);
+#define inc_compr_inode_stat(inode)                                    \
+       do {                                                            \
+               struct f2fs_sb_info *sbi = F2FS_I_SB(inode);            \
+               sbi->compr_new_inode++;                                 \
+       } while (0)
+#define add_compr_block_stat(inode, blocks)                            \
+       do {                                                            \
+               struct f2fs_sb_info *sbi = F2FS_I_SB(inode);            \
+               int diff = F2FS_I(inode)->i_cluster_size - blocks;      \
+               sbi->compr_written_block += blocks;                     \
+               sbi->compr_saved_block += diff;                         \
+       } while (0)
 #else
 static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
 static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
@@ -3986,6 +4002,7 @@ static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return
 static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
 static inline int __init f2fs_init_compress_cache(void) { return 0; }
 static inline void f2fs_destroy_compress_cache(void) { }
+#define inc_compr_inode_stat(inode)            do { } while (0)
 #endif
 
 static inline void set_compress_context(struct inode *inode)
@@ -4009,6 +4026,7 @@ static inline void set_compress_context(struct inode *inode)
        F2FS_I(inode)->i_flags |= F2FS_COMPR_FL;
        set_inode_flag(inode, FI_COMPRESSED_FILE);
        stat_inc_compr_inode(inode);
+       inc_compr_inode_stat(inode);
        f2fs_mark_inode_dirty_sync(inode, true);
 }
 
@@ -4179,8 +4197,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
                if (F2FS_IO_ALIGNED(sbi))
                        return true;
        }
-       if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
-                                       !IS_SWAPFILE(inode))
+       if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
                return true;
 
        return false;
index 8a56acb..44a4650 100644 (file)
@@ -1622,9 +1622,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
        struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
                        .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
                        .m_may_create = true };
-       pgoff_t pg_end;
+       pgoff_t pg_start, pg_end;
        loff_t new_size = i_size_read(inode);
        loff_t off_end;
+       block_t expanded = 0;
        int err;
 
        err = inode_newsize_ok(inode, (len + offset));
@@ -1637,11 +1638,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
 
        f2fs_balance_fs(sbi, true);
 
+       pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
        pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
        off_end = (offset + len) & (PAGE_SIZE - 1);
 
-       map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT;
-       map.m_len = pg_end - map.m_lblk;
+       map.m_lblk = pg_start;
+       map.m_len = pg_end - pg_start;
        if (off_end)
                map.m_len++;
 
@@ -1649,19 +1651,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
                return 0;
 
        if (f2fs_is_pinned_file(inode)) {
-               block_t len = (map.m_len >> sbi->log_blocks_per_seg) <<
-                                       sbi->log_blocks_per_seg;
-               block_t done = 0;
+               block_t sec_blks = BLKS_PER_SEC(sbi);
+               block_t sec_len = roundup(map.m_len, sec_blks);
 
-               if (map.m_len % sbi->blocks_per_seg)
-                       len += sbi->blocks_per_seg;
-
-               map.m_len = sbi->blocks_per_seg;
+               map.m_len = sec_blks;
 next_alloc:
                if (has_not_enough_free_secs(sbi, 0,
                        GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
                        down_write(&sbi->gc_lock);
-                       err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+                       err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
                        if (err && err != -ENODATA && err != -EAGAIN)
                                goto out_err;
                }
@@ -1669,7 +1667,7 @@ next_alloc:
                down_write(&sbi->pin_sem);
 
                f2fs_lock_op(sbi);
-               f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA_PINNED);
+               f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
                f2fs_unlock_op(sbi);
 
                map.m_seg_type = CURSEG_COLD_DATA_PINNED;
@@ -1677,24 +1675,25 @@ next_alloc:
 
                up_write(&sbi->pin_sem);
 
-               done += map.m_len;
-               len -= map.m_len;
+               expanded += map.m_len;
+               sec_len -= map.m_len;
                map.m_lblk += map.m_len;
-               if (!err && len)
+               if (!err && sec_len)
                        goto next_alloc;
 
-               map.m_len = done;
+               map.m_len = expanded;
        } else {
                err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+               expanded = map.m_len;
        }
 out_err:
        if (err) {
                pgoff_t last_off;
 
-               if (!map.m_len)
+               if (!expanded)
                        return err;
 
-               last_off = map.m_lblk + map.m_len - 1;
+               last_off = pg_start + expanded - 1;
 
                /* update new size to the failed position */
                new_size = (last_off == pg_end) ? offset + len :
@@ -2434,7 +2433,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
                down_write(&sbi->gc_lock);
        }
 
-       ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
+       ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
 out:
        mnt_drop_write_file(filp);
        return ret;
@@ -2470,7 +2469,8 @@ do_more:
                down_write(&sbi->gc_lock);
        }
 
-       ret = f2fs_gc(sbi, range->sync, true, GET_SEGNO(sbi, range->start));
+       ret = f2fs_gc(sbi, range->sync, true, false,
+                               GET_SEGNO(sbi, range->start));
        if (ret) {
                if (ret == -EBUSY)
                        ret = -EAGAIN;
@@ -2527,7 +2527,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_map_blocks map = { .m_next_extent = NULL,
-                                       .m_seg_type = NO_CHECK_TYPE ,
+                                       .m_seg_type = NO_CHECK_TYPE,
                                        .m_may_create = false };
        struct extent_info ei = {0, 0, 0};
        pgoff_t pg_start, pg_end, next_pgofs;
@@ -2923,7 +2923,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
                sm->last_victim[GC_CB] = end_segno + 1;
                sm->last_victim[GC_GREEDY] = end_segno + 1;
                sm->last_victim[ALLOC_NEXT] = end_segno + 1;
-               ret = f2fs_gc(sbi, true, true, start_segno);
+               ret = f2fs_gc(sbi, true, true, true, start_segno);
                if (ret == -EAGAIN)
                        ret = 0;
                else if (ret < 0)
@@ -4311,8 +4311,13 @@ write:
                clear_inode_flag(inode, FI_NO_PREALLOC);
 
                /* if we couldn't write data, we should deallocate blocks. */
-               if (preallocated && i_size_read(inode) < target_size)
+               if (preallocated && i_size_read(inode) < target_size) {
+                       down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+                       down_write(&F2FS_I(inode)->i_mmap_sem);
                        f2fs_truncate(inode);
+                       up_write(&F2FS_I(inode)->i_mmap_sem);
+                       up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+               }
 
                if (ret > 0)
                        f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
index 39330ad..8d1f17a 100644 (file)
@@ -31,19 +31,24 @@ static int gc_thread_func(void *data)
        struct f2fs_sb_info *sbi = data;
        struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
        wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+       wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
        unsigned int wait_ms;
 
        wait_ms = gc_th->min_sleep_time;
 
        set_freezable();
        do {
-               bool sync_mode;
+               bool sync_mode, foreground = false;
 
                wait_event_interruptible_timeout(*wq,
                                kthread_should_stop() || freezing(current) ||
+                               waitqueue_active(fggc_wq) ||
                                gc_th->gc_wake,
                                msecs_to_jiffies(wait_ms));
 
+               if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq))
+                       foreground = true;
+
                /* give it a try one time */
                if (gc_th->gc_wake)
                        gc_th->gc_wake = 0;
@@ -90,7 +95,10 @@ static int gc_thread_func(void *data)
                        goto do_gc;
                }
 
-               if (!down_write_trylock(&sbi->gc_lock)) {
+               if (foreground) {
+                       down_write(&sbi->gc_lock);
+                       goto do_gc;
+               } else if (!down_write_trylock(&sbi->gc_lock)) {
                        stat_other_skip_bggc_count(sbi);
                        goto next;
                }
@@ -107,14 +115,22 @@ static int gc_thread_func(void *data)
                else
                        increase_sleep_time(gc_th, &wait_ms);
 do_gc:
-               stat_inc_bggc_count(sbi->stat_info);
+               if (!foreground)
+                       stat_inc_bggc_count(sbi->stat_info);
 
                sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;
 
+               /* foreground GC was triggered via f2fs_balance_fs() */
+               if (foreground)
+                       sync_mode = false;
+
                /* if return value is not zero, no victim was selected */
-               if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO))
+               if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
                        wait_ms = gc_th->no_gc_sleep_time;
 
+               if (foreground)
+                       wake_up_all(&gc_th->fggc_wq);
+
                trace_f2fs_background_gc(sbi->sb, wait_ms,
                                prefree_segments(sbi), free_segments(sbi));
 
@@ -144,10 +160,11 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
        gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
        gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
 
-       gc_th->gc_wake= 0;
+       gc_th->gc_wake = 0;
 
        sbi->gc_thread = gc_th;
        init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
+       init_waitqueue_head(&sbi->gc_thread->fggc_wq);
        sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
                        "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
        if (IS_ERR(gc_th->f2fs_gc_task)) {
@@ -162,9 +179,11 @@ out:
 void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
 {
        struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+
        if (!gc_th)
                return;
        kthread_stop(gc_th->f2fs_gc_task);
+       wake_up_all(&gc_th->fggc_wq);
        kfree(gc_th);
        sbi->gc_thread = NULL;
 }
@@ -392,10 +411,6 @@ static void add_victim_entry(struct f2fs_sb_info *sbi,
                if (p->gc_mode == GC_AT &&
                        get_valid_blocks(sbi, segno, true) == 0)
                        return;
-
-               if (p->alloc_mode == AT_SSR &&
-                       get_seg_entry(sbi, segno)->ckpt_valid_blocks == 0)
-                       return;
        }
 
        for (i = 0; i < sbi->segs_per_sec; i++)
@@ -728,11 +743,27 @@ retry:
 
                if (sec_usage_check(sbi, secno))
                        goto next;
+
                /* Don't touch checkpointed data */
-               if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
-                                       get_ckpt_valid_blocks(sbi, segno) &&
-                                       p.alloc_mode == LFS))
-                       goto next;
+               if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+                       if (p.alloc_mode == LFS) {
+                               /*
+                                * LFS is set to find source section during GC.
+                                * The victim should have no checkpointed data.
+                                */
+                               if (get_ckpt_valid_blocks(sbi, segno, true))
+                                       goto next;
+                       } else {
+                               /*
+                                * SSR | AT_SSR are set to find target segment
+                                * for writes, which may already be filled by
+                                * checkpointed and newly written blocks.
+                                */
+                               if (!f2fs_segment_has_free_slot(sbi, segno))
+                                       goto next;
+                       }
+               }
+
                if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
                        goto next;
 
@@ -828,6 +859,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
 static void put_gc_inode(struct gc_inode_list *gc_list)
 {
        struct inode_entry *ie, *next_ie;
+
        list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
                radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
                iput(ie->inode);
@@ -952,9 +984,11 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
                bidx = node_ofs - 1;
        } else if (node_ofs <= indirect_blks) {
                int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
+
                bidx = node_ofs - 2 - dec;
        } else {
                int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
+
                bidx = node_ofs - 5 - dec;
        }
        return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
@@ -1120,7 +1154,8 @@ static int move_data_block(struct inode *inode, block_t bidx,
        block_t newaddr;
        int err = 0;
        bool lfs_mode = f2fs_lfs_mode(fio.sbi);
-       int type = fio.sbi->am.atgc_enabled ?
+       int type = fio.sbi->am.atgc_enabled && (gc_type == BG_GC) &&
+                               (fio.sbi->gc_mode != GC_URGENT_HIGH) ?
                                CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
 
        /* do not read out */
@@ -1354,7 +1389,8 @@ out:
  * the victim data block is ignored.
  */
 static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
-               struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
+               struct gc_inode_list *gc_list, unsigned int segno, int gc_type,
+               bool force_migrate)
 {
        struct super_block *sb = sbi->sb;
        struct f2fs_summary *entry;
@@ -1383,8 +1419,8 @@ next_step:
                 * race condition along with SSR block allocation.
                 */
                if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
-                               get_valid_blocks(sbi, segno, true) ==
-                                                       BLKS_PER_SEC(sbi))
+                       (!force_migrate && get_valid_blocks(sbi, segno, true) ==
+                                                       BLKS_PER_SEC(sbi)))
                        return submitted;
 
                if (check_valid_map(sbi, segno, off) == 0)
@@ -1519,7 +1555,8 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
 
 static int do_garbage_collect(struct f2fs_sb_info *sbi,
                                unsigned int start_segno,
-                               struct gc_inode_list *gc_list, int gc_type)
+                               struct gc_inode_list *gc_list, int gc_type,
+                               bool force_migrate)
 {
        struct page *sum_page;
        struct f2fs_summary_block *sum;
@@ -1606,7 +1643,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
                                                                gc_type);
                else
                        submitted += gc_data_segment(sbi, sum->entries, gc_list,
-                                                       segno, gc_type);
+                                                       segno, gc_type,
+                                                       force_migrate);
 
                stat_inc_seg_count(sbi, type, gc_type);
                migrated++;
@@ -1634,7 +1672,7 @@ skip:
 }
 
 int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
-                       bool background, unsigned int segno)
+                       bool background, bool force, unsigned int segno)
 {
        int gc_type = sync ? FG_GC : BG_GC;
        int sec_freed = 0, seg_freed = 0, total_freed = 0;
@@ -1696,7 +1734,7 @@ gc_more:
        if (ret)
                goto stop;
 
-       seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
+       seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force);
        if (gc_type == FG_GC &&
                seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
                sec_freed++;
@@ -1835,7 +1873,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
                        .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
                };
 
-               do_garbage_collect(sbi, segno, &gc_list, FG_GC);
+               do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
                put_gc_inode(&gc_list);
 
                if (!gc_only && get_valid_blocks(sbi, segno, true)) {
@@ -1974,7 +2012,20 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
 
        /* stop CP to protect MAIN_SEC in free_segment_range */
        f2fs_lock_op(sbi);
+
+       spin_lock(&sbi->stat_lock);
+       if (shrunk_blocks + valid_user_blocks(sbi) +
+               sbi->current_reserved_blocks + sbi->unusable_block_count +
+               F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
+               err = -ENOSPC;
+       spin_unlock(&sbi->stat_lock);
+
+       if (err)
+               goto out_unlock;
+
        err = free_segment_range(sbi, secs, true);
+
+out_unlock:
        f2fs_unlock_op(sbi);
        up_write(&sbi->gc_lock);
        if (err)
index 0c8dae1..3fe145e 100644 (file)
@@ -42,6 +42,12 @@ struct f2fs_gc_kthread {
 
        /* for changing gc mode */
        unsigned int gc_wake;
+
+       /* for GC_MERGE mount option */
+       wait_queue_head_t fggc_wq;              /*
+                                                * caller of f2fs_balance_fs()
+                                                * will wait on this wait queue.
+                                                */
 };
 
 struct gc_inode_list {
index 993caef..92652ca 100644 (file)
@@ -219,7 +219,8 @@ out:
 
        f2fs_put_page(page, 1);
 
-       f2fs_balance_fs(sbi, dn.node_changed);
+       if (!err)
+               f2fs_balance_fs(sbi, dn.node_changed);
 
        return err;
 }
index 349d9cb..b401f08 100644 (file)
@@ -666,6 +666,7 @@ retry:
        node_page = f2fs_get_node_page(sbi, inode->i_ino);
        if (IS_ERR(node_page)) {
                int err = PTR_ERR(node_page);
+
                if (err == -ENOMEM) {
                        cond_resched();
                        goto retry;
@@ -698,7 +699,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 
        /*
         * We need to balance fs here to prevent from producing dirty node pages
-        * during the urgent cleaning time when runing out of free sections.
+        * during the urgent cleaning time when running out of free sections.
         */
        f2fs_update_inode_page(inode);
        if (wbc && wbc->nr_to_write)
index 377c6b1..a9cd9cf 100644 (file)
@@ -418,6 +418,7 @@ struct dentry *f2fs_get_parent(struct dentry *child)
 {
        struct page *page;
        unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot_name, &page);
+
        if (!ino) {
                if (IS_ERR(page))
                        return ERR_CAST(page);
@@ -627,6 +628,7 @@ static const char *f2fs_get_link(struct dentry *dentry,
                                 struct delayed_call *done)
 {
        const char *link = page_get_link(dentry, inode, done);
+
        if (!IS_ERR(link) && !*link) {
                /* this is broken symlink case */
                do_delayed_call(done);
@@ -765,6 +767,7 @@ out_fail:
 static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode = d_inode(dentry);
+
        if (f2fs_empty_dir(inode))
                return f2fs_unlink(dir, dentry);
        return -ENOTEMPTY;
index 4b0e2e3..e67ce5f 100644 (file)
@@ -43,11 +43,15 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
 bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct sysinfo val;
        unsigned long avail_ram;
        unsigned long mem_size = 0;
        bool res = false;
 
+       if (!nm_i)
+               return true;
+
        si_meminfo(&val);
 
        /* only uses low memory */
@@ -89,6 +93,10 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
                /* it allows 20% / total_ram for inmemory pages */
                mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
                res = mem_size < (val.totalram / 5);
+       } else if (type == DISCARD_CACHE) {
+               mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
+                               sizeof(struct discard_cmd)) >> PAGE_SHIFT;
+               res = mem_size < (avail_ram * nm_i->ram_thresh / 100);
        } else {
                if (!sbi->sb->s_bdi->wb.dirty_exceeded)
                        return true;
@@ -462,6 +470,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
        /* increment version no as node is removed */
        if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
                unsigned char version = nat_get_version(e);
+
                nat_set_version(e, inc_node_version(version));
        }
 
@@ -1383,7 +1392,7 @@ repeat:
                goto out_err;
        }
 page_hit:
-       if(unlikely(nid != nid_of_node(page))) {
+       if (unlikely(nid != nid_of_node(page))) {
                f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
                          nid, nid_of_node(page), ino_of_node(page),
                          ofs_of_node(page), cpver_of_node(page),
@@ -1775,7 +1784,7 @@ continue_unlock:
 out:
        if (nwritten)
                f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
-       return ret ? -EIO: 0;
+       return ret ? -EIO : 0;
 }
 
 static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
@@ -2117,8 +2126,8 @@ static int __insert_free_nid(struct f2fs_sb_info *sbi,
                                struct free_nid *i)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
-
        int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
+
        if (err)
                return err;
 
@@ -2785,6 +2794,9 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
                struct f2fs_nat_entry raw_ne;
                nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
 
+               if (f2fs_check_nid_range(sbi, nid))
+                       continue;
+
                raw_ne = nat_in_journal(journal, i);
 
                ne = __lookup_nat_cache(nm_i, nid);
@@ -2980,6 +2992,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        while ((found = __gang_lookup_nat_set(nm_i,
                                        set_idx, SETVEC_SIZE, setvec))) {
                unsigned idx;
+
                set_idx = setvec[found - 1]->set + 1;
                for (idx = 0; idx < found; idx++)
                        __adjust_nat_entry_set(setvec[idx], &sets,
index f84541b..7a45c0f 100644 (file)
@@ -147,6 +147,7 @@ enum mem_type {
        INO_ENTRIES,    /* indicates inode entries */
        EXTENT_CACHE,   /* indicates extent cache */
        INMEM_PAGES,    /* indicates inmemory pages */
+       DISCARD_CACHE,  /* indicates memory of cached discard cmds */
        BASE_CHECK,     /* check kernel status */
 };
 
index da75d5d..422146c 100644 (file)
@@ -458,6 +458,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
        /* Get the previous summary */
        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
                struct curseg_info *curseg = CURSEG_I(sbi, i);
+
                if (curseg->segno == segno) {
                        sum = curseg->sum_blk->entries[blkoff];
                        goto got_it;
@@ -875,5 +876,5 @@ out:
 #endif
        sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
 
-       return ret ? ret: err;
+       return ret ? ret : err;
 }
index c286656..c605415 100644 (file)
@@ -186,7 +186,10 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
 {
        struct inmem_pages *new;
 
-       f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
+       if (PagePrivate(page))
+               set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
+       else
+               f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
 
        new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
 
@@ -324,23 +327,27 @@ void f2fs_drop_inmem_pages(struct inode *inode)
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
 
-       while (!list_empty(&fi->inmem_pages)) {
+       do {
                mutex_lock(&fi->inmem_lock);
+               if (list_empty(&fi->inmem_pages)) {
+                       fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
+
+                       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+                       if (!list_empty(&fi->inmem_ilist))
+                               list_del_init(&fi->inmem_ilist);
+                       if (f2fs_is_atomic_file(inode)) {
+                               clear_inode_flag(inode, FI_ATOMIC_FILE);
+                               sbi->atomic_files--;
+                       }
+                       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+                       mutex_unlock(&fi->inmem_lock);
+                       break;
+               }
                __revoke_inmem_pages(inode, &fi->inmem_pages,
                                                true, false, true);
                mutex_unlock(&fi->inmem_lock);
-       }
-
-       fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
-
-       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
-       if (!list_empty(&fi->inmem_ilist))
-               list_del_init(&fi->inmem_ilist);
-       if (f2fs_is_atomic_file(inode)) {
-               clear_inode_flag(inode, FI_ATOMIC_FILE);
-               sbi->atomic_files--;
-       }
-       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+       } while (1);
 }
 
 void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
@@ -503,8 +510,19 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
         * dir/node pages without enough free segments.
         */
        if (has_not_enough_free_secs(sbi, 0, 0)) {
-               down_write(&sbi->gc_lock);
-               f2fs_gc(sbi, false, false, NULL_SEGNO);
+               if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
+                                       sbi->gc_thread->f2fs_gc_task) {
+                       DEFINE_WAIT(wait);
+
+                       prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
+                                               TASK_UNINTERRUPTIBLE);
+                       wake_up(&sbi->gc_thread->gc_wait_queue_head);
+                       io_schedule();
+                       finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+               } else {
+                       down_write(&sbi->gc_lock);
+                       f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+               }
        }
 }
 
@@ -653,7 +671,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
 
        llist_add(&cmd.llnode, &fcc->issue_list);
 
-       /* update issue_list before we wake up issue_flush thread */
+       /*
+        * update issue_list before we wake up issue_flush thread. This
+        * smp_mb() pairs with another barrier in ___wait_event(); see
+        * the comments of waitqueue_active() for more details.
+        */
        smp_mb();
 
        if (waitqueue_active(&fcc->flush_wait_queue))
@@ -861,7 +883,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
        mutex_lock(&dirty_i->seglist_lock);
 
        valid_blocks = get_valid_blocks(sbi, segno, false);
-       ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
+       ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
 
        if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
                ckpt_valid_blocks == usable_blocks)) {
@@ -946,7 +968,7 @@ static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
        for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
                if (get_valid_blocks(sbi, segno, false))
                        continue;
-               if (get_ckpt_valid_blocks(sbi, segno))
+               if (get_ckpt_valid_blocks(sbi, segno, false))
                        continue;
                mutex_unlock(&dirty_i->seglist_lock);
                return segno;
@@ -1095,6 +1117,8 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
                                struct discard_policy *dpolicy,
                                int discard_type, unsigned int granularity)
 {
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
        /* common policy */
        dpolicy->type = discard_type;
        dpolicy->sync = true;
@@ -1114,7 +1138,9 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
                dpolicy->ordered = true;
                if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
                        dpolicy->granularity = 1;
-                       dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+                       if (atomic_read(&dcc->discard_cmd_cnt))
+                               dpolicy->max_interval =
+                                       DEF_MIN_DISCARD_ISSUE_TIME;
                }
        } else if (discard_type == DPOLICY_FORCE) {
                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -1730,8 +1756,15 @@ static int issue_discard_thread(void *data)
        set_freezable();
 
        do {
-               __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
-                                       dcc->discard_granularity);
+               if (sbi->gc_mode == GC_URGENT_HIGH ||
+                       !f2fs_available_free_memory(sbi, DISCARD_CACHE))
+                       __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+               else
+                       __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
+                                               dcc->discard_granularity);
+
+               if (!atomic_read(&dcc->discard_cmd_cnt))
+                      wait_ms = dpolicy.max_interval;
 
                wait_event_interruptible_timeout(*q,
                                kthread_should_stop() || freezing(current) ||
@@ -1755,9 +1788,8 @@ static int issue_discard_thread(void *data)
                        wait_ms = dpolicy.max_interval;
                        continue;
                }
-
-               if (sbi->gc_mode == GC_URGENT_HIGH)
-                       __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+               if (!atomic_read(&dcc->discard_cmd_cnt))
+                       continue;
 
                sb_start_intwrite(sbi->sb);
 
@@ -1765,7 +1797,7 @@ static int issue_discard_thread(void *data)
                if (issued > 0) {
                        __wait_all_discard_cmd(sbi, &dpolicy);
                        wait_ms = dpolicy.min_interval;
-               } else if (issued == -1){
+               } else if (issued == -1) {
                        wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
                        if (!wait_ms)
                                wait_ms = dpolicy.mid_interval;
@@ -2142,6 +2174,7 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
                                        unsigned int segno, int modified)
 {
        struct seg_entry *se = get_seg_entry(sbi, segno);
+
        se->type = type;
        if (modified)
                __mark_sit_entry_dirty(sbi, segno);
@@ -2333,6 +2366,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        void *addr = curseg->sum_blk;
+
        addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
        memcpy(addr, sum, sizeof(struct f2fs_summary));
 }
@@ -2604,22 +2638,20 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
        curseg->alloc_type = LFS;
 }
 
-static void __next_free_blkoff(struct f2fs_sb_info *sbi,
-                       struct curseg_info *seg, block_t start)
+static int __next_free_blkoff(struct f2fs_sb_info *sbi,
+                                       int segno, block_t start)
 {
-       struct seg_entry *se = get_seg_entry(sbi, seg->segno);
+       struct seg_entry *se = get_seg_entry(sbi, segno);
        int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
        unsigned long *target_map = SIT_I(sbi)->tmp_map;
        unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
        unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
-       int i, pos;
+       int i;
 
        for (i = 0; i < entries; i++)
                target_map[i] = ckpt_map[i] | cur_map[i];
 
-       pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
-
-       seg->next_blkoff = pos;
+       return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
 }
 
 /*
@@ -2631,11 +2663,18 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
                                struct curseg_info *seg)
 {
        if (seg->alloc_type == SSR)
-               __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
+               seg->next_blkoff =
+                       __next_free_blkoff(sbi, seg->segno,
+                                               seg->next_blkoff + 1);
        else
                seg->next_blkoff++;
 }
 
+bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
+{
+       return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
+}
+
 /*
  * This function always allocates a used segment(from dirty seglist) by SSR
  * manner, so it should recover the existing segment information of valid blocks
@@ -2661,7 +2700,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
 
        reset_curseg(sbi, type, 1);
        curseg->alloc_type = SSR;
-       __next_free_blkoff(sbi, curseg, 0);
+       curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
 
        sum_page = f2fs_get_sum_page(sbi, new_segno);
        if (IS_ERR(sum_page)) {
@@ -2893,7 +2932,8 @@ unlock:
        up_read(&SM_I(sbi)->curseg_lock);
 }
 
-static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
+static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
+                                               bool new_sec, bool force)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int old_segno;
@@ -2901,32 +2941,43 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
        if (!curseg->inited)
                goto alloc;
 
-       if (!curseg->next_blkoff &&
-               !get_valid_blocks(sbi, curseg->segno, false) &&
-               !get_ckpt_valid_blocks(sbi, curseg->segno))
-               return;
+       if (force || curseg->next_blkoff ||
+               get_valid_blocks(sbi, curseg->segno, new_sec))
+               goto alloc;
 
+       if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
+               return;
 alloc:
        old_segno = curseg->segno;
        SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
        locate_dirty_segment(sbi, old_segno);
 }
 
-void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type)
+static void __allocate_new_section(struct f2fs_sb_info *sbi,
+                                               int type, bool force)
+{
+       __allocate_new_segment(sbi, type, true, force);
+}
+
+void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
 {
+       down_read(&SM_I(sbi)->curseg_lock);
        down_write(&SIT_I(sbi)->sentry_lock);
-       __allocate_new_segment(sbi, type);
+       __allocate_new_section(sbi, type, force);
        up_write(&SIT_I(sbi)->sentry_lock);
+       up_read(&SM_I(sbi)->curseg_lock);
 }
 
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
 {
        int i;
 
+       down_read(&SM_I(sbi)->curseg_lock);
        down_write(&SIT_I(sbi)->sentry_lock);
        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
-               __allocate_new_segment(sbi, i);
+               __allocate_new_segment(sbi, i, false, false);
        up_write(&SIT_I(sbi)->sentry_lock);
+       up_read(&SM_I(sbi)->curseg_lock);
 }
 
 static const struct segment_allocation default_salloc_ops = {
@@ -3239,7 +3290,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
                struct inode *inode = fio->page->mapping->host;
 
                if (is_cold_data(fio->page)) {
-                       if (fio->sbi->am.atgc_enabled)
+                       if (fio->sbi->am.atgc_enabled &&
+                               (fio->io_type == FS_DATA_IO) &&
+                               (fio->sbi->gc_mode != GC_URGENT_HIGH))
                                return CURSEG_ALL_DATA_ATGC;
                        else
                                return CURSEG_COLD_DATA;
@@ -3365,12 +3418,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                f2fs_inode_chksum_set(sbi, page);
        }
 
-       if (F2FS_IO_ALIGNED(sbi))
-               fio->retry = false;
-
        if (fio) {
                struct f2fs_bio_info *io;
 
+               if (F2FS_IO_ALIGNED(sbi))
+                       fio->retry = false;
+
                INIT_LIST_HEAD(&fio->list);
                fio->in_list = true;
                io = sbi->write_io[fio->type] + fio->temp;
@@ -3499,7 +3552,13 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
                set_sbi_flag(sbi, SBI_NEED_FSCK);
                f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
                          __func__, segno);
-               return -EFSCORRUPTED;
+               err = -EFSCORRUPTED;
+               goto drop_bio;
+       }
+
+       if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || f2fs_cp_error(sbi)) {
+               err = -EIO;
+               goto drop_bio;
        }
 
        stat_inc_inplace_blocks(fio->sbi);
@@ -3514,6 +3573,15 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
        }
 
        return err;
+drop_bio:
+       /* fio->bio is the caller's bio slot; end a bio only if one is there. */
+       if (fio->bio && *(fio->bio)) {
+               (*(fio->bio))->bi_status = BLK_STS_IOERR;
+               bio_endio(*(fio->bio));
+               /* Clear the slot itself so the ended bio is never reused. */
+               *(fio->bio) = NULL;
+       }
+       return err;
 }
 
 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
@@ -3539,6 +3607,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        struct seg_entry *se;
        int type;
        unsigned short old_blkoff;
+       unsigned char old_alloc_type;
 
        segno = GET_SEGNO(sbi, new_blkaddr);
        se = get_seg_entry(sbi, segno);
@@ -3572,6 +3641,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 
        old_cursegno = curseg->segno;
        old_blkoff = curseg->next_blkoff;
+       old_alloc_type = curseg->alloc_type;
 
        /* change the current segment */
        if (segno != curseg->segno) {
@@ -3606,6 +3676,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                        change_curseg(sbi, type, true);
                }
                curseg->next_blkoff = old_blkoff;
+               curseg->alloc_type = old_alloc_type;
        }
 
        up_write(&sit_i->sentry_lock);
@@ -3717,6 +3788,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
 
                for (j = 0; j < blk_off; j++) {
                        struct f2fs_summary *s;
+
                        s = (struct f2fs_summary *)(kaddr + offset);
                        seg_i->sum_blk->entries[j] = *s;
                        offset += SUMMARY_SIZE;
@@ -3779,6 +3851,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
                if (__exist_node_summaries(sbi)) {
                        struct f2fs_summary *ns = &sum->entries[0];
                        int i;
+
                        for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
                                ns->version = 0;
                                ns->ofs_in_node = 0;
@@ -3880,6 +3953,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
        /* Step 3: write summary entries */
        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
                unsigned short blkoff;
+
                seg_i = CURSEG_I(sbi, i);
                if (sbi->ckpt->alloc_type[i] == SSR)
                        blkoff = sbi->blocks_per_seg;
@@ -3916,6 +3990,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
                                        block_t blkaddr, int type)
 {
        int i, end;
+
        if (IS_DATASEG(type))
                end = type + NR_CURSEG_DATA_TYPE;
        else
@@ -4499,6 +4574,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
        /* set use the current segments */
        for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
                struct curseg_info *curseg_t = CURSEG_I(sbi, type);
+
                __set_test_and_inuse(sbi, curseg_t->segno);
        }
 }
@@ -4731,7 +4807,8 @@ static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
 }
 
 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
-                             void *data) {
+                             void *data)
+{
        memcpy(data, zone, sizeof(struct blk_zone));
        return 0;
 }
@@ -4783,7 +4860,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
 
        f2fs_notice(sbi, "Assign new section to curseg[%d]: "
                    "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
-       allocate_segment_by_default(sbi, type, true);
+
+       f2fs_allocate_new_section(sbi, type, true);
 
        /* check consistency of the zone curseg pointed to */
        if (check_zone_write_pointer(sbi, zbd, &zone))
@@ -4847,8 +4925,10 @@ struct check_zone_write_pointer_args {
 };
 
 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
-                                     void *data) {
+                                     void *data)
+{
        struct check_zone_write_pointer_args *args;
+
        args = (struct check_zone_write_pointer_args *)data;
 
        return check_zone_write_pointer(args->sbi, args->fdev, zone);
@@ -5127,6 +5207,7 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
 {
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
        kvfree(dirty_i->victim_secmap);
 }
 
@@ -5171,6 +5252,7 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
 {
        struct free_segmap_info *free_i = SM_I(sbi)->free_info;
+
        if (!free_i)
                return;
        SM_I(sbi)->free_info = NULL;
index e9a7a63..050230c 100644 (file)
@@ -172,12 +172,10 @@ enum {
 /*
  * BG_GC means the background cleaning job.
  * FG_GC means the on-demand cleaning job.
- * FORCE_FG_GC means on-demand cleaning job in background.
  */
 enum {
        BG_GC = 0,
        FG_GC,
-       FORCE_FG_GC,
 };
 
 /* for a function parameter to select a victim segment */
@@ -361,8 +359,20 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
 }
 
 static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
-                               unsigned int segno)
+                               unsigned int segno, bool use_section)
 {
+       if (use_section && __is_large_section(sbi)) {
+               unsigned int start_segno = START_SEGNO(segno);
+               unsigned int blocks = 0;
+               int i;
+
+               for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) {
+                       struct seg_entry *se = get_seg_entry(sbi, start_segno);
+
+                       blocks += se->ckpt_valid_blocks;
+               }
+               return blocks;
+       }
        return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
 }
 
index 82592b1..7d325bf 100644 (file)
@@ -151,6 +151,8 @@ enum {
        Opt_compress_chksum,
        Opt_compress_mode,
        Opt_atgc,
+       Opt_gc_merge,
+       Opt_nogc_merge,
        Opt_err,
 };
 
@@ -223,6 +225,8 @@ static match_table_t f2fs_tokens = {
        {Opt_compress_chksum, "compress_chksum"},
        {Opt_compress_mode, "compress_mode=%s"},
        {Opt_atgc, "atgc"},
+       {Opt_gc_merge, "gc_merge"},
+       {Opt_nogc_merge, "nogc_merge"},
        {Opt_err, NULL},
 };
 
@@ -555,6 +559,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 
        while ((p = strsep(&options, ",")) != NULL) {
                int token;
+
                if (!*p)
                        continue;
                /*
@@ -1073,6 +1078,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                case Opt_atgc:
                        set_opt(sbi, ATGC);
                        break;
+               case Opt_gc_merge:
+                       set_opt(sbi, GC_MERGE);
+                       break;
+               case Opt_nogc_merge:
+                       clear_opt(sbi, GC_MERGE);
+                       break;
                default:
                        f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
                                 p);
@@ -1616,6 +1627,7 @@ static inline void f2fs_show_quota_options(struct seq_file *seq,
 #endif
 }
 
+#ifdef CONFIG_F2FS_FS_COMPRESSION
 static inline void f2fs_show_compress_options(struct seq_file *seq,
                                                        struct super_block *sb)
 {
@@ -1661,6 +1673,7 @@ static inline void f2fs_show_compress_options(struct seq_file *seq,
        else if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER)
                seq_printf(seq, ",compress_mode=%s", "user");
 }
+#endif
 
 static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 {
@@ -1673,6 +1686,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
        else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF)
                seq_printf(seq, ",background_gc=%s", "off");
 
+       if (test_opt(sbi, GC_MERGE))
+               seq_puts(seq, ",gc_merge");
+
        if (test_opt(sbi, DISABLE_ROLL_FORWARD))
                seq_puts(seq, ",disable_roll_forward");
        if (test_opt(sbi, NORECOVERY))
@@ -1824,6 +1840,7 @@ static void default_options(struct f2fs_sb_info *sbi)
        set_opt(sbi, EXTENT_CACHE);
        set_opt(sbi, NOHEAP);
        clear_opt(sbi, DISABLE_CHECKPOINT);
+       set_opt(sbi, MERGE_CHECKPOINT);
        F2FS_OPTION(sbi).unusable_cap = 0;
        sbi->sb->s_flags |= SB_LAZYTIME;
        set_opt(sbi, FLUSH_MERGE);
@@ -1865,7 +1882,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 
        while (!f2fs_time_over(sbi, DISABLE_TIME)) {
                down_write(&sbi->gc_lock);
-               err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+               err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
                if (err == -ENODATA) {
                        err = 0;
                        break;
@@ -1876,7 +1893,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 
        ret = sync_filesystem(sbi->sb);
        if (ret || err) {
-               err = ret ? ret: err;
+               err = ret ? ret : err;
                goto restore_flag;
        }
 
@@ -1925,8 +1942,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        struct f2fs_mount_info org_mount_opt;
        unsigned long old_sb_flags;
        int err;
-       bool need_restart_gc = false;
-       bool need_stop_gc = false;
+       bool need_restart_gc = false, need_stop_gc = false;
+       bool need_restart_ckpt = false, need_stop_ckpt = false;
+       bool need_restart_flush = false, need_stop_flush = false;
        bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
        bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
        bool no_io_align = !F2FS_IO_ALIGNED(sbi);
@@ -2035,7 +2053,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
         * option. Also sync the filesystem.
         */
        if ((*flags & SB_RDONLY) ||
-                       F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) {
+                       (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
+                       !test_opt(sbi, GC_MERGE))) {
                if (sbi->gc_thread) {
                        f2fs_stop_gc_thread(sbi);
                        need_restart_gc = true;
@@ -2057,18 +2076,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                clear_sbi_flag(sbi, SBI_IS_CLOSE);
        }
 
-       if (checkpoint_changed) {
-               if (test_opt(sbi, DISABLE_CHECKPOINT)) {
-                       err = f2fs_disable_checkpoint(sbi);
-                       if (err)
-                               goto restore_gc;
-               } else {
-                       f2fs_enable_checkpoint(sbi);
-               }
-       }
-
-       if (!test_opt(sbi, DISABLE_CHECKPOINT) &&
-                       test_opt(sbi, MERGE_CHECKPOINT)) {
+       if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
+                       !test_opt(sbi, MERGE_CHECKPOINT)) {
+               f2fs_stop_ckpt_thread(sbi);
+               need_restart_ckpt = true;
+       } else {
                err = f2fs_start_ckpt_thread(sbi);
                if (err) {
                        f2fs_err(sbi,
@@ -2076,8 +2088,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                            err);
                        goto restore_gc;
                }
-       } else {
-               f2fs_stop_ckpt_thread(sbi);
+               need_stop_ckpt = true;
        }
 
        /*
@@ -2087,11 +2098,24 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
                clear_opt(sbi, FLUSH_MERGE);
                f2fs_destroy_flush_cmd_control(sbi, false);
+               need_restart_flush = true;
        } else {
                err = f2fs_create_flush_cmd_control(sbi);
                if (err)
-                       goto restore_gc;
+                       goto restore_ckpt;
+               need_stop_flush = true;
        }
+
+       if (checkpoint_changed) {
+               if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+                       err = f2fs_disable_checkpoint(sbi);
+                       if (err)
+                               goto restore_flush;
+               } else {
+                       f2fs_enable_checkpoint(sbi);
+               }
+       }
+
 skip:
 #ifdef CONFIG_QUOTA
        /* Release old quota file names */
@@ -2106,6 +2130,21 @@ skip:
        adjust_unusable_cap_perc(sbi);
        *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
        return 0;
+restore_flush:
+       if (need_restart_flush) {
+               if (f2fs_create_flush_cmd_control(sbi))
+                       f2fs_warn(sbi, "background flush thread has stopped");
+       } else if (need_stop_flush) {
+               clear_opt(sbi, FLUSH_MERGE);
+               f2fs_destroy_flush_cmd_control(sbi, false);
+       }
+restore_ckpt:
+       if (need_restart_ckpt) {
+               if (f2fs_start_ckpt_thread(sbi))
+                       f2fs_warn(sbi, "background ckpt thread has stopped");
+       } else if (need_stop_ckpt) {
+               f2fs_stop_ckpt_thread(sbi);
+       }
 restore_gc:
        if (need_restart_gc) {
                if (f2fs_start_gc_thread(sbi))
@@ -3719,7 +3758,7 @@ try_onemore:
        sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
 
        for (i = 0; i < NR_PAGE_TYPE; i++) {
-               int n = (i == META) ? 1: NR_TEMP_TYPE;
+               int n = (i == META) ? 1 : NR_TEMP_TYPE;
                int j;
 
                sbi->write_io[i] =
@@ -3833,7 +3872,7 @@ try_onemore:
 
        /* setup checkpoint request control and start checkpoint issue thread */
        f2fs_init_ckpt_req_control(sbi);
-       if (!test_opt(sbi, DISABLE_CHECKPOINT) &&
+       if (!f2fs_readonly(sb) && !test_opt(sbi, DISABLE_CHECKPOINT) &&
                        test_opt(sbi, MERGE_CHECKPOINT)) {
                err = f2fs_start_ckpt_thread(sbi);
                if (err) {
@@ -3929,10 +3968,18 @@ try_onemore:
                 * previous checkpoint was not done by clean system shutdown.
                 */
                if (f2fs_hw_is_readonly(sbi)) {
-                       if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))
-                               f2fs_err(sbi, "Need to recover fsync data, but write access unavailable");
-                       else
-                               f2fs_info(sbi, "write access unavailable, skipping recovery");
+                       if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+                               /* err > 0 is reported as unrecovered fsync data */
+                               err = f2fs_recover_fsync_data(sbi, true);
+                               if (err > 0) {
+                                       err = -EROFS;
+                                       f2fs_err(sbi,
+                                               "Need to recover fsync data, but write access unavailable, please try mount w/ disable_roll_forward or norecovery");
+                               }
+                               if (err < 0)
+                                       goto free_meta;
+                       }
+                       f2fs_info(sbi, "write access unavailable, skipping recovery");
                        goto reset_checkpoint;
                }
 
@@ -3989,7 +4036,8 @@ reset_checkpoint:
         * If filesystem is not mounted as read-only then
         * do start the gc_thread.
         */
-       if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) {
+       if ((F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF ||
+               test_opt(sbi, GC_MERGE)) && !f2fs_readonly(sb)) {
                /* After POR, we can run background GC thread.*/
                err = f2fs_start_gc_thread(sbi);
                if (err)
index e38a7f6..39b522e 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/seq_file.h>
 #include <linux/unicode.h>
 #include <linux/ioprio.h>
+#include <linux/sysfs.h>
 
 #include "f2fs.h"
 #include "segment.h"
@@ -91,6 +92,14 @@ static ssize_t free_segments_show(struct f2fs_attr *a,
                        (unsigned long long)(free_segments(sbi)));
 }
 
+/* sysfs show handler: emits overprovision_segments(sbi) in decimal. */
+static ssize_t ovp_segments_show(struct f2fs_attr *a,
+               struct f2fs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%llu\n",
+                       (unsigned long long)(overprovision_segments(sbi)));
+}
+
 static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
                struct f2fs_sb_info *sbi, char *buf)
 {
@@ -282,6 +290,17 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
                return len;
        }
 
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+       if (!strcmp(a->attr.name, "compr_written_block"))
+               return sysfs_emit(buf, "%llu\n", sbi->compr_written_block);
+
+       if (!strcmp(a->attr.name, "compr_saved_block"))
+               return sysfs_emit(buf, "%llu\n", sbi->compr_saved_block);
+
+       if (!strcmp(a->attr.name, "compr_new_inode"))
+               return sysfs_emit(buf, "%u\n", sbi->compr_new_inode);
+#endif
+
        ui = (unsigned int *)(ptr + a->offset);
 
        return sprintf(buf, "%u\n", *ui);
@@ -458,6 +477,24 @@ out:
                return count;
        }
 
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+       if (!strcmp(a->attr.name, "compr_written_block") ||
+               !strcmp(a->attr.name, "compr_saved_block")) {
+               if (t != 0)
+                       return -EINVAL;
+               sbi->compr_written_block = 0;
+               sbi->compr_saved_block = 0;
+               return count;
+       }
+
+       if (!strcmp(a->attr.name, "compr_new_inode")) {
+               if (t != 0)
+                       return -EINVAL;
+               sbi->compr_new_inode = 0;
+               return count;
+       }
+#endif
+
        *ui = (unsigned int)t;
 
        return count;
@@ -629,6 +666,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
 F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
 F2FS_GENERAL_RO_ATTR(dirty_segments);
 F2FS_GENERAL_RO_ATTR(free_segments);
+F2FS_GENERAL_RO_ATTR(ovp_segments);
 F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
 F2FS_GENERAL_RO_ATTR(features);
 F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
@@ -668,6 +706,9 @@ F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
 F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD);
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode);
 #endif
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
@@ -715,6 +756,7 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(ckpt_thread_ioprio),
        ATTR_LIST(dirty_segments),
        ATTR_LIST(free_segments),
+       ATTR_LIST(ovp_segments),
        ATTR_LIST(unusable),
        ATTR_LIST(lifetime_write_kbytes),
        ATTR_LIST(features),
@@ -731,6 +773,11 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(moved_blocks_background),
        ATTR_LIST(avg_vblocks),
 #endif
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+       ATTR_LIST(compr_written_block),
+       ATTR_LIST(compr_saved_block),
+       ATTR_LIST(compr_new_inode),
+#endif
        NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
index a7beff2..03549b5 100644 (file)
@@ -152,40 +152,73 @@ static int f2fs_end_enable_verity(struct file *filp, const void *desc,
                                  size_t desc_size, u64 merkle_tree_size)
 {
        struct inode *inode = file_inode(filp);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
        struct fsverity_descriptor_location dloc = {
                .version = cpu_to_le32(F2FS_VERIFY_VER),
                .size = cpu_to_le32(desc_size),
                .pos = cpu_to_le64(desc_pos),
        };
-       int err = 0;
+       int err = 0, err2 = 0;
 
-       if (desc != NULL) {
-               /* Succeeded; write the verity descriptor. */
-               err = pagecache_write(inode, desc, desc_size, desc_pos);
+       /*
+        * If an error already occurred (which fs/verity/ signals by passing
+        * desc == NULL), then only clean-up is needed.
+        */
+       if (desc == NULL)
+               goto cleanup;
 
-               /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
-               if (!err)
-                       err = filemap_write_and_wait(inode->i_mapping);
-       }
+       /* Append the verity descriptor. */
+       err = pagecache_write(inode, desc, desc_size, desc_pos);
+       if (err)
+               goto cleanup;
+
+       /*
+        * Write all pages (both data and verity metadata).  Note that this must
+        * happen before clearing FI_VERITY_IN_PROGRESS; otherwise pages beyond
+        * i_size won't be written properly.  For crash consistency, this also
+        * must happen before the verity inode flag gets persisted.
+        */
+       err = filemap_write_and_wait(inode->i_mapping);
+       if (err)
+               goto cleanup;
+
+       /* Set the verity xattr. */
+       err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
+                           F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
+                           NULL, XATTR_CREATE);
+       if (err)
+               goto cleanup;
 
-       /* If we failed, truncate anything we wrote past i_size. */
-       if (desc == NULL || err)
-               f2fs_truncate(inode);
+       /* Finally, set the verity inode flag. */
+       file_set_verity(inode);
+       f2fs_set_inode_flags(inode);
+       f2fs_mark_inode_dirty_sync(inode, true);
 
        clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+       return 0;
 
-       if (desc != NULL && !err) {
-               err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
-                                   F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
-                                   NULL, XATTR_CREATE);
-               if (!err) {
-                       file_set_verity(inode);
-                       f2fs_set_inode_flags(inode);
-                       f2fs_mark_inode_dirty_sync(inode, true);
-               }
+cleanup:
+       /*
+        * Verity failed to be enabled, so clean up by truncating any verity
+        * metadata that was written beyond i_size (both from cache and from
+        * disk) and clearing FI_VERITY_IN_PROGRESS.
+        *
+        * Taking i_gc_rwsem[WRITE] is needed to stop f2fs garbage collection
+        * from re-instantiating cached pages we are truncating (since unlike
+        * normal file accesses, garbage collection isn't limited by i_size).
+        */
+       down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+       truncate_inode_pages(inode->i_mapping, inode->i_size);
+       err2 = f2fs_truncate(inode);
+       if (err2) {
+               f2fs_err(sbi, "Truncating verity metadata failed (errno=%d)",
+                        err2);
+               set_sbi_flag(sbi, SBI_NEED_FSCK);
        }
-       return err;
+       up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+       clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+       return err ?: err2;
 }
 
 static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
index 490f843..c8f34de 100644 (file)
@@ -488,6 +488,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
                f2fs_wait_on_page_writeback(xpage, NODE, true, true);
        } else {
                struct dnode_of_data dn;
+
                set_new_dnode(&dn, inode, NULL, NULL, new_nid);
                xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
                if (IS_ERR(xpage)) {
index c6cc0a5..5487a80 100644 (file)
@@ -168,7 +168,7 @@ struct f2fs_checkpoint {
        unsigned char alloc_type[MAX_ACTIVE_LOGS];
 
        /* SIT and NAT version bitmap */
-       unsigned char sit_nat_version_bitmap[1];
+       unsigned char sit_nat_version_bitmap[];
 } __packed;
 
 #define CP_CHKSUM_OFFSET       4092    /* default chksum offset in checkpoint */