f2fs: checkpoint disabling 60/266760/1
author: Daniel Rosenberg <drosen@google.com>
Tue, 21 Aug 2018 02:21:43 +0000 (19:21 -0700)
committer: Dongwoo Lee <dwoo08.lee@samsung.com>
Thu, 18 Nov 2021 08:46:38 +0000 (17:46 +0900)
Note that this requires the patch "f2fs: return correct errno in f2fs_gc".

This adds a lightweight non-persistent snapshotting scheme to f2fs.

To use, mount with the option checkpoint=disable, and to return to
normal operation, remount with checkpoint=enable. If the filesystem
is shut down before remounting with checkpoint=enable, it will revert
back to its apparent state when it was first mounted with
checkpoint=disable. This is useful for situations where you wish to be
able to roll back the state of the disk in case of some critical
failure.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
[Jaegeuk Kim: use SB_RDONLY instead of MS_RDONLY]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
[dwoo08.lee: port from android-common-kernel branch android-4.9-q commit f22f93a90601]
Signed-off-by: Dongwoo Lee <dwoo08.lee@samsung.com>
Change-Id: I7020cb09b360b291b324baae71882e64a7776ed8

13 files changed:
Documentation/filesystems/f2fs.txt
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
include/linux/f2fs_fs.h

index 193a034..9a17cd9 100644 (file)
@@ -190,6 +190,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to reenable checkpointing. Is enabled by default. While
+                       disabled, any unmounting or unexpected shutdowns will cause
+                       the filesystem contents to appear as they did when the
+                       filesystem was mounted with that option.
 
 ================================================================================
 DEBUGFS ENTRIES
index 79134af..0bf44b5 100644 (file)
@@ -1194,6 +1194,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
                __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+       if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+               __set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+       else
+               __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
        /* set this flag to activate crc|cp_ver for recovery */
        __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
        __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1392,6 +1397,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
        clear_sbi_flag(sbi, SBI_IS_DIRTY);
        clear_sbi_flag(sbi, SBI_NEED_CP);
+       sbi->unusable_block_count = 0;
        __set_cp_next_pack(sbi);
 
        /*
@@ -1416,6 +1422,12 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        unsigned long long ckpt_ver;
        int err = 0;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               if (cpc->reason != CP_PAUSE)
+                       return 0;
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                               "Start checkpoint disabled!");
+       }
        mutex_lock(&sbi->cp_mutex);
 
        if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
index f838236..5aa811f 100644 (file)
@@ -1644,6 +1644,10 @@ static inline bool check_inplace_update_policy(struct inode *inode,
                        is_inode_flag_set(inode, FI_NEED_IPU))
                return true;
 
+       if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                       !is_checkpointed_data(sbi, fio->old_blkaddr)))
+               return true;
+
        return false;
 }
 
@@ -1674,6 +1678,9 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
                        return true;
                if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
                        return true;
+               if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                       is_checkpointed_data(sbi, fio->old_blkaddr)))
+                       return true;
        }
        return false;
 }
@@ -2272,6 +2279,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 #endif
        trace_f2fs_write_begin(inode, pos, len, flags);
 
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               goto fail;
+
        if (f2fs_is_atomic_file(inode) &&
                        !available_free_memory(sbi, INMEM_PAGES)) {
                err = -ENOMEM;
index a66107b..4355d7c 100644 (file)
@@ -268,7 +268,8 @@ static int stat_show(struct seq_file *s, void *v)
                seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
                        si->sbi->sb->s_bdev, i++,
                        f2fs_readonly(si->sbi->sb) ? "RO": "RW",
-                       f2fs_cp_error(si->sbi) ? "Error": "Good");
+                       is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+                       "Disabled" : (f2fs_cp_error(si->sbi) ? "Error" : "Good"));
                seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
                           si->sit_area_segs, si->nat_area_segs);
                seq_printf(s, "[SSA: %d] [MAIN: %d",
index 6194c16..2b53d0d 100644 (file)
@@ -98,6 +98,7 @@ extern char *fault_name[FAULT_MAX];
 #define F2FS_MOUNT_QUOTA               0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE   0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT                0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT  0x02000000
 
 #define F2FS_OPTION(sbi)       ((sbi)->mount_opt)
 #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -176,6 +177,7 @@ enum {
 #define        CP_RECOVERY     0x00000008
 #define        CP_DISCARD      0x00000010
 #define CP_TRIMMED     0x00000020
+#define CP_PAUSE       0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)                BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST                8       /* issue 8 discards per round */
@@ -185,6 +187,7 @@ enum {
 #define DEF_DISCARD_URGENT_UTIL                80      /* do more discard over 80% */
 #define DEF_CP_INTERVAL                        60      /* 60 secs */
 #define DEF_IDLE_INTERVAL              5       /* 5 secs */
+#define DEF_DISABLE_INTERVAL           5       /* 5 secs */
 
 struct cp_control {
        int reason;
@@ -1056,11 +1059,13 @@ enum {
        SBI_POR_DOING,                          /* recovery is doing or not */
        SBI_NEED_SB_WRITE,                      /* need to recover superblock */
        SBI_NEED_CP,                            /* need to checkpoint */
+       SBI_CP_DISABLED,                        /* CP was disabled last mount */
 };
 
 enum {
        CP_TIME,
        REQ_TIME,
+       DISABLE_TIME,
        MAX_TIME,
 };
 
@@ -1172,6 +1177,9 @@ struct f2fs_sb_info {
        block_t reserved_blocks;                /* configurable reserved blocks */
        block_t current_reserved_blocks;        /* current reserved blocks */
 
+       /* Additional tracking for no checkpoint mode */
+       block_t unusable_block_count;           /* # of blocks saved by last cp */
+
        unsigned int nquota_files;              /* # of quota sysfile */
 
        u32 s_next_generation;                  /* for NFS support */
@@ -1664,6 +1672,9 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
        if (!__allow_reserved_blocks(sbi, inode, true))
                avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               avail_user_block_count -= sbi->unusable_block_count;
+
        if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
                diff = sbi->total_valid_block_count - avail_user_block_count;
                if (diff > *count)
@@ -1870,6 +1881,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
        if (!__allow_reserved_blocks(sbi, inode, false))
                valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               valid_block_count += sbi->unusable_block_count;
 
        if (unlikely(valid_block_count > sbi->user_block_count)) {
                spin_unlock(&sbi->stat_lock);
@@ -2849,6 +2862,8 @@ void drop_discard_cmd(struct f2fs_sb_info *sbi);
 void stop_discard_thread(struct f2fs_sb_info *sbi);
 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
 void release_discard_addrs(struct f2fs_sb_info *sbi);
 int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
 void allocate_new_segments(struct f2fs_sb_info *sbi);
@@ -3364,6 +3379,9 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
 
 static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
 {
+       if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
+               return true;
+
        return (f2fs_post_read_required(inode) ||
                        (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
                        F2FS_I_SB(inode)->s_ndevs);
index 0561020..1609e02 100644 (file)
@@ -214,7 +214,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
                .for_reclaim = 0,
        };
 
-       if (unlikely(f2fs_readonly(inode->i_sb)))
+       if (unlikely(f2fs_readonly(inode->i_sb) ||
+                        is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                return 0;
 
        trace_f2fs_sync_file_enter(inode);
@@ -2109,6 +2110,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
        if (f2fs_readonly(sbi->sb))
                return -EROFS;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               f2fs_msg(sbi->sb, KERN_INFO,
+                               "Skipping Checkpoint. Checkpoints currently disabled.");
+               return -EINVAL;
+       }
+
        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;
@@ -2475,6 +2482,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
        if (f2fs_readonly(sbi->sb))
                return -EROFS;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return -EINVAL;
+
        if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
                                                        sizeof(range)))
                return -EFAULT;
index 59005e0..bce2e7e 100644 (file)
@@ -375,6 +375,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 
                if (sec_usage_check(sbi, secno))
                        goto next;
+               /* Don't touch checkpointed data */
+               if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                                       get_ckpt_valid_blocks(sbi, segno)))
+                       goto next;
                if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
                        goto next;
                if (gc_type == FG_GC && p.alloc_mode == LFS &&
@@ -1056,7 +1060,8 @@ gc_more:
                 * threshold, we can make them free by checkpoint. Then, we
                 * secure free segments which doesn't need fggc any more.
                 */
-               if (prefree_segments(sbi)) {
+               if (prefree_segments(sbi) &&
+                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
                        ret = write_checkpoint(sbi, &cpc);
                        if (ret)
                                goto stop;
@@ -1089,7 +1094,7 @@ gc_more:
                        goto gc_more;
                }
 
-               if (gc_type == FG_GC)
+               if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
                        ret = write_checkpoint(sbi, &cpc);
        }
 stop:
index d4b3545..838a7af 100644 (file)
@@ -544,6 +544,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
        if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
                return 0;
 
+       if (f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
+
        /*
         * We need to balance fs here to prevent from producing dirty node pages
         * during the urgent cleaning time when runing out of free sections.
@@ -626,7 +629,8 @@ no_delete:
        stat_dec_inline_dir(inode);
        stat_dec_inline_inode(inode);
 
-       if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+       if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
        else
                f2fs_inode_synced(inode);
index f1e1ff1..fa95531 100644 (file)
@@ -19,6 +19,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 #include "xattr.h"
 #include "acl.h"
 #include <trace/events/f2fs.h>
@@ -269,6 +270,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = dquot_initialize(dir);
        if (err)
@@ -315,6 +319,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = fscrypt_prepare_link(old_dentry, dir, dentry);
        if (err)
@@ -561,6 +568,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
                                      &disk_link);
@@ -690,6 +700,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = dquot_initialize(dir);
        if (err)
@@ -824,6 +837,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        (!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1009,6 +1025,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        !projid_eq(F2FS_I(new_dir)->i_projid,
index 9dc1e0d..5befb1a 100644 (file)
@@ -179,6 +179,8 @@ bool need_SSR(struct f2fs_sb_info *sbi)
                return false;
        if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
                return true;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return true;
 
        return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
                        SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -470,6 +472,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
        if (need && excess_cached_nats(sbi))
                f2fs_balance_fs_bg(sbi);
 
+       if (f2fs_is_checkpoint_ready(sbi))
+               return;
+
        /*
         * We should do GC or end up with checkpoint, if there are so many dirty
         * dir/node pages without enough free segments.
@@ -784,7 +789,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-       unsigned short valid_blocks;
+       unsigned short valid_blocks, ckpt_valid_blocks;
 
        if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
                return;
@@ -792,8 +797,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
        mutex_lock(&dirty_i->seglist_lock);
 
        valid_blocks = get_valid_blocks(sbi, segno, false);
+       ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-       if (valid_blocks == 0) {
+       if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+                               ckpt_valid_blocks == sbi->blocks_per_seg)) {
                __locate_dirty_segment(sbi, segno, PRE);
                __remove_dirty_segment(sbi, segno, DIRTY);
        } else if (valid_blocks < sbi->blocks_per_seg) {
@@ -806,6 +813,66 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
        mutex_unlock(&dirty_i->seglist_lock);
 }
 
+/*
+ * Move every segment on the DIRTY list that has no valid blocks and is not a
+ * current segment over to the PRE (prefree) list.
+ *
+ * Takes dirty_i->seglist_lock internally, so the caller must not already
+ * hold it.
+ */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       unsigned int segno;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               if (get_valid_blocks(sbi, segno, false))
+                       continue;
+               if (IS_CURSEG(sbi, segno))
+                       continue;
+               __locate_dirty_segment(sbi, segno, PRE);
+               __remove_dirty_segment(sbi, segno, DIRTY);
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+}
+
+/*
+ * Check the dirty segments before checkpointing is disabled.
+ *
+ * Walks the DIRTY list under seglist_lock and sums, separately for node and
+ * data segments, the blocks that are not valid (blocks_per_seg - valid_blocks).
+ * Each total is compared with overprovision_segments() converted to blocks.
+ *
+ * Returns 0 when both totals fit, -EAGAIN when either one exceeds that limit.
+ */
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+       block_t holes[2] = {0, 0};      /* DATA and NODE */
+       struct seg_entry *se;
+       unsigned int segno;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               se = get_seg_entry(sbi, segno);
+               if (IS_NODESEG(se->type))
+                       holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+               else
+                       holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+
+       if (holes[DATA] > ovp || holes[NODE] > ovp)
+               return -EAGAIN;
+       return 0;
+}
+
+/*
+ * This is only used by SBI_CP_DISABLED.
+ *
+ * Scans the DIRTY list under seglist_lock and returns the first segment found
+ * whose valid_blocks and ckpt_valid_blocks counters are both zero, or
+ * NULL_SEGNO when there is no such segment. The lock is dropped before
+ * returning on either path.
+ */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       unsigned int segno = 0;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               if (get_valid_blocks(sbi, segno, false))
+                       continue;
+               if (get_ckpt_valid_blocks(sbi, segno))
+                       continue;
+               mutex_unlock(&dirty_i->seglist_lock);
+               return segno;
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+       return NULL_SEGNO;
+}
+
 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t lstart,
                block_t start, block_t len)
@@ -1827,7 +1894,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                        sbi->discard_blks--;
 
                /* don't overwrite by SSR to keep node chain */
-               if (IS_NODESEG(se->type)) {
+               if (IS_NODESEG(se->type) &&
+                       is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
                        if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
                                se->ckpt_valid_blocks++;
                }
@@ -1849,6 +1917,15 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                        f2fs_bug_on(sbi, 1);
                        se->valid_blocks++;
                        del = 0;
+               } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+                       /*
+                        * If checkpoints are off, we must not reuse data that
+                        * was used in the previous checkpoint. If it was used
+                        * before, we must track that to know how much space we
+                        * really have.
+                        */
+                       if (f2fs_test_bit(offset, se->ckpt_valid_map))
+                               sbi->unusable_block_count++;
                }
 
                if (f2fs_discard_en(sbi) &&
@@ -2128,6 +2205,9 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
        if (sbi->segs_per_sec != 1)
                return CURSEG_I(sbi, type)->segno;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return 0;
+
        if (test_opt(sbi, NOHEAP) &&
                (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
                return 0;
@@ -2271,6 +2351,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
                        return 1;
                }
        }
+
+       /* find valid_blocks=0 in dirty list */
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               segno = get_free_segment(sbi);
+               if (segno != NULL_SEGNO) {
+                       curseg->next_segno = segno;
+                       return 1;
+               }
+       }
        return 0;
 }
 
@@ -2288,7 +2377,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
        else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
                                        type == CURSEG_WARM_NODE)
                new_curseg(sbi, type, false);
-       else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+       else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+                       likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                new_curseg(sbi, type, false);
        else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
                change_curseg(sbi, type);
index 0af6dca..b507202 100644 (file)
@@ -340,6 +340,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
                return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+/* Return the ckpt_valid_blocks counter of the segment entry for @segno. */
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+                               unsigned int segno)
+{
+       return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
                                        struct f2fs_sit_entry *rs)
 {
@@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
                reserved_sections(sbi) + needed);
 }
 
+/*
+ * Returns 0 when SBI_CP_DISABLED is not set, or when it is set but
+ * has_not_enough_free_secs(sbi, 0, 0) is false; returns -ENOSPC otherwise.
+ *
+ * Note the sense of the result: despite the name, a non-zero return value
+ * means "not ready", so callers test it like an error code.
+ */
+static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+       if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return 0;
+       if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
+               return 0;
+       return -ENOSPC;
+}
+
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
 {
        return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
index 0f95dce..be58d77 100644 (file)
@@ -133,6 +133,7 @@ enum {
        Opt_alloc,
        Opt_fsync,
        Opt_test_dummy_encryption,
+       Opt_checkpoint,
        Opt_err,
 };
 
@@ -190,6 +191,7 @@ static match_table_t f2fs_tokens = {
        {Opt_alloc, "alloc_mode=%s"},
        {Opt_fsync, "fsync_mode=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
+       {Opt_checkpoint, "checkpoint=%s"},
        {Opt_err, NULL},
 };
 
@@ -765,6 +767,23 @@ static int parse_options(struct super_block *sb, char *options)
                                        "Test dummy encryption mount option ignored");
 #endif
                        break;
+               case Opt_checkpoint:
+                       name = match_strdup(&args[0]);
+                       if (!name)
+                               return -ENOMEM;
+
+                       if (strlen(name) == 6 &&
+                                       !strncmp(name, "enable", 6)) {
+                               clear_opt(sbi, DISABLE_CHECKPOINT);
+                       } else if (strlen(name) == 7 &&
+                                       !strncmp(name, "disable", 7)) {
+                               set_opt(sbi, DISABLE_CHECKPOINT);
+                       } else {
+                               kfree(name);
+                               return -EINVAL;
+                       }
+                       kfree(name);
+                       break;
                default:
                        f2fs_msg(sb, KERN_ERR,
                                "Unrecognized mount option \"%s\" or missing value",
@@ -810,6 +829,12 @@ static int parse_options(struct super_block *sb, char *options)
                }
        }
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+               f2fs_msg(sb, KERN_ERR,
+                               "LFS not compatible with checkpoint=disable\n");
+               return -EINVAL;
+       }
+
        /* Not pass down write hints if the number of active logs is lesser
         * than NR_CURSEG_TYPE.
         */
@@ -1066,6 +1091,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
        if (unlikely(f2fs_cp_error(sbi)))
                return 0;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return 0;
 
        trace_f2fs_sync_fs(sb, sync);
 
@@ -1165,6 +1192,11 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_blocks = total_count - start_count;
        buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
                                                sbi->current_reserved_blocks;
+       if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+               buf->f_bfree = 0;
+       else
+               buf->f_bfree -= sbi->unusable_block_count;
+
        if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
                buf->f_bavail = buf->f_bfree -
                                F2FS_OPTION(sbi).root_reserved_blocks;
@@ -1340,6 +1372,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
        else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
                seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT))
+               seq_puts(seq, ",checkpoint=disable");
+
        if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
                seq_printf(seq, ",fsync_mode=%s", "posix");
        else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1365,6 +1400,7 @@ static void default_options(struct f2fs_sb_info *sbi)
        set_opt(sbi, EXTENT_CACHE);
        set_opt(sbi, NOHEAP);
        sbi->sb->s_flags |= MS_LAZYTIME;
+       clear_opt(sbi, DISABLE_CHECKPOINT);
        set_opt(sbi, FLUSH_MERGE);
        if (f2fs_sb_has_blkzoned(sbi->sb)) {
                set_opt_mode(sbi, F2FS_MOUNT_LFS);
@@ -1388,6 +1424,57 @@ static void default_options(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+/*
+ * Switch the filesystem into checkpoint-disabled mode.
+ *
+ * Steps, in order:
+ *  1. Call f2fs_gc() repeatedly until the DISABLE_TIME interval is over or
+ *     f2fs_gc() returns -ENODATA; -EAGAIN from f2fs_gc() is tolerated, any
+ *     other error aborts.
+ *  2. sync_filesystem().
+ *  3. Ask f2fs_disable_cp_again() whether the dirty segments still leave
+ *     enough room; if not, give up with -EAGAIN.
+ *  4. Set SBI_CP_DISABLED, write one CP_PAUSE checkpoint and reset
+ *     unusable_block_count.
+ *
+ * Returns 0 on success or a negative errno from the steps above.
+ */
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+       struct cp_control cpc;
+       int err;
+
+       sbi->sb->s_flags |= MS_ACTIVE;
+
+       mutex_lock(&sbi->gc_mutex);
+       f2fs_update_time(sbi, DISABLE_TIME);
+
+       /*
+        * NOTE(review): gc_mutex is taken once before the loop; confirm that
+        * f2fs_gc() leaves it locked on return, otherwise later iterations and
+        * the unlocks below are unbalanced.
+        */
+       while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+               err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+               if (err == -ENODATA)
+                       break;
+               if (err && err != -EAGAIN) {
+                       mutex_unlock(&sbi->gc_mutex);
+                       return err;
+               }
+       }
+       mutex_unlock(&sbi->gc_mutex);
+
+       err = sync_filesystem(sbi->sb);
+       if (err)
+               return err;
+
+       if (f2fs_disable_cp_again(sbi))
+               return -EAGAIN;
+
+       mutex_lock(&sbi->gc_mutex);
+       /*
+        * NOTE(review): only cpc.reason is initialized; confirm that
+        * write_checkpoint() reads no other cp_control field for CP_PAUSE.
+        */
+       cpc.reason = CP_PAUSE;
+       /* The flag is set first; write_checkpoint() lets CP_PAUSE through. */
+       set_sbi_flag(sbi, SBI_CP_DISABLED);
+       /* NOTE(review): the return value of write_checkpoint() is ignored. */
+       write_checkpoint(sbi, &cpc);
+
+       sbi->unusable_block_count = 0;
+       mutex_unlock(&sbi->gc_mutex);
+       return 0;
+}
+
+/*
+ * Leave checkpoint-disabled mode.
+ *
+ * Under gc_mutex: moves empty dirty segments to prefree, clears
+ * SBI_CP_DISABLED and marks the superblock info dirty. After dropping the
+ * mutex it calls f2fs_sync_fs(sb, 1); that function returns early while
+ * SBI_CP_DISABLED is set, which is why the flag is cleared first.
+ */
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+       mutex_lock(&sbi->gc_mutex);
+       f2fs_dirty_to_prefree(sbi);
+
+       clear_sbi_flag(sbi, SBI_CP_DISABLED);
+       set_sbi_flag(sbi, SBI_IS_DIRTY);
+       mutex_unlock(&sbi->gc_mutex);
+
+       f2fs_sync_fs(sbi->sb, 1);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1397,6 +1484,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        bool need_restart_gc = false;
        bool need_stop_gc = false;
        bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+       bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+       bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
        int i, j;
 #endif
@@ -1441,6 +1530,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        err = parse_options(sb, data);
        if (err)
                goto restore_opts;
+       checkpoint_changed =
+               disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
        /*
         * Previous and new state of filesystem is RO,
@@ -1474,6 +1565,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
 
+       if ((*flags & MS_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+               err = -EINVAL;
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                               "disabling checkpoint not compatible with read-only");
+               goto restore_opts;
+       }
+
        /*
         * We stop the GC thread if FS is mounted as RO
         * or if background_gc = off is passed in mount
@@ -1502,6 +1600,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                clear_sbi_flag(sbi, SBI_IS_CLOSE);
        }
 
+       if (checkpoint_changed) {
+               if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+                       err = f2fs_disable_checkpoint(sbi);
+                       if (err)
+                               goto restore_gc;
+               } else {
+                       f2fs_enable_checkpoint(sbi);
+               }
+       }
+
        /*
         * We stop issue flush thread if FS is mounted as RO
         * or if flush_merge is not passed in mount option.
@@ -2426,6 +2534,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->dir_level = DEF_DIR_LEVEL;
        sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
        sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
+       sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
        clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
        for (i = 0; i < NR_COUNT_TYPE; i++)
@@ -3021,6 +3130,9 @@ try_onemore:
        if (err)
                goto free_meta;
 
+       if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+               goto skip_recovery;
+
        /* recover fsynced data */
        if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
                /*
@@ -3060,6 +3172,14 @@ skip_recovery:
        /* recover_fsync_data() cleared this already */
        clear_sbi_flag(sbi, SBI_POR_DOING);
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+               err = f2fs_disable_checkpoint(sbi);
+               if (err)
+                       goto free_meta;
+       } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+               f2fs_enable_checkpoint(sbi);
+       }
+
        /*
         * If filesystem is not mounted as read-only then
         * do start the gc_thread.
index aa5db8b..d885420 100644 (file)
@@ -118,6 +118,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_DISABLED_FLAG               0x00001000
 #define CP_LARGE_NAT_BITMAP_FLAG       0x00000400
 #define CP_NOCRC_RECOVERY_FLAG 0x00000200
 #define CP_TRIMMED_FLAG                0x00000100