From: Linus Torvalds
Date: Wed, 5 May 2021 01:03:38 +0000 (-0700)
Subject: Merge tag 'f2fs-for-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeu...
X-Git-Tag: v5.15~1195
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d0195c7d7af6a456c37f4b4b2df5528f10714482;hp=-c;p=platform%2Fkernel%2Flinux-starfive.git

Merge tag 'f2fs-for-5.13-rc1' of git://git./linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we added a new mount option, "checkpoint_merge", which
  introduces a kernel thread dealing with the f2fs checkpoints. Once we
  start to manage the IO priority along with blk-cgroup, the checkpoint
  operation can be processed at a lower priority under the process
  context. Since the checkpoint holds all the filesystem operations, we
  give a higher priority to the checkpoint thread all the time.

  Enhancements:
   - introduce the gc_merge mount option, which lets the background GC
     thread handle foreground GC requests
   - improve the discard thread to run more efficiently
   - allow modular compression algorithms
   - expose # of overprovision segments to sysfs
   - expose runtime compression stats to sysfs

  Bug fixes:
   - fix OOB memory access caused by the node id lookup
   - avoid touching checkpointed data in checkpoint-disabled mode
   - fix the resizing flow to avoid kernel panics and race conditions
   - fix block allocation issues on pinned files
   - address some swapfile issues
   - fix a hungtask problem and kernel panic during atomic write
     operations
   - don't start the checkpoint thread on read-only mounts

  And, we've cleaned up some kernel coding style and build warnings. In
  addition, we fixed some minor race conditions and error handling
  routines"

* tag 'f2fs-for-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (48 commits)
  f2fs: drop inplace IO if fs status is abnormal
  f2fs: compress: remove unneed check condition
  f2fs: clean up left deprecated IO trace codes
  f2fs: avoid using native allocate_segment_by_default()
  f2fs: remove unnecessary struct declaration
  f2fs: fix to avoid NULL pointer dereference
  f2fs: avoid duplicated codes for cleanup
  f2fs: document: add description about compressed space handling
  f2fs: clean up build warnings
  f2fs: fix the periodic wakeups of discard thread
  f2fs: fix to avoid accessing invalid fio in f2fs_allocate_data_block()
  f2fs: fix to avoid GC/mmap race with f2fs_truncate()
  f2fs: set checkpoint_merge by default
  f2fs: Fix a hungtask problem in atomic write
  f2fs: fix to restrict mount condition on readonly block device
  f2fs: introduce gc_merge mount option
  f2fs: fix to cover __allocate_new_section() with curseg_lock
  f2fs: fix wrong alloc_type in f2fs_do_replace_block
  f2fs: delete empty compress.h
  f2fs: fix a typo in inode.c
  ...
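As a quick illustration of the new options, a minimal userspace sketch is
shown below; the device and mountpoint are placeholders, and note that this
series already sets checkpoint_merge by default, so passing it explicitly is
only for clarity:

/*
 * Mount an f2fs volume with the checkpoint_merge and gc_merge options, so
 * checkpoints and foreground GC requests are serviced by kernel threads
 * rather than in the calling process context.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* /dev/vdb and /mnt/f2fs are placeholder paths for this example. */
	if (mount("/dev/vdb", "/mnt/f2fs", "f2fs", 0,
		  "checkpoint_merge,gc_merge")) {
		perror("mount f2fs");
		return 1;
	}
	return 0;
}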
--- d0195c7d7af6a456c37f4b4b2df5528f10714482 diff --combined fs/f2fs/checkpoint.c index be5415a,817d0bc..f795049 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@@ -292,7 -292,7 +292,7 @@@ void f2fs_ra_meta_pages_cond(struct f2f f2fs_put_page(page, 0); if (readahead) - f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); + f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, @@@ -719,6 -719,7 +719,7 @@@ int f2fs_recover_orphan_inodes(struct f orphan_blk = (struct f2fs_orphan_block *)page_address(page); for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); + err = recover_orphan_inode(sbi, ino); if (err) { f2fs_put_page(page, 1); @@@ -1456,7 -1457,7 +1457,7 @@@ static int do_checkpoint(struct f2fs_sb orphan_blocks); if (__remain_node_summaries(cpc->reason)) - ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); else @@@ -1818,7 -1819,11 +1819,11 @@@ int f2fs_issue_checkpoint(struct f2fs_s llist_add(&req.llnode, &cprc->issue_list); atomic_inc(&cprc->queued_ckpt); - /* update issue_list before we wake up issue_checkpoint thread */ + /* + * update issue_list before we wake up issue_checkpoint thread, + * this smp_mb() pairs with another barrier in ___wait_event(), + * see more details in comments of waitqueue_active(). + */ smp_mb(); if (waitqueue_active(&cprc->ckpt_wait_queue)) diff --combined fs/f2fs/data.c index 4e5257c,cf93547..96f1a35 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@@ -857,7 -857,7 +857,7 @@@ int f2fs_merge_page_bio(struct f2fs_io_ f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { - bio = __bio_alloc(fio, BIO_MAX_PAGES); + bio = __bio_alloc(fio, BIO_MAX_VECS); __attach_io_flag(fio); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, fio->page->index, fio, GFP_NOIO); @@@ -932,7 -932,7 +932,7 @@@ alloc_new fio->retry = true; goto skip; } - io->bio = __bio_alloc(fio, BIO_MAX_PAGES); + io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, bio_page->index, fio, GFP_NOIO); io->fio = *fio; @@@ -1086,6 -1086,7 +1086,7 @@@ int f2fs_reserve_new_blocks(struct dnod for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = f2fs_data_blkaddr(dn); + if (blkaddr == NULL_ADDR) { dn->data_blkaddr = NEW_ADDR; __set_data_blkaddr(dn); @@@ -1722,7 -1723,7 +1723,7 @@@ static int get_data_block_dio_write(str return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_DIO, NULL, f2fs_rw_hint_to_seg_type(inode->i_write_hint), - IS_SWAPFILE(inode) ? 
false : true); + true); } static int get_data_block_dio(struct inode *inode, sector_t iblock, @@@ -1837,6 -1838,7 +1838,7 @@@ int f2fs_fiemap(struct inode *inode, st int ret = 0; bool compr_cluster = false; unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; + loff_t maxbytes; if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { ret = f2fs_precache_extents(inode); @@@ -1850,6 -1852,15 +1852,15 @@@ inode_lock(inode); + maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + if (start > maxbytes) { + ret = -EFBIG; + goto out; + } + + if (len > maxbytes || (maxbytes - len) < start) + len = maxbytes - start; + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { ret = f2fs_xattr_fiemap(inode, fieinfo); goto out; @@@ -3755,6 -3766,7 +3766,7 @@@ int f2fs_migrate_page(struct address_sp if (atomic_written) { struct inmem_pages *cur; + list_for_each_entry(cur, &fi->inmem_pages, list) if (cur->page == page) { cur->page = newpage; @@@ -3780,11 -3792,64 +3792,64 @@@ #endif #ifdef CONFIG_SWAP + static int f2fs_is_file_aligned(struct inode *inode) + { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + block_t main_blkaddr = SM_I(sbi)->main_blkaddr; + block_t cur_lblock; + block_t last_lblock; + block_t pblock; + unsigned long nr_pblocks; + unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + int ret = 0; + + cur_lblock = 0; + last_lblock = bytes_to_blks(inode, i_size_read(inode)); + + while (cur_lblock < last_lblock) { + struct f2fs_map_blocks map; + + memset(&map, 0, sizeof(map)); + map.m_lblk = cur_lblock; + map.m_len = last_lblock - cur_lblock; + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; + + ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); + if (ret) + goto out; + + /* hole */ + if (!(map.m_flags & F2FS_MAP_FLAGS)) { + f2fs_err(sbi, "Swapfile has holes\n"); + ret = -ENOENT; + goto out; + } + + pblock = map.m_pblk; + nr_pblocks = map.m_len; + + if ((pblock - main_blkaddr) & (blocks_per_sec - 1) || + nr_pblocks & (blocks_per_sec - 1)) { + f2fs_err(sbi, "Swapfile does not align to section"); + ret = -EINVAL; + goto out; + } + + cur_lblock += nr_pblocks; + } + out: + return ret; + } + static int check_swap_activate_fast(struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); sector_t cur_lblock; sector_t last_lblock; sector_t pblock; @@@ -3792,8 -3857,8 +3857,8 @@@ sector_t highest_pblock = 0; int nr_extents = 0; unsigned long nr_pblocks; - u64 len; - int ret; + unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + int ret = 0; /* * Map all the blocks into the extent list. 
This code doesn't try @@@ -3801,31 -3866,41 +3866,41 @@@ */ cur_lblock = 0; last_lblock = bytes_to_blks(inode, i_size_read(inode)); - len = i_size_read(inode); - while (cur_lblock <= last_lblock && cur_lblock < sis->max) { + while (cur_lblock < last_lblock && cur_lblock < sis->max) { struct f2fs_map_blocks map; - pgoff_t next_pgofs; cond_resched(); memset(&map, 0, sizeof(map)); map.m_lblk = cur_lblock; - map.m_len = bytes_to_blks(inode, len) - cur_lblock; - map.m_next_pgofs = &next_pgofs; + map.m_len = last_lblock - cur_lblock; + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); if (ret) - goto err_out; + goto out; /* hole */ - if (!(map.m_flags & F2FS_MAP_FLAGS)) - goto err_out; + if (!(map.m_flags & F2FS_MAP_FLAGS)) { + f2fs_err(sbi, "Swapfile has holes\n"); + ret = -ENOENT; + goto out; + } pblock = map.m_pblk; nr_pblocks = map.m_len; + if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) || + nr_pblocks & (blocks_per_sec - 1)) { + f2fs_err(sbi, "Swapfile does not align to section"); + ret = -EINVAL; + goto out; + } + if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; @@@ -3854,9 -3929,6 +3929,6 @@@ sis->highest_bit = cur_lblock - 1; out: return ret; - err_out: - pr_err("swapon: swapfile has holes\n"); - return -EINVAL; } /* Copied from generic_swapfile_activate() to check any holes */ @@@ -3865,6 -3937,7 +3937,7 @@@ static int check_swap_activate(struct s { struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned blocks_per_page; unsigned long page_no; sector_t probe_block; @@@ -3872,11 -3945,15 +3945,15 @@@ sector_t lowest_block = -1; sector_t highest_block = 0; int nr_extents = 0; - int ret; + int ret = 0; if (PAGE_SIZE == F2FS_BLKSIZE) return check_swap_activate_fast(sis, swap_file, span); + ret = f2fs_is_file_aligned(inode); + if (ret) + goto out; + blocks_per_page = bytes_to_blks(inode, PAGE_SIZE); /* @@@ -3891,13 -3968,14 +3968,14 @@@ unsigned block_in_page; sector_t first_block; sector_t block = 0; - int err = 0; cond_resched(); block = probe_block; - err = bmap(inode, &block); - if (err || !block) + ret = bmap(inode, &block); + if (ret) + goto out; + if (!block) goto bad_bmap; first_block = block; @@@ -3913,9 -3991,10 +3991,10 @@@ block_in_page++) { block = probe_block + block_in_page; - err = bmap(inode, &block); - - if (err || !block) + ret = bmap(inode, &block); + if (ret) + goto out; + if (!block) goto bad_bmap; if (block != first_block + block_in_page) { @@@ -3955,8 -4034,8 +4034,8 @@@ reprobe out: return ret; bad_bmap: - pr_err("swapon: swapfile has holes\n"); - return -EINVAL; + f2fs_err(sbi, "Swapfile has holes\n"); + return -ENOENT; } static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, diff --combined fs/f2fs/dir.c index e211a1b,ebf65c5..dc7ce79 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@@ -449,7 -449,9 +449,7 @@@ struct f2fs_dir_entry *f2fs_find_entry( struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) { - struct qstr dotdot = QSTR_INIT("..", 2); - - return f2fs_find_entry(dir, &dotdot, p); + return f2fs_find_entry(dir, &dotdot_name, p); } ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, @@@ -471,6 -473,7 +471,7 @@@ void f2fs_set_link(struct inode *dir, s struct page *page, struct inode *inode) { enum page_type type = f2fs_has_inline_dentry(dir) ? 
NODE : DATA; + lock_page(page); f2fs_wait_on_page_writeback(page, type, true, true); de->ino = cpu_to_le32(inode->i_ino); diff --combined fs/f2fs/f2fs.h index 11a20dc,b9d5317..0448788 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@@ -97,6 -97,7 +97,7 @@@ extern const char *f2fs_fault_name[FAUL #define F2FS_MOUNT_NORECOVERY 0x04000000 #define F2FS_MOUNT_ATGC 0x08000000 #define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000 + #define F2FS_MOUNT_GC_MERGE 0x20000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@@ -637,21 -638,26 +638,26 @@@ enum #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) - #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) #define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) - #define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) #define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) + + #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) + #define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) #define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) + #define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT) #define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT) - #define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) + #define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT) #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) + #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) + #define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) #define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) #define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) + #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT) #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT) @@@ -860,7 -866,7 +866,7 @@@ struct f2fs_nm_info /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat entry tree */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ @@@ -1297,14 -1303,6 +1303,6 @@@ enum #define IS_DUMMY_WRITTEN_PAGE(page) \ (page_private(page) == DUMMY_WRITTEN_PAGE) - #ifdef CONFIG_F2FS_IO_TRACE - #define IS_IO_TRACED_PAGE(page) \ - (page_private(page) > 0 && \ - page_private(page) < (unsigned long)PID_MAX_LIMIT) - #else - #define IS_IO_TRACED_PAGE(page) (0) - #endif - /* For compression */ enum compress_algorithm_type { COMPRESS_LZO, @@@ -1623,6 -1621,11 +1621,11 @@@ struct f2fs_sb_info #ifdef CONFIG_F2FS_FS_COMPRESSION struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ + + /* For runtime compression statistics */ + u64 compr_written_block; + u64 compr_saved_block; + u32 compr_new_inode; #endif }; @@@ -2215,6 -2218,7 +2218,7 @@@ static inline block_t __cp_payload(stru static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + void *tmp_ptr = 
&ckpt->sit_nat_version_bitmap; int offset; if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { @@@ -2224,7 -2228,7 +2228,7 @@@ * if large_nat_bitmap feature is enabled, leave checksum * protection for all nat/sit bitmaps. */ - return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); + return tmp_ptr + offset + sizeof(__le32); } if (__cp_payload(sbi) > 0) { @@@ -2235,7 -2239,7 +2239,7 @@@ } else { offset = (flag == NAT_BITMAP) ? le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + return tmp_ptr + offset; } } @@@ -3194,9 -3198,6 +3198,9 @@@ int f2fs_setattr(struct user_namespace int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); int f2fs_precache_extents(struct inode *inode); +int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int f2fs_fileattr_set(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid); @@@ -3302,7 -3303,6 +3306,6 @@@ void f2fs_hash_filename(const struct in /* * node.c */ - struct dnode_of_data; struct node_info; int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); @@@ -3379,6 -3379,7 +3382,7 @@@ block_t f2fs_get_unusable_blocks(struc int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); + bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); @@@ -3386,7 -3387,7 +3390,7 @@@ void f2fs_get_new_segment(struct f2fs_s unsigned int *newseg, bool new_sec, int dir); void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); - void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type); + void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@@ -3550,7 -3551,7 +3554,7 @@@ void f2fs_destroy_post_read_wq(struct f int f2fs_start_gc_thread(struct f2fs_sb_info *sbi); void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); - int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, + int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force, unsigned int segno); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); @@@ -3958,6 -3959,18 +3962,18 @@@ int f2fs_init_page_array_cache(struct f void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); int __init f2fs_init_compress_cache(void); void f2fs_destroy_compress_cache(void); + #define inc_compr_inode_stat(inode) \ + do { \ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \ + sbi->compr_new_inode++; \ + } while (0) + #define add_compr_block_stat(inode, blocks) \ + do { \ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \ + int diff = 
F2FS_I(inode)->i_cluster_size - blocks; \ + sbi->compr_written_block += blocks; \ + sbi->compr_saved_block += diff; \ + } while (0) #else static inline bool f2fs_is_compressed_page(struct page *page) { return false; } static inline bool f2fs_is_compress_backend_ready(struct inode *inode) @@@ -3986,6 -3999,7 +4002,7 @@@ static inline int f2fs_init_page_array_ static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } + #define inc_compr_inode_stat(inode) do { } while (0) #endif static inline void set_compress_context(struct inode *inode) @@@ -4009,6 -4023,7 +4026,7 @@@ F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); + inc_compr_inode_stat(inode); f2fs_mark_inode_dirty_sync(inode, true); } @@@ -4179,8 -4194,7 +4197,7 @@@ static inline bool f2fs_force_buffered_ if (F2FS_IO_ALIGNED(sbi)) return true; } - if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) && - !IS_SWAPFILE(inode)) + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) return true; return false; diff --combined fs/f2fs/file.c index 8a56acb,af7230f..44a4650 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@@ -22,7 -22,6 +22,7 @@@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@@ -991,8 -990,6 +991,8 @@@ const struct inode_operations f2fs_file .set_acl = f2fs_set_acl, .listxattr = f2fs_listxattr, .fiemap = f2fs_fiemap, + .fileattr_get = f2fs_fileattr_get, + .fileattr_set = f2fs_fileattr_set, }; static int fill_zero(struct inode *inode, pgoff_t index, @@@ -1622,9 -1619,10 +1622,10 @@@ static int expand_inode_data(struct ino struct f2fs_map_blocks map = { .m_next_pgofs = NULL, .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, .m_may_create = true }; - pgoff_t pg_end; + pgoff_t pg_start, pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; + block_t expanded = 0; int err; err = inode_newsize_ok(inode, (len + offset)); @@@ -1637,11 -1635,12 +1638,12 @@@ f2fs_balance_fs(sbi, true); + pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; off_end = (offset + len) & (PAGE_SIZE - 1); - map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT; - map.m_len = pg_end - map.m_lblk; + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; if (off_end) map.m_len++; @@@ -1649,19 -1648,15 +1651,15 @@@ return 0; if (f2fs_is_pinned_file(inode)) { - block_t len = (map.m_len >> sbi->log_blocks_per_seg) << - sbi->log_blocks_per_seg; - block_t done = 0; + block_t sec_blks = BLKS_PER_SEC(sbi); + block_t sec_len = roundup(map.m_len, sec_blks); - if (map.m_len % sbi->blocks_per_seg) - len += sbi->blocks_per_seg; - - map.m_len = sbi->blocks_per_seg; + map.m_len = sec_blks; next_alloc: if (has_not_enough_free_secs(sbi, 0, GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { down_write(&sbi->gc_lock); - err = f2fs_gc(sbi, true, false, NULL_SEGNO); + err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err && err != -ENODATA && err != -EAGAIN) goto out_err; } @@@ -1669,7 -1664,7 +1667,7 @@@ down_write(&sbi->pin_sem); f2fs_lock_op(sbi); - f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA_PINNED); + f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); f2fs_unlock_op(sbi); map.m_seg_type = CURSEG_COLD_DATA_PINNED; @@@ -1677,24 -1672,25 +1675,25 @@@ up_write(&sbi->pin_sem); - done += map.m_len; - len -= map.m_len; + expanded += map.m_len; + 
sec_len -= map.m_len; map.m_lblk += map.m_len; - if (!err && len) + if (!err && sec_len) goto next_alloc; - map.m_len = done; + map.m_len = expanded; } else { err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + expanded = map.m_len; } out_err: if (err) { pgoff_t last_off; - if (!map.m_len) + if (!expanded) return err; - last_off = map.m_lblk + map.m_len - 1; + last_off = pg_start + expanded - 1; /* update new size to the failed position */ new_size = (last_off == pg_end) ? offset + len : @@@ -1874,16 -1870,13 +1873,16 @@@ static int f2fs_setflags_common(struct return 0; } -/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */ +/* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */ /* * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. + * + * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and + * FS_IOC_FSSETXATTR is done by the VFS. */ static const struct { @@@ -1958,6 -1951,67 +1957,6 @@@ static inline u32 f2fs_fsflags_to_iflag return iflags; } -static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) -{ - struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); - u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); - - if (IS_ENCRYPTED(inode)) - fsflags |= FS_ENCRYPT_FL; - if (IS_VERITY(inode)) - fsflags |= FS_VERITY_FL; - if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) - fsflags |= FS_INLINE_DATA_FL; - if (is_inode_flag_set(inode, FI_PIN_FILE)) - fsflags |= FS_NOCOW_FL; - - fsflags &= F2FS_GETTABLE_FS_FL; - - return put_user(fsflags, (int __user *)arg); -} - -static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) -{ - struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); - u32 fsflags, old_fsflags; - u32 iflags; - int ret; - - if (!inode_owner_or_capable(&init_user_ns, inode)) - return -EACCES; - - if (get_user(fsflags, (int __user *)arg)) - return -EFAULT; - - if (fsflags & ~F2FS_GETTABLE_FS_FL) - return -EOPNOTSUPP; - fsflags &= F2FS_SETTABLE_FS_FL; - - iflags = f2fs_fsflags_to_iflags(fsflags); - if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) - return -EOPNOTSUPP; - - ret = mnt_want_write_file(filp); - if (ret) - return ret; - - inode_lock(inode); - - old_fsflags = f2fs_iflags_to_fsflags(fi->i_flags); - ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); - if (ret) - goto out; - - ret = f2fs_setflags_common(inode, iflags, - f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); -out: - inode_unlock(inode); - mnt_drop_write_file(filp); - return ret; -} - static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@@ -2434,7 -2488,7 +2433,7 @@@ static int f2fs_ioc_gc(struct file *fil down_write(&sbi->gc_lock); } - ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); + ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO); out: mnt_drop_write_file(filp); return ret; @@@ -2470,7 -2524,8 +2469,8 @@@ do_more down_write(&sbi->gc_lock); } - ret = f2fs_gc(sbi, range->sync, true, GET_SEGNO(sbi, range->start)); + ret = f2fs_gc(sbi, range->sync, true, false, + GET_SEGNO(sbi, range->start)); if (ret) { if (ret == -EBUSY) ret = -EAGAIN; @@@ -2527,7 -2582,7 +2527,7 @@@ static int f2fs_defragment_range(struc { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_extent = NULL, - 
.m_seg_type = NO_CHECK_TYPE , + .m_seg_type = NO_CHECK_TYPE, .m_may_create = false }; struct extent_info ei = {0, 0, 0}; pgoff_t pg_start, pg_end, next_pgofs; @@@ -2923,7 -2978,7 +2923,7 @@@ static int f2fs_ioc_flush_device(struc sm->last_victim[GC_CB] = end_segno + 1; sm->last_victim[GC_GREEDY] = end_segno + 1; sm->last_victim[ALLOC_NEXT] = end_segno + 1; - ret = f2fs_gc(sbi, true, true, start_segno); + ret = f2fs_gc(sbi, true, true, true, start_segno); if (ret == -EAGAIN) ret = 0; else if (ret < 0) @@@ -2964,8 -3019,9 +2964,8 @@@ int f2fs_transfer_project_quota(struct return err; } -static int f2fs_ioc_setproject(struct file *filp, __u32 projid) +static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) { - struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *ipage; @@@ -3026,7 -3082,7 +3026,7 @@@ int f2fs_transfer_project_quota(struct return 0; } -static int f2fs_ioc_setproject(struct file *filp, __u32 projid) +static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) { if (projid != F2FS_DEF_PROJID) return -EOPNOTSUPP; @@@ -3034,55 -3090,123 +3034,55 @@@ } #endif -/* FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR support */ - -/* - * To make a new on-disk f2fs i_flag gettable via FS_IOC_FSGETXATTR and settable - * via FS_IOC_FSSETXATTR, add an entry for it to f2fs_xflags_map[], and add its - * FS_XFLAG_* equivalent to F2FS_SUPPORTED_XFLAGS. - */ - -static const struct { - u32 iflag; - u32 xflag; -} f2fs_xflags_map[] = { - { F2FS_SYNC_FL, FS_XFLAG_SYNC }, - { F2FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE }, - { F2FS_APPEND_FL, FS_XFLAG_APPEND }, - { F2FS_NODUMP_FL, FS_XFLAG_NODUMP }, - { F2FS_NOATIME_FL, FS_XFLAG_NOATIME }, - { F2FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT }, -}; - -#define F2FS_SUPPORTED_XFLAGS ( \ - FS_XFLAG_SYNC | \ - FS_XFLAG_IMMUTABLE | \ - FS_XFLAG_APPEND | \ - FS_XFLAG_NODUMP | \ - FS_XFLAG_NOATIME | \ - FS_XFLAG_PROJINHERIT) - -/* Convert f2fs on-disk i_flags to FS_IOC_FS{GET,SET}XATTR flags */ -static inline u32 f2fs_iflags_to_xflags(u32 iflags) -{ - u32 xflags = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++) - if (iflags & f2fs_xflags_map[i].iflag) - xflags |= f2fs_xflags_map[i].xflag; - - return xflags; -} - -/* Convert FS_IOC_FS{GET,SET}XATTR flags to f2fs on-disk i_flags */ -static inline u32 f2fs_xflags_to_iflags(u32 xflags) -{ - u32 iflags = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++) - if (xflags & f2fs_xflags_map[i].xflag) - iflags |= f2fs_xflags_map[i].iflag; - - return iflags; -} - -static void f2fs_fill_fsxattr(struct inode *inode, struct fsxattr *fa) +int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa) { + struct inode *inode = d_inode(dentry); struct f2fs_inode_info *fi = F2FS_I(inode); + u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); - simple_fill_fsxattr(fa, f2fs_iflags_to_xflags(fi->i_flags)); + if (IS_ENCRYPTED(inode)) + fsflags |= FS_ENCRYPT_FL; + if (IS_VERITY(inode)) + fsflags |= FS_VERITY_FL; + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + fsflags |= FS_INLINE_DATA_FL; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + fsflags |= FS_NOCOW_FL; + + fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL); if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); -} -static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) -{ - struct inode *inode = file_inode(filp); - struct fsxattr fa; - - f2fs_fill_fsxattr(inode, &fa); - 
- if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa))) - return -EFAULT; return 0; } -static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) +int f2fs_fileattr_set(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa) { - struct inode *inode = file_inode(filp); - struct fsxattr fa, old_fa; + struct inode *inode = d_inode(dentry); + u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL; u32 iflags; int err; - if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa))) - return -EFAULT; - - /* Make sure caller has proper permission */ - if (!inode_owner_or_capable(&init_user_ns, inode)) - return -EACCES; - - if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS) + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) + return -ENOSPC; + if (fsflags & ~F2FS_GETTABLE_FS_FL) return -EOPNOTSUPP; + fsflags &= F2FS_SETTABLE_FS_FL; + if (!fa->flags_valid) + mask &= FS_COMMON_FL; - iflags = f2fs_xflags_to_iflags(fa.fsx_xflags); + iflags = f2fs_fsflags_to_iflags(fsflags); if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) return -EOPNOTSUPP; - err = mnt_want_write_file(filp); - if (err) - return err; - - inode_lock(inode); - - f2fs_fill_fsxattr(inode, &old_fa); - err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); - if (err) - goto out; - - err = f2fs_setflags_common(inode, iflags, - f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS)); - if (err) - goto out; + err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask)); + if (!err) + err = f2fs_ioc_setproject(inode, fa->fsx_projid); - err = f2fs_ioc_setproject(filp, fa.fsx_projid); -out: - inode_unlock(inode); - mnt_drop_write_file(filp); return err; } @@@ -3927,7 -4051,7 +3927,7 @@@ out static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx); + DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); struct address_space *mapping = inode->i_mapping; struct page *page; pgoff_t redirty_idx = page_idx; @@@ -4109,6 -4233,10 +4109,6 @@@ out static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { - case FS_IOC_GETFLAGS: - return f2fs_ioc_getflags(filp, arg); - case FS_IOC_SETFLAGS: - return f2fs_ioc_setflags(filp, arg); case FS_IOC_GETVERSION: return f2fs_ioc_getversion(filp, arg); case F2FS_IOC_START_ATOMIC_WRITE: @@@ -4157,6 -4285,10 +4157,6 @@@ return f2fs_ioc_flush_device(filp, arg); case F2FS_IOC_GET_FEATURES: return f2fs_ioc_get_features(filp, arg); - case FS_IOC_FSGETXATTR: - return f2fs_ioc_fsgetxattr(filp, arg); - case FS_IOC_FSSETXATTR: - return f2fs_ioc_fssetxattr(filp, arg); case F2FS_IOC_GET_PIN_FILE: return f2fs_ioc_get_pin_file(filp, arg); case F2FS_IOC_SET_PIN_FILE: @@@ -4311,8 -4443,13 +4311,13 @@@ write clear_inode_flag(inode, FI_NO_PREALLOC); /* if we couldn't write data, we should deallocate blocks. 
*/ - if (preallocated && i_size_read(inode) < target_size) + if (preallocated && i_size_read(inode) < target_size) { + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); f2fs_truncate(inode); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + } if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); @@@ -4386,6 -4523,12 +4391,6 @@@ long f2fs_compat_ioctl(struct file *fil return -ENOSPC; switch (cmd) { - case FS_IOC32_GETFLAGS: - cmd = FS_IOC_GETFLAGS; - break; - case FS_IOC32_SETFLAGS: - cmd = FS_IOC_SETFLAGS; - break; case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; @@@ -4414,6 -4557,8 +4419,6 @@@ case F2FS_IOC_DEFRAGMENT: case F2FS_IOC_FLUSH_DEVICE: case F2FS_IOC_GET_FEATURES: - case FS_IOC_FSGETXATTR: - case FS_IOC_FSSETXATTR: case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: diff --combined fs/f2fs/namei.c index 377c6b1,405d85d..a9cd9cf --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@@ -416,8 -416,10 +416,9 @@@ out struct dentry *f2fs_get_parent(struct dentry *child) { - struct qstr dotdot = QSTR_INIT("..", 2); struct page *page; - unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot, &page); + unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot_name, &page); + if (!ino) { if (IS_ERR(page)) return ERR_CAST(page); @@@ -627,6 -629,7 +628,7 @@@ static const char *f2fs_get_link(struc struct delayed_call *done) { const char *link = page_get_link(dentry, inode, done); + if (!IS_ERR(link) && !*link) { /* this is broken symlink case */ do_delayed_call(done); @@@ -765,6 -768,7 +767,7 @@@ out_fail static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); + if (f2fs_empty_dir(inode)) return f2fs_unlink(dir, dentry); return -ENOTEMPTY; @@@ -1326,8 -1330,6 +1329,8 @@@ const struct inode_operations f2fs_dir_ .set_acl = f2fs_set_acl, .listxattr = f2fs_listxattr, .fiemap = f2fs_fiemap, + .fileattr_get = f2fs_fileattr_get, + .fileattr_set = f2fs_fileattr_set, }; const struct inode_operations f2fs_symlink_inode_operations = { diff --combined fs/f2fs/segment.c index c286656,efac388..c605415 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@@ -186,7 -186,10 +186,10 @@@ void f2fs_register_inmem_page(struct in { struct inmem_pages *new; - f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE); + if (PagePrivate(page)) + set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); + else + f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); @@@ -324,23 -327,27 +327,27 @@@ void f2fs_drop_inmem_pages(struct inod struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - while (!list_empty(&fi->inmem_pages)) { + do { mutex_lock(&fi->inmem_lock); + if (list_empty(&fi->inmem_pages)) { + fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; + + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + sbi->atomic_files--; + } + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + mutex_unlock(&fi->inmem_lock); + break; + } __revoke_inmem_pages(inode, &fi->inmem_pages, true, false, true); mutex_unlock(&fi->inmem_lock); - } - - fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; - - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (!list_empty(&fi->inmem_ilist)) - list_del_init(&fi->inmem_ilist); - if (f2fs_is_atomic_file(inode)) { 
- clear_inode_flag(inode, FI_ATOMIC_FILE); - sbi->atomic_files--; - } - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + } while (1); } void f2fs_drop_inmem_page(struct inode *inode, struct page *page) @@@ -503,8 -510,19 +510,19 @@@ void f2fs_balance_fs(struct f2fs_sb_inf * dir/node pages without enough free segments. */ if (has_not_enough_free_secs(sbi, 0, 0)) { - down_write(&sbi->gc_lock); - f2fs_gc(sbi, false, false, NULL_SEGNO); + if (test_opt(sbi, GC_MERGE) && sbi->gc_thread && + sbi->gc_thread->f2fs_gc_task) { + DEFINE_WAIT(wait); + + prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait, + TASK_UNINTERRUPTIBLE); + wake_up(&sbi->gc_thread->gc_wait_queue_head); + io_schedule(); + finish_wait(&sbi->gc_thread->fggc_wq, &wait); + } else { + down_write(&sbi->gc_lock); + f2fs_gc(sbi, false, false, false, NULL_SEGNO); + } } } @@@ -653,7 -671,11 +671,11 @@@ int f2fs_issue_flush(struct f2fs_sb_inf llist_add(&cmd.llnode, &fcc->issue_list); - /* update issue_list before we wake up issue_flush thread */ + /* + * update issue_list before we wake up issue_flush thread, this + * smp_mb() pairs with another barrier in ___wait_event(), see + * more details in comments of waitqueue_active(). + */ smp_mb(); if (waitqueue_active(&fcc->flush_wait_queue)) @@@ -861,7 -883,7 +883,7 @@@ static void locate_dirty_segment(struc mutex_lock(&dirty_i->seglist_lock); valid_blocks = get_valid_blocks(sbi, segno, false); - ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno); + ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false); if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) || ckpt_valid_blocks == usable_blocks)) { @@@ -946,7 -968,7 +968,7 @@@ static unsigned int get_free_segment(st for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { if (get_valid_blocks(sbi, segno, false)) continue; - if (get_ckpt_valid_blocks(sbi, segno)) + if (get_ckpt_valid_blocks(sbi, segno, false)) continue; mutex_unlock(&dirty_i->seglist_lock); return segno; @@@ -1095,6 -1117,8 +1117,8 @@@ static void __init_discard_policy(struc struct discard_policy *dpolicy, int discard_type, unsigned int granularity) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + /* common policy */ dpolicy->type = discard_type; dpolicy->sync = true; @@@ -1114,7 -1138,9 +1138,9 @@@ dpolicy->ordered = true; if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { dpolicy->granularity = 1; - dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; + if (atomic_read(&dcc->discard_cmd_cnt)) + dpolicy->max_interval = + DEF_MIN_DISCARD_ISSUE_TIME; } } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; @@@ -1730,8 -1756,15 +1756,15 @@@ static int issue_discard_thread(void *d set_freezable(); do { - __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, - dcc->discard_granularity); + if (sbi->gc_mode == GC_URGENT_HIGH || + !f2fs_available_free_memory(sbi, DISCARD_CACHE)) + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); + else + __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, + dcc->discard_granularity); + + if (!atomic_read(&dcc->discard_cmd_cnt)) + wait_ms = dpolicy.max_interval; wait_event_interruptible_timeout(*q, kthread_should_stop() || freezing(current) || @@@ -1755,9 -1788,8 +1788,8 @@@ wait_ms = dpolicy.max_interval; continue; } - - if (sbi->gc_mode == GC_URGENT_HIGH) - __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); + if (!atomic_read(&dcc->discard_cmd_cnt)) + continue; sb_start_intwrite(sbi->sb); @@@ -1765,7 -1797,7 +1797,7 @@@ if (issued > 0) { __wait_all_discard_cmd(sbi, 
&dpolicy); wait_ms = dpolicy.min_interval; - } else if (issued == -1){ + } else if (issued == -1) { wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME); if (!wait_ms) wait_ms = dpolicy.mid_interval; @@@ -2142,6 -2174,7 +2174,7 @@@ static void __set_sit_entry_type(struc unsigned int segno, int modified) { struct seg_entry *se = get_seg_entry(sbi, segno); + se->type = type; if (modified) __mark_sit_entry_dirty(sbi, segno); @@@ -2333,6 -2366,7 +2366,7 @@@ static void __add_sum_entry(struct f2fs { struct curseg_info *curseg = CURSEG_I(sbi, type); void *addr = curseg->sum_blk; + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); memcpy(addr, sum, sizeof(struct f2fs_summary)); } @@@ -2604,22 -2638,20 +2638,20 @@@ static void new_curseg(struct f2fs_sb_i curseg->alloc_type = LFS; } - static void __next_free_blkoff(struct f2fs_sb_info *sbi, - struct curseg_info *seg, block_t start) + static int __next_free_blkoff(struct f2fs_sb_info *sbi, + int segno, block_t start) { - struct seg_entry *se = get_seg_entry(sbi, seg->segno); + struct seg_entry *se = get_seg_entry(sbi, segno); int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); unsigned long *target_map = SIT_I(sbi)->tmp_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long *cur_map = (unsigned long *)se->cur_valid_map; - int i, pos; + int i; for (i = 0; i < entries; i++) target_map[i] = ckpt_map[i] | cur_map[i]; - pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); - - seg->next_blkoff = pos; + return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); } /* @@@ -2631,11 -2663,18 +2663,18 @@@ static void __refresh_next_blkoff(struc struct curseg_info *seg) { if (seg->alloc_type == SSR) - __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); + seg->next_blkoff = + __next_free_blkoff(sbi, seg->segno, + seg->next_blkoff + 1); else seg->next_blkoff++; } + bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) + { + return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg; + } + /* * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks @@@ -2661,7 -2700,7 +2700,7 @@@ static void change_curseg(struct f2fs_s reset_curseg(sbi, type, 1); curseg->alloc_type = SSR; - __next_free_blkoff(sbi, curseg, 0); + curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0); sum_page = f2fs_get_sum_page(sbi, new_segno); if (IS_ERR(sum_page)) { @@@ -2893,7 -2932,8 +2932,8 @@@ unlock up_read(&SM_I(sbi)->curseg_lock); } - static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type) + static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, + bool new_sec, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; @@@ -2901,32 -2941,43 +2941,43 @@@ if (!curseg->inited) goto alloc; - if (!curseg->next_blkoff && - !get_valid_blocks(sbi, curseg->segno, false) && - !get_ckpt_valid_blocks(sbi, curseg->segno)) - return; + if (force || curseg->next_blkoff || + get_valid_blocks(sbi, curseg->segno, new_sec)) + goto alloc; + if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) + return; alloc: old_segno = curseg->segno; SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); locate_dirty_segment(sbi, old_segno); } - void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type) + static void __allocate_new_section(struct f2fs_sb_info *sbi, + int type, bool force) + { + __allocate_new_segment(sbi, type, true, force); + } + + void 
f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { + down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); - __allocate_new_segment(sbi, type); + __allocate_new_section(sbi, type, force); up_write(&SIT_I(sbi)->sentry_lock); + up_read(&SM_I(sbi)->curseg_lock); } void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) - __allocate_new_segment(sbi, i); + __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); + up_read(&SM_I(sbi)->curseg_lock); } static const struct segment_allocation default_salloc_ops = { @@@ -3239,7 -3290,9 +3290,9 @@@ static int __get_segment_type_6(struct struct inode *inode = fio->page->mapping->host; if (is_cold_data(fio->page)) { - if (fio->sbi->am.atgc_enabled) + if (fio->sbi->am.atgc_enabled && + (fio->io_type == FS_DATA_IO) && + (fio->sbi->gc_mode != GC_URGENT_HIGH)) return CURSEG_ALL_DATA_ATGC; else return CURSEG_COLD_DATA; @@@ -3365,12 -3418,12 +3418,12 @@@ void f2fs_allocate_data_block(struct f2 f2fs_inode_chksum_set(sbi, page); } - if (F2FS_IO_ALIGNED(sbi)) - fio->retry = false; - if (fio) { struct f2fs_bio_info *io; + if (F2FS_IO_ALIGNED(sbi)) + fio->retry = false; + INIT_LIST_HEAD(&fio->list); fio->in_list = true; io = sbi->write_io[fio->type] + fio->temp; @@@ -3499,7 -3552,13 +3552,13 @@@ int f2fs_inplace_write_data(struct f2fs set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", __func__, segno); - return -EFSCORRUPTED; + err = -EFSCORRUPTED; + goto drop_bio; + } + + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || f2fs_cp_error(sbi)) { + err = -EIO; + goto drop_bio; } stat_inc_inplace_blocks(fio->sbi); @@@ -3514,6 -3573,15 +3573,15 @@@ } return err; + drop_bio: + if (fio->bio) { + struct bio *bio = *(fio->bio); + + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + fio->bio = NULL; + } + return err; } static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, @@@ -3539,6 -3607,7 +3607,7 @@@ void f2fs_do_replace_block(struct f2fs_ struct seg_entry *se; int type; unsigned short old_blkoff; + unsigned char old_alloc_type; segno = GET_SEGNO(sbi, new_blkaddr); se = get_seg_entry(sbi, segno); @@@ -3572,6 -3641,7 +3641,7 @@@ old_cursegno = curseg->segno; old_blkoff = curseg->next_blkoff; + old_alloc_type = curseg->alloc_type; /* change the current segment */ if (segno != curseg->segno) { @@@ -3606,6 -3676,7 +3676,7 @@@ change_curseg(sbi, type, true); } curseg->next_blkoff = old_blkoff; + curseg->alloc_type = old_alloc_type; } up_write(&sit_i->sentry_lock); @@@ -3717,6 -3788,7 +3788,7 @@@ static int read_compacted_summaries(str for (j = 0; j < blk_off; j++) { struct f2fs_summary *s; + s = (struct f2fs_summary *)(kaddr + offset); seg_i->sum_blk->entries[j] = *s; offset += SUMMARY_SIZE; @@@ -3779,6 -3851,7 +3851,7 @@@ static int read_normal_summaries(struc if (__exist_node_summaries(sbi)) { struct f2fs_summary *ns = &sum->entries[0]; int i; + for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { ns->version = 0; ns->ofs_in_node = 0; @@@ -3880,6 -3953,7 +3953,7 @@@ static void write_compacted_summaries(s /* Step 3: write summary entries */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { unsigned short blkoff; + seg_i = CURSEG_I(sbi, i); if (sbi->ckpt->alloc_type[i] == SSR) blkoff = sbi->blocks_per_seg; @@@ -3916,6 -3990,7 +3990,7 @@@ static void write_normal_summaries(stru block_t blkaddr, int type) { int i, 
end; + if (IS_DATASEG(type)) end = type + NR_CURSEG_DATA_TYPE; else @@@ -4381,7 -4456,7 +4456,7 @@@ static int build_sit_entries(struct f2f block_t total_node_blocks = 0; do { - readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, + readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS, META_SIT, true); start = start_blk * sit_i->sents_per_block; @@@ -4499,6 -4574,7 +4574,7 @@@ static void init_free_segmap(struct f2f /* set use the current segments */ for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { struct curseg_info *curseg_t = CURSEG_I(sbi, type); + __set_test_and_inuse(sbi, curseg_t->segno); } } @@@ -4731,7 -4807,8 +4807,8 @@@ static struct f2fs_dev_info *get_target } static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, - void *data) { + void *data) + { memcpy(data, zone, sizeof(struct blk_zone)); return 0; } @@@ -4783,7 -4860,8 +4860,8 @@@ static int fix_curseg_write_pointer(str f2fs_notice(sbi, "Assign new section to curseg[%d]: " "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); - allocate_segment_by_default(sbi, type, true); + + f2fs_allocate_new_section(sbi, type, true); /* check consistency of the zone curseg pointed to */ if (check_zone_write_pointer(sbi, zbd, &zone)) @@@ -4847,8 -4925,10 +4925,10 @@@ struct check_zone_write_pointer_args }; static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, - void *data) { + void *data) + { struct check_zone_write_pointer_args *args; + args = (struct check_zone_write_pointer_args *)data; return check_zone_write_pointer(args->sbi, args->fdev, zone); @@@ -5127,6 -5207,7 +5207,7 @@@ static void discard_dirty_segmap(struc static void destroy_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + kvfree(dirty_i->victim_secmap); } @@@ -5171,6 -5252,7 +5252,7 @@@ static void destroy_curseg(struct f2fs_ static void destroy_free_segmap(struct f2fs_sb_info *sbi) { struct free_segmap_info *free_i = SM_I(sbi)->free_info; + if (!free_i) return; SM_I(sbi)->free_info = NULL; diff --combined fs/f2fs/segment.h index e9a7a63,dab87ec..050230c --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@@ -172,12 -172,10 +172,10 @@@ enum /* * BG_GC means the background cleaning job. * FG_GC means the on-demand cleaning job. - * FORCE_FG_GC means on-demand cleaning job in background. 
*/ enum { BG_GC = 0, FG_GC, - FORCE_FG_GC, }; /* for a function parameter to select a victim segment */ @@@ -361,8 -359,20 +359,20 @@@ static inline unsigned int get_valid_bl } static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, - unsigned int segno) + unsigned int segno, bool use_section) { + if (use_section && __is_large_section(sbi)) { + unsigned int start_segno = START_SEGNO(segno); + unsigned int blocks = 0; + int i; + + for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) { + struct seg_entry *se = get_seg_entry(sbi, start_segno); + + blocks += se->ckpt_valid_blocks; + } + return blocks; + } return get_seg_entry(sbi, segno)->ckpt_valid_blocks; } @@@ -851,7 -861,7 +861,7 @@@ static inline int nr_pages_to_skip(stru else if (type == NODE) return 8 * sbi->blocks_per_seg; else if (type == META) - return 8 * BIO_MAX_PAGES; + return 8 * BIO_MAX_VECS; else return 0; } @@@ -868,7 -878,7 +878,7 @@@ static inline long nr_pages_to_write(st return 0; nr_to_write = wbc->nr_to_write; - desired = BIO_MAX_PAGES; + desired = BIO_MAX_VECS; if (type == NODE) desired <<= 1; diff --combined fs/f2fs/super.c index 82592b1,5020152..7d325bf --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@@ -151,6 -151,8 +151,8 @@@ enum Opt_compress_chksum, Opt_compress_mode, Opt_atgc, + Opt_gc_merge, + Opt_nogc_merge, Opt_err, }; @@@ -223,6 -225,8 +225,8 @@@ static match_table_t f2fs_tokens = {Opt_compress_chksum, "compress_chksum"}, {Opt_compress_mode, "compress_mode=%s"}, {Opt_atgc, "atgc"}, + {Opt_gc_merge, "gc_merge"}, + {Opt_nogc_merge, "nogc_merge"}, {Opt_err, NULL}, }; @@@ -555,6 -559,7 +559,7 @@@ static int parse_options(struct super_b while ((p = strsep(&options, ",")) != NULL) { int token; + if (!*p) continue; /* @@@ -753,9 -758,9 +758,9 @@@ case Opt_io_size_bits: if (args->from && match_int(args, &arg)) return -EINVAL; - if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { + if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) { f2fs_warn(sbi, "Not support %d, larger than %d", - 1 << arg, BIO_MAX_PAGES); + 1 << arg, BIO_MAX_VECS); return -EINVAL; } F2FS_OPTION(sbi).write_io_size_bits = arg; @@@ -1073,6 -1078,12 +1078,12 @@@ case Opt_atgc: set_opt(sbi, ATGC); break; + case Opt_gc_merge: + set_opt(sbi, GC_MERGE); + break; + case Opt_nogc_merge: + clear_opt(sbi, GC_MERGE); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@@ -1616,6 -1627,7 +1627,7 @@@ static inline void f2fs_show_quota_opti #endif } + #ifdef CONFIG_F2FS_FS_COMPRESSION static inline void f2fs_show_compress_options(struct seq_file *seq, struct super_block *sb) { @@@ -1661,6 -1673,7 +1673,7 @@@ else if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER) seq_printf(seq, ",compress_mode=%s", "user"); } + #endif static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { @@@ -1673,6 -1686,9 +1686,9 @@@ else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) seq_printf(seq, ",background_gc=%s", "off"); + if (test_opt(sbi, GC_MERGE)) + seq_puts(seq, ",gc_merge"); + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, NORECOVERY)) @@@ -1824,6 -1840,7 +1840,7 @@@ static void default_options(struct f2fs set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); clear_opt(sbi, DISABLE_CHECKPOINT); + set_opt(sbi, MERGE_CHECKPOINT); F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); @@@ -1865,7 -1882,7 +1882,7 @@@ static int f2fs_disable_checkpoint(stru while (!f2fs_time_over(sbi, DISABLE_TIME)) { 
down_write(&sbi->gc_lock); - err = f2fs_gc(sbi, true, false, NULL_SEGNO); + err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; break; @@@ -1876,7 -1893,7 +1893,7 @@@ ret = sync_filesystem(sbi->sb); if (ret || err) { - err = ret ? ret: err; + err = ret ? ret : err; goto restore_flag; } @@@ -1925,8 -1942,9 +1942,9 @@@ static int f2fs_remount(struct super_bl struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; int err; - bool need_restart_gc = false; - bool need_stop_gc = false; + bool need_restart_gc = false, need_stop_gc = false; + bool need_restart_ckpt = false, need_stop_ckpt = false; + bool need_restart_flush = false, need_stop_flush = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); @@@ -2035,7 -2053,8 +2053,8 @@@ * option. Also sync the filesystem. */ if ((*flags & SB_RDONLY) || - F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) { + (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF && + !test_opt(sbi, GC_MERGE))) { if (sbi->gc_thread) { f2fs_stop_gc_thread(sbi); need_restart_gc = true; @@@ -2057,18 -2076,11 +2076,11 @@@ clear_sbi_flag(sbi, SBI_IS_CLOSE); } - if (checkpoint_changed) { - if (test_opt(sbi, DISABLE_CHECKPOINT)) { - err = f2fs_disable_checkpoint(sbi); - if (err) - goto restore_gc; - } else { - f2fs_enable_checkpoint(sbi); - } - } - - if (!test_opt(sbi, DISABLE_CHECKPOINT) && - test_opt(sbi, MERGE_CHECKPOINT)) { + if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || + !test_opt(sbi, MERGE_CHECKPOINT)) { + f2fs_stop_ckpt_thread(sbi); + need_restart_ckpt = true; + } else { err = f2fs_start_ckpt_thread(sbi); if (err) { f2fs_err(sbi, @@@ -2076,8 -2088,7 +2088,7 @@@ err); goto restore_gc; } - } else { - f2fs_stop_ckpt_thread(sbi); + need_stop_ckpt = true; } /* @@@ -2087,11 -2098,24 +2098,24 @@@ if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); f2fs_destroy_flush_cmd_control(sbi, false); + need_restart_flush = true; } else { err = f2fs_create_flush_cmd_control(sbi); if (err) - goto restore_gc; + goto restore_ckpt; + need_stop_flush = true; } + + if (checkpoint_changed) { + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto restore_flush; + } else { + f2fs_enable_checkpoint(sbi); + } + } + skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ @@@ -2106,6 -2130,21 +2130,21 @@@ adjust_unusable_cap_perc(sbi); *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); return 0; + restore_flush: + if (need_restart_flush) { + if (f2fs_create_flush_cmd_control(sbi)) + f2fs_warn(sbi, "background flush thread has stopped"); + } else if (need_stop_flush) { + clear_opt(sbi, FLUSH_MERGE); + f2fs_destroy_flush_cmd_control(sbi, false); + } + restore_ckpt: + if (need_restart_ckpt) { + if (f2fs_start_ckpt_thread(sbi)) + f2fs_warn(sbi, "background ckpt thread has stopped"); + } else if (need_stop_ckpt) { + f2fs_stop_ckpt_thread(sbi); + } restore_gc: if (need_restart_gc) { if (f2fs_start_gc_thread(sbi)) @@@ -3719,7 -3758,7 +3758,7 @@@ try_onemore sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS; for (i = 0; i < NR_PAGE_TYPE; i++) { - int n = (i == META) ? 1: NR_TEMP_TYPE; + int n = (i == META) ? 
1 : NR_TEMP_TYPE; int j; sbi->write_io[i] = @@@ -3833,7 -3872,7 +3872,7 @@@ /* setup checkpoint request control and start checkpoint issue thread */ f2fs_init_ckpt_req_control(sbi); - if (!test_opt(sbi, DISABLE_CHECKPOINT) && + if (!f2fs_readonly(sb) && !test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, MERGE_CHECKPOINT)) { err = f2fs_start_ckpt_thread(sbi); if (err) { @@@ -3929,10 -3968,18 +3968,18 @@@ * previous checkpoint was not done by clean system shutdown. */ if (f2fs_hw_is_readonly(sbi)) { - if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) - f2fs_err(sbi, "Need to recover fsync data, but write access unavailable"); - else - f2fs_info(sbi, "write access unavailable, skipping recovery"); + if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + err = f2fs_recover_fsync_data(sbi, true); + if (err > 0) { + err = -EROFS; + f2fs_err(sbi, "Need to recover fsync data, but " + "write access unavailable, please try " + "mount w/ disable_roll_forward or norecovery"); + } + if (err < 0) + goto free_meta; + } + f2fs_info(sbi, "write access unavailable, skipping recovery"); goto reset_checkpoint; } @@@ -3989,7 -4036,8 +4036,8 @@@ reset_checkpoint * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) { + if ((F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF || + test_opt(sbi, GC_MERGE)) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) diff --combined fs/f2fs/verity.c index a7beff2,15ba369..03549b5 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@@ -152,40 -152,73 +152,73 @@@ static int f2fs_end_enable_verity(struc size_t desc_size, u64 merkle_tree_size) { struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size; struct fsverity_descriptor_location dloc = { .version = cpu_to_le32(F2FS_VERIFY_VER), .size = cpu_to_le32(desc_size), .pos = cpu_to_le64(desc_pos), }; - int err = 0; + int err = 0, err2 = 0; - if (desc != NULL) { - /* Succeeded; write the verity descriptor. */ - err = pagecache_write(inode, desc, desc_size, desc_pos); + /* + * If an error already occurred (which fs/verity/ signals by passing + * desc == NULL), then only clean-up is needed. + */ + if (desc == NULL) + goto cleanup; - /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */ - if (!err) - err = filemap_write_and_wait(inode->i_mapping); - } + /* Append the verity descriptor. */ + err = pagecache_write(inode, desc, desc_size, desc_pos); + if (err) + goto cleanup; + + /* + * Write all pages (both data and verity metadata). Note that this must + * happen before clearing FI_VERITY_IN_PROGRESS; otherwise pages beyond + * i_size won't be written properly. For crash consistency, this also + * must happen before the verity inode flag gets persisted. + */ + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto cleanup; + + /* Set the verity xattr. */ + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY, + F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), + NULL, XATTR_CREATE); + if (err) + goto cleanup; - /* If we failed, truncate anything we wrote past i_size. */ - if (desc == NULL || err) - f2fs_truncate(inode); + /* Finally, set the verity inode flag. 
*/ + file_set_verity(inode); + f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, true); clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return 0; - if (desc != NULL && !err) { - err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY, - F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), - NULL, XATTR_CREATE); - if (!err) { - file_set_verity(inode); - f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, true); - } + cleanup: + /* + * Verity failed to be enabled, so clean up by truncating any verity + * metadata that was written beyond i_size (both from cache and from + * disk) and clearing FI_VERITY_IN_PROGRESS. + * + * Taking i_gc_rwsem[WRITE] is needed to stop f2fs garbage collection + * from re-instantiating cached pages we are truncating (since unlike + * normal file accesses, garbage collection isn't limited by i_size). + */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + truncate_inode_pages(inode->i_mapping, inode->i_size); + err2 = f2fs_truncate(inode); + if (err2) { + f2fs_err(sbi, "Truncating verity metadata failed (errno=%d)", + err2); + set_sbi_flag(sbi, SBI_NEED_FSCK); } - return err; + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return err ?: err2; } static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, @@@ -228,7 -261,7 +261,7 @@@ static struct page *f2fs_read_merkle_tr pgoff_t index, unsigned long num_ra_pages) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); + DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); struct page *page; index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;