For large_nat_bitmap feature, there is a design flaw:
Previous:
struct f2fs_checkpoint layout:
+--------------------------+  0x0000
| checkpoint_ver           |
| ......                   |
| checksum_offset          |------+
| ......                   |      |
| sit_nat_version_bitmap[] |<-----|-------+
| ......                   |      |       |
| checksum_value           |<-----+       |
+--------------------------+  0x1000      |
|                          |      nat_bitmap + sit_bitmap
| payload blocks           |              |
|                          |              |
+--------------------------|<-------------+
If the combined size of nat_bitmap and sit_bitmap is larger than
MAX_BITMAP_SIZE_IN_CKPT, nat_bitmap or sit_bitmap may overlap the
position of the checkpoint checksum. Once checkpoint() is triggered
from the kernel, the nat or sit bitmap is then damaged by the checksum field.
To fix this, relocate checksum_value to the head of
sit_nat_version_bitmap as shown below, so that updates to the nat/sit
bitmaps and to the checksum value no longer conflict.
After:
struct f2fs_checkpoint layout:
+--------------------------+  0x0000
| checkpoint_ver           |
| ......                   |
| checksum_offset          |------+
| ......                   |      |
| sit_nat_version_bitmap[] |<-----+
| ......                   |<-------------+
|                          |              |
+--------------------------+  0x1000      |
|                          |      nat_bitmap + sit_bitmap
| payload blocks           |              |
|                          |              |
+--------------------------|<-------------+
Related report and discussion:
https://sourceforge.net/p/linux-f2fs/mailman/message/36642346/
In addition, when writing a checkpoint with the large_nat_bitmap feature
enabled, we need to set CP_LARGE_NAT_BITMAP_FLAG in the checkpoint.
Reported-by: Park Ju Hyung <qkrwngud825@gmail.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
int offset;
if (is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG)) {
+ unsigned int chksum_size = 0;
+
offset = (flag == SIT_BITMAP) ?
le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+
+ if (le32_to_cpu(ckpt->checksum_offset) ==
+ CP_MIN_CHKSUM_OFFSET)
+ chksum_size = sizeof(__le32);
+
+ return &ckpt->sit_nat_version_bitmap + offset + chksum_size;
}
if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
return 0;
}
+void fsck_chk_checkpoint(struct f2fs_sb_info *sbi) /* detect the deprecated large_nat_bitmap checkpoint layout and request a fix */
+{
+ struct f2fs_checkpoint *cp = F2FS_CKPT(sbi); /* presumably read implicitly by get_cp() -- TODO confirm */
+
+ if (get_cp(ckpt_flags) & CP_LARGE_NAT_BITMAP_FLAG) { /* only large_nat_bitmap checkpoints are checked */
+ if (get_cp(checksum_offset) != CP_MIN_CHKSUM_OFFSET) { /* checksum is not at the start of sit_nat_version_bitmap */
+ ASSERT_MSG("Deprecated layout of large_nat_bitmap, "
+ "chksum_offset:%u", get_cp(checksum_offset));
+ c.fix_chksum = 1; /* remember that the old checksum position must be fixed */
+ }
+ }
+}
+
void fsck_init(struct f2fs_sb_info *sbi)
{
struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
free(sit_blk);
}
+static void fix_checksum(struct f2fs_sb_info *sbi) /* rewrite the NAT and SIT bitmaps in the checkpoint so they start after the checksum slot */
+{
+ struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct sit_info *sit_i = SIT_I(sbi);
+ void *bitmap_offset; /* NOTE(review): arithmetic on void * below relies on a GNU C extension */
+
+ if (!c.fix_chksum) /* nothing to do unless c.fix_chksum is set */
+ return;
+
+ bitmap_offset = cp->sit_nat_version_bitmap + sizeof(__le32); /* skip the __le32 checksum slot at the head of the bitmap area */
+
+ memcpy(bitmap_offset, nm_i->nat_bitmap, nm_i->bitmap_size); /* NAT bitmap goes first */
+ memcpy(bitmap_offset + nm_i->bitmap_size, /* SIT bitmap follows directly after the NAT bitmap */
+ sit_i->sit_bitmap, sit_i->bitmap_size);
+}
+
static void fix_checkpoint(struct f2fs_sb_info *sbi)
{
struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
flags |= CP_TRIMMED_FLAG;
if (is_set_ckpt_flags(cp, CP_DISABLED_FLAG))
flags |= CP_DISABLED_FLAG;
+ if (is_set_ckpt_flags(cp, CP_LARGE_NAT_BITMAP_FLAG)) {
+ flags |= CP_LARGE_NAT_BITMAP_FLAG;
+ set_cp(checksum_offset, CP_MIN_CHKSUM_OFFSET);
+ } else {
+ set_cp(checksum_offset, CP_CHKSUM_OFFSET);
+ }
if (flags & CP_UMOUNT_FLAG)
cp_blocks = 8;
write_curseg_info(sbi);
flush_curseg_sit_entries(sbi);
}
+ fix_checksum(sbi);
fix_checkpoint(sbi);
} else if (is_set_ckpt_flags(cp, CP_FSCK_FLAG) ||
is_set_ckpt_flags(cp, CP_QUOTA_NEED_FSCK_FLAG)) {
int, int);
int fsck_chk_inline_dentries(struct f2fs_sb_info *, struct f2fs_node *,
struct child_info *);
+void fsck_chk_checkpoint(struct f2fs_sb_info *sbi);
int fsck_chk_meta(struct f2fs_sb_info *sbi);
int fsck_chk_curseg_info(struct f2fs_sb_info *);
void pretty_print_filename(const u8 *raw_name, u32 len,
c.fix_on = 1;
}
+ fsck_chk_checkpoint(sbi);
+
fsck_chk_quota_node(sbi);
/* Traverse all block recursively from root inode */
unsigned int chksum_offset = get_cp(checksum_offset);
unsigned int crc, cal_crc;
- if (chksum_offset > CP_CHKSUM_OFFSET) {
+ if (chksum_offset < CP_MIN_CHKSUM_OFFSET ||
+ chksum_offset > CP_CHKSUM_OFFSET) {
MSG(0, "\tInvalid CP CRC offset: %u\n", chksum_offset);
return -1;
}
flags |= CP_TRIMMED_FLAG;
if (is_set_ckpt_flags(cp, CP_DISABLED_FLAG))
flags |= CP_DISABLED_FLAG;
+ if (is_set_ckpt_flags(cp, CP_LARGE_NAT_BITMAP_FLAG)) {
+ flags |= CP_LARGE_NAT_BITMAP_FLAG;
+ set_cp(checksum_offset, CP_MIN_CHKSUM_OFFSET);
+ } else {
+ set_cp(checksum_offset, CP_CHKSUM_OFFSET);
+ }
set_cp(free_segment_count, get_free_segments(sbi));
set_cp(valid_block_count, sbi->total_valid_block_count);
flags = update_nat_bits_flags(new_sb, cp, get_cp(ckpt_flags));
if (flags & CP_COMPACT_SUM_FLAG)
flags &= ~CP_COMPACT_SUM_FLAG;
+ if (flags & CP_LARGE_NAT_BITMAP_FLAG)
+ set_cp(checksum_offset, CP_MIN_CHKSUM_OFFSET);
+ else
+ set_cp(checksum_offset, CP_CHKSUM_OFFSET);
+
set_cp(ckpt_flags, flags);
memcpy(new_cp, cp, (unsigned char *)cp->sit_nat_version_bitmap -
int ro;
int preserve_limits; /* preserve quota limits */
int large_nat_bitmap;
+ int fix_chksum; /* fix old cp.chksum position */
__le32 feature; /* defined features */
/* mkfs parameters */
unsigned char sit_nat_version_bitmap[1];
} __attribute__((packed));
+#define CP_BITMAP_OFFSET \
+ (offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap))
+#define CP_MIN_CHKSUM_OFFSET CP_BITMAP_OFFSET
+
+#define MIN_NAT_BITMAP_SIZE 64
#define MAX_SIT_BITMAP_SIZE_IN_CKPT \
- (CP_CHKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 1 - 64)
+ (CP_CHKSUM_OFFSET - CP_BITMAP_OFFSET - MIN_NAT_BITMAP_SIZE)
#define MAX_BITMAP_SIZE_IN_CKPT \
- (CP_CHKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 1)
+ (CP_CHKSUM_OFFSET - CP_BITMAP_OFFSET)
/*
* For orphan inode management
set_cp(nat_ver_bitmap_bytesize, ((get_sb(segment_count_nat) / 2) <<
get_sb(log_blocks_per_seg)) / 8);
- set_cp(checksum_offset, CP_CHKSUM_OFFSET);
+ if (c.large_nat_bitmap)
+ set_cp(checksum_offset, CP_MIN_CHKSUM_OFFSET);
+ else
+ set_cp(checksum_offset, CP_CHKSUM_OFFSET);
crc = f2fs_checkpoint_chksum(cp);
*((__le32 *)((unsigned char *)cp + get_cp(checksum_offset))) =