Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 28 Feb 2023 17:05:47 +0000 (09:05 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 28 Feb 2023 17:05:47 +0000 (09:05 -0800)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index 43e26e6..4eeb02d 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1529,6 +1529,7 @@ struct ext4_sb_info {
         unsigned int s_mount_opt2;
         unsigned long s_mount_flags;
         unsigned int s_def_mount_opt;
+       unsigned int s_def_mount_opt2;
         ext4_fsblk_t s_sb_block;
         atomic64_t s_resv_clusters;
         kuid_t s_resuid;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c

index 9de1c9d..3559ea6 100644 (file)
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3251,7 +3251,7 @@ static int ext4_split_extent_at(handle_t *handle,
                 ext4_ext_mark_unwritten(ex2);
  
         err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
-       if (err != -ENOSPC && err != -EDQUOT)
+       if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
                 goto out;
  
         if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c

index 4594b62..b06de72 100644 (file)
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1332,8 +1332,14 @@ struct dentry_info_args {
         char *dname;
  };
  
+/* Same as struct ext4_fc_tl, but uses native endianness fields */
+struct ext4_fc_tl_mem {
+       u16 fc_tag;
+       u16 fc_len;
+};
+
  static inline void tl_to_darg(struct dentry_info_args *darg,
-                             struct ext4_fc_tl *tl, u8 *val)
+                             struct ext4_fc_tl_mem *tl, u8 *val)
  {
         struct ext4_fc_dentry_info fcd;
  
@@ -1345,16 +1351,18 @@ static inline void tl_to_darg(struct dentry_info_args *darg,
         darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
  }
  
-static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val)
+static inline void ext4_fc_get_tl(struct ext4_fc_tl_mem *tl, u8 *val)
  {
-       memcpy(tl, val, EXT4_FC_TAG_BASE_LEN);
-       tl->fc_len = le16_to_cpu(tl->fc_len);
-       tl->fc_tag = le16_to_cpu(tl->fc_tag);
+       struct ext4_fc_tl tl_disk;
+
+       memcpy(&tl_disk, val, EXT4_FC_TAG_BASE_LEN);
+       tl->fc_len = le16_to_cpu(tl_disk.fc_len);
+       tl->fc_tag = le16_to_cpu(tl_disk.fc_tag);
  }
  
  /* Unlink replay function */
-static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
-                                u8 *val)
+static int ext4_fc_replay_unlink(struct super_block *sb,
+                                struct ext4_fc_tl_mem *tl, u8 *val)
  {
         struct inode *inode, *old_parent;
         struct qstr entry;
@@ -1451,8 +1459,8 @@ out:
  }
  
  /* Link replay function */
-static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
-                              u8 *val)
+static int ext4_fc_replay_link(struct super_block *sb,
+                              struct ext4_fc_tl_mem *tl, u8 *val)
  {
         struct inode *inode;
         struct dentry_info_args darg;
@@ -1506,8 +1514,8 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
  /*
   * Inode replay function
   */
-static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
-                               u8 *val)
+static int ext4_fc_replay_inode(struct super_block *sb,
+                               struct ext4_fc_tl_mem *tl, u8 *val)
  {
         struct ext4_fc_inode fc_inode;
         struct ext4_inode *raw_inode;
@@ -1609,8 +1617,8 @@ out:
   * inode for which we are trying to create a dentry here, should already have
   * been replayed before we start here.
   */
-static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
-                                u8 *val)
+static int ext4_fc_replay_create(struct super_block *sb,
+                                struct ext4_fc_tl_mem *tl, u8 *val)
  {
         int ret = 0;
         struct inode *inode = NULL;
@@ -1708,7 +1716,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
  
  /* Replay add range tag */
  static int ext4_fc_replay_add_range(struct super_block *sb,
-                                   struct ext4_fc_tl *tl, u8 *val)
+                                   struct ext4_fc_tl_mem *tl, u8 *val)
  {
         struct ext4_fc_add_range fc_add_ex;
         struct ext4_extent newex, *ex;
@@ -1828,8 +1836,8 @@ out:
  
  /* Replay DEL_RANGE tag */
  static int
-ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
-                        u8 *val)
+ext4_fc_replay_del_range(struct super_block *sb,
+                        struct ext4_fc_tl_mem *tl, u8 *val)
  {
         struct inode *inode;
         struct ext4_fc_del_range lrange;
@@ -2025,7 +2033,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
         struct ext4_fc_replay_state *state;
         int ret = JBD2_FC_REPLAY_CONTINUE;
         struct ext4_fc_add_range ext;
-       struct ext4_fc_tl tl;
+       struct ext4_fc_tl_mem tl;
         struct ext4_fc_tail tail;
         __u8 *start, *end, *cur, *val;
         struct ext4_fc_head head;
@@ -2144,7 +2152,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
  {
         struct super_block *sb = journal->j_private;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_fc_tl tl;
+       struct ext4_fc_tl_mem tl;
         __u8 *start, *end, *cur, *val;
         int ret = JBD2_FC_REPLAY_CONTINUE;
         struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c

index 6bdf61a..0b8b449 100644 (file)
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -202,8 +202,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
         return false;
  }
  
-/* Is IO overwriting allocated and initialized blocks? */
-static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+/* Is IO overwriting allocated or initialized blocks? */
+static bool ext4_overwrite_io(struct inode *inode,
+                             loff_t pos, loff_t len, bool *unwritten)
  {
         struct ext4_map_blocks map;
         unsigned int blkbits = inode->i_blkbits;
@@ -217,12 +218,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
         blklen = map.m_len;
  
         err = ext4_map_blocks(NULL, inode, &map, 0);
+       if (err != blklen)
+               return false;
         /*
          * 'err==len' means that all of the blocks have been preallocated,
-        * regardless of whether they have been initialized or not. To exclude
-        * unwritten extents, we need to check m_flags.
+        * regardless of whether they have been initialized or not. We need to
+        * check m_flags to distinguish the unwritten extents.
          */
-       return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+       *unwritten = !(map.m_flags & EXT4_MAP_MAPPED);
+       return true;
  }
  
  static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
@@ -431,11 +435,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
   * - For extending writes case we don't take the shared lock, since it requires
   *   updating inode i_disksize and/or orphan handling with exclusive lock.
   *
- * - shared locking will only be true mostly with overwrites. Otherwise we will
- *   switch to exclusive i_rwsem lock.
+ * - shared locking will only be true mostly with overwrites, including
+ *   initialized blocks and unwritten blocks. For overwrite unwritten blocks
+ *   we protect splitting extents by i_data_sem in ext4_inode_info, so we can
+ *   also release exclusive i_rwsem lock.
+ *
+ * - Otherwise we will switch to exclusive i_rwsem lock.
   */
  static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
-                                    bool *ilock_shared, bool *extend)
+                                    bool *ilock_shared, bool *extend,
+                                    bool *unwritten)
  {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file_inode(file);
@@ -459,7 +468,7 @@ restart:
          * in file_modified().
          */
         if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
-            !ext4_overwrite_io(inode, offset, count))) {
+            !ext4_overwrite_io(inode, offset, count, unwritten))) {
                 if (iocb->ki_flags & IOCB_NOWAIT) {
                         ret = -EAGAIN;
                         goto out;
@@ -491,7 +500,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
         loff_t offset = iocb->ki_pos;
         size_t count = iov_iter_count(from);
         const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
-       bool extend = false, unaligned_io = false;
+       bool extend = false, unaligned_io = false, unwritten = false;
         bool ilock_shared = true;
  
         /*
@@ -534,7 +543,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
                 return ext4_buffered_write_iter(iocb, from);
         }
  
-       ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
+       ret = ext4_dio_write_checks(iocb, from,
+                                   &ilock_shared, &extend, &unwritten);
         if (ret <= 0)
                 return ret;
  
@@ -582,7 +592,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
                 ext4_journal_stop(handle);
         }
  
-       if (ilock_shared)
+       if (ilock_shared && !unwritten)
                 iomap_ops = &ext4_iomap_overwrite_ops;
         ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
                            (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 40579ef..d251d70 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4872,13 +4872,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
                 goto bad_inode;
         raw_inode = ext4_raw_inode(&iloc);
  
-       if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
-               ext4_error_inode(inode, function, line, 0,
-                                "iget: root inode unallocated");
-               ret = -EFSCORRUPTED;
-               goto bad_inode;
-       }
-
         if ((flags & EXT4_IGET_HANDLE) &&
             (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
                 ret = -ESTALE;
@@ -4951,11 +4944,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
          * NeilBrown 1999oct15
          */
         if (inode->i_nlink == 0) {
-               if ((inode->i_mode == 0 ||
+               if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
                      !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
                     ino != EXT4_BOOT_LOADER_INO) {
-                       /* this inode is deleted */
-                       ret = -ESTALE;
+                       /* this inode is deleted or unallocated */
+                       if (flags & EXT4_IGET_SPECIAL) {
+                               ext4_error_inode(inode, function, line, 0,
+                                                "iget: special inode unallocated");
+                               ret = -EFSCORRUPTED;
+                       } else
+                               ret = -ESTALE;
                         goto bad_inode;
                 }
                 /* The only unlinked inodes we let through here have
@@ -5788,7 +5786,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
         ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
         int gdpblocks;
         int idxblocks;
-       int ret = 0;
+       int ret;
  
         /*
          * How many index blocks need to touch to map @lblocks logical blocks
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c

index b0dc721..12435d6 100644 (file)
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -155,9 +155,6 @@ static int ext4_update_backup_sb(struct super_block *sb,
         set_buffer_uptodate(bh);
         unlock_buffer(bh);
  
-       if (err)
-               goto out_bh;
-
         if (handle) {
                 err = ext4_handle_dirty_metadata(handle, NULL, bh);
                 if (err)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index d10a508..94608b7 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3872,9 +3872,16 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                         if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
                                 goto end_rename;
                 }
+               /*
+                * We need to protect against old.inode directory getting
+                * converted from inline directory format into a normal one.
+                */
+               inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
                 retval = ext4_rename_dir_prepare(handle, &old);
-               if (retval)
+               if (retval) {
+                       inode_unlock(old.inode);
                         goto end_rename;
+               }
         }
         /*
          * If we're renaming a file within an inline_data dir and adding or
@@ -4006,6 +4013,8 @@ end_rename:
         } else {
                 ext4_journal_stop(handle);
         }
+       if (old.dir_bh)
+               inode_unlock(old.inode);
  release_bh:
         brelse(old.dir_bh);
         brelse(old.bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index faae054..88f7b8a 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2146,7 +2146,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
                 return 0;
         case Opt_commit:
                 if (result.uint_32 == 0)
-                       ctx->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE;
+                       result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
                 else if (result.uint_32 > INT_MAX / HZ) {
                         ext4_msg(NULL, KERN_ERR,
                                  "Invalid commit interval %d, "
@@ -2883,7 +2883,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
  {
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         struct ext4_super_block *es = sbi->s_es;
-       int def_errors, def_mount_opt = sbi->s_def_mount_opt;
+       int def_errors;
         const struct mount_opts *m;
         char sep = nodefs ? '\n' : ',';
  
@@ -2895,15 +2895,28 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
  
         for (m = ext4_mount_opts; m->token != Opt_err; m++) {
                 int want_set = m->flags & MOPT_SET;
+               int opt_2 = m->flags & MOPT_2;
+               unsigned int mount_opt, def_mount_opt;
+
                 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
                     m->flags & MOPT_SKIP)
                         continue;
-               if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
-                       continue; /* skip if same as the default */
+
+               if (opt_2) {
+                       mount_opt = sbi->s_mount_opt2;
+                       def_mount_opt = sbi->s_def_mount_opt2;
+               } else {
+                       mount_opt = sbi->s_mount_opt;
+                       def_mount_opt = sbi->s_def_mount_opt;
+               }
+               /* skip if same as the default */
+               if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
+                       continue;
+               /* select Opt_noFoo vs Opt_Foo */
                 if ((want_set &&
-                    (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
-                   (!want_set && (sbi->s_mount_opt & m->mount_opt)))
-                       continue; /* select Opt_noFoo vs Opt_Foo */
+                    (mount_opt & m->mount_opt) != m->mount_opt) ||
+                   (!want_set && (mount_opt & m->mount_opt)))
+                       continue;
                 SEQ_OPTS_PRINT("%s", token2str(m->token));
         }
  
@@ -2931,7 +2944,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
         if (nodefs || sbi->s_stripe)
                 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
         if (nodefs || EXT4_MOUNT_DATA_FLAGS &
-                       (sbi->s_mount_opt ^ def_mount_opt)) {
+                       (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
                         SEQ_OPTS_PUTS("data=journal");
                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
@@ -4727,7 +4740,6 @@ static int ext4_group_desc_init(struct super_block *sb,
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         unsigned int db_count;
         ext4_fsblk_t block;
-       int ret;
         int i;
  
         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
@@ -4767,8 +4779,7 @@ static int ext4_group_desc_init(struct super_block *sb,
                         ext4_msg(sb, KERN_ERR,
                                "can't read group descriptor %d", i);
                         sbi->s_gdb_count = i;
-                       ret = PTR_ERR(bh);
-                       goto out;
+                       return PTR_ERR(bh);
                 }
                 rcu_read_lock();
                 rcu_dereference(sbi->s_group_desc)[i] = bh;
@@ -4777,13 +4788,10 @@ static int ext4_group_desc_init(struct super_block *sb,
         sbi->s_gdb_count = db_count;
         if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
                 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-               ret = -EFSCORRUPTED;
-               goto out;
+               return -EFSCORRUPTED;
         }
+
         return 0;
-out:
-       ext4_group_desc_free(sbi);
-       return ret;
  }
  
  static int ext4_load_and_init_journal(struct super_block *sb,
@@ -5075,6 +5083,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
                 goto failed_mount;
  
         sbi->s_def_mount_opt = sbi->s_mount_opt;
+       sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
  
         err = ext4_check_opt_consistency(fc, sb);
         if (err < 0)
@@ -5209,14 +5218,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
         if (ext4_geometry_check(sb, es))
                 goto failed_mount;
  
-       err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
-       if (err)
-               goto failed_mount;
-
         timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
         spin_lock_init(&sbi->s_error_lock);
         INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
  
+       err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
+       if (err)
+               goto failed_mount3;
+
         /* Register extent status tree shrinker */
         if (ext4_es_register_shrinker(sbi))
                 goto failed_mount3;
@@ -5937,8 +5946,11 @@ static int ext4_load_journal(struct super_block *sb,
         if (!really_read_only && journal_devnum &&
             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
                 es->s_journal_dev = cpu_to_le32(journal_devnum);
-
-               /* Make sure we flush the recovery flag to disk. */
+               ext4_commit_super(sb);
+       }
+       if (!really_read_only && journal_inum &&
+           journal_inum != le32_to_cpu(es->s_journal_inum)) {
+               es->s_journal_inum = cpu_to_le32(journal_inum);
                 ext4_commit_super(sb);
         }
  
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c

index a2f04a3..62f2ec5 100644 (file)
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -184,27 +184,73 @@ ext4_xattr_handler(int name_index)
  }
  
  static int
-ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
-                        void *value_start)
+check_xattrs(struct inode *inode, struct buffer_head *bh,
+            struct ext4_xattr_entry *entry, void *end, void *value_start,
+            const char *function, unsigned int line)
  {
         struct ext4_xattr_entry *e = entry;
+       int err = -EFSCORRUPTED;
+       char *err_str;
+
+       if (bh) {
+               if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
+                   BHDR(bh)->h_blocks != cpu_to_le32(1)) {
+                       err_str = "invalid header";
+                       goto errout;
+               }
+               if (buffer_verified(bh))
+                       return 0;
+               if (!ext4_xattr_block_csum_verify(inode, bh)) {
+                       err = -EFSBADCRC;
+                       err_str = "invalid checksum";
+                       goto errout;
+               }
+       } else {
+               struct ext4_xattr_ibody_header *header = value_start;
+
+               header -= 1;
+               if (end - (void *)header < sizeof(*header) + sizeof(u32)) {
+                       err_str = "in-inode xattr block too small";
+                       goto errout;
+               }
+               if (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
+                       err_str = "bad magic number in in-inode xattr";
+                       goto errout;
+               }
+       }
  
         /* Find the end of the names list */
         while (!IS_LAST_ENTRY(e)) {
                 struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
-               if ((void *)next >= end)
-                       return -EFSCORRUPTED;
-               if (strnlen(e->e_name, e->e_name_len) != e->e_name_len)
-                       return -EFSCORRUPTED;
+               if ((void *)next >= end) {
+                       err_str = "e_name out of bounds";
+                       goto errout;
+               }
+               if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) {
+                       err_str = "bad e_name length";
+                       goto errout;
+               }
                 e = next;
         }
  
         /* Check the values */
         while (!IS_LAST_ENTRY(entry)) {
                 u32 size = le32_to_cpu(entry->e_value_size);
+               unsigned long ea_ino = le32_to_cpu(entry->e_value_inum);
  
-               if (size > EXT4_XATTR_SIZE_MAX)
-                       return -EFSCORRUPTED;
+               if (!ext4_has_feature_ea_inode(inode->i_sb) && ea_ino) {
+                       err_str = "ea_inode specified without ea_inode feature enabled";
+                       goto errout;
+               }
+               if (ea_ino && ((ea_ino == EXT4_ROOT_INO) ||
+                              !ext4_valid_inum(inode->i_sb, ea_ino))) {
+                       err_str = "invalid ea_ino";
+                       goto errout;
+               }
+               if (size > EXT4_XATTR_SIZE_MAX) {
+                       err_str = "e_value size too large";
+                       goto errout;
+               }
  
                 if (size != 0 && entry->e_value_inum == 0) {
                         u16 offs = le16_to_cpu(entry->e_value_offs);
@@ -216,66 +262,54 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
                          * the padded and unpadded sizes, since the size may
                          * overflow to 0 when adding padding.
                          */
-                       if (offs > end - value_start)
-                               return -EFSCORRUPTED;
+                       if (offs > end - value_start) {
+                               err_str = "e_value out of bounds";
+                               goto errout;
+                       }
                         value = value_start + offs;
                         if (value < (void *)e + sizeof(u32) ||
                             size > end - value ||
-                           EXT4_XATTR_SIZE(size) > end - value)
-                               return -EFSCORRUPTED;
+                           EXT4_XATTR_SIZE(size) > end - value) {
+                               err_str = "overlapping e_value ";
+                               goto errout;
+                       }
                 }
                 entry = EXT4_XATTR_NEXT(entry);
         }
-
+       if (bh)
+               set_buffer_verified(bh);
         return 0;
+
+errout:
+       if (bh)
+               __ext4_error_inode(inode, function, line, 0, -err,
+                                  "corrupted xattr block %llu: %s",
+                                  (unsigned long long) bh->b_blocknr,
+                                  err_str);
+       else
+               __ext4_error_inode(inode, function, line, 0, -err,
+                                  "corrupted in-inode xattr: %s", err_str);
+       return err;
  }
  
  static inline int
  __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
                          const char *function, unsigned int line)
  {
-       int error = -EFSCORRUPTED;
-
-       if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
-           BHDR(bh)->h_blocks != cpu_to_le32(1))
-               goto errout;
-       if (buffer_verified(bh))
-               return 0;
-
-       error = -EFSBADCRC;
-       if (!ext4_xattr_block_csum_verify(inode, bh))
-               goto errout;
-       error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size,
-                                        bh->b_data);
-errout:
-       if (error)
-               __ext4_error_inode(inode, function, line, 0, -error,
-                                  "corrupted xattr block %llu",
-                                  (unsigned long long) bh->b_blocknr);
-       else
-               set_buffer_verified(bh);
-       return error;
+       return check_xattrs(inode, bh, BFIRST(bh), bh->b_data + bh->b_size,
+                           bh->b_data, function, line);
  }
  
  #define ext4_xattr_check_block(inode, bh) \
         __ext4_xattr_check_block((inode), (bh),  __func__, __LINE__)
  
  
-static int
+static inline int
  __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
                          void *end, const char *function, unsigned int line)
  {
-       int error = -EFSCORRUPTED;
-
-       if (end - (void *)header < sizeof(*header) + sizeof(u32) ||
-           (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))
-               goto errout;
-       error = ext4_xattr_check_entries(IFIRST(header), end, IFIRST(header));
-errout:
-       if (error)
-               __ext4_error_inode(inode, function, line, 0, -error,
-                                  "corrupted in-inode xattr");
-       return error;
+       return check_xattrs(inode, NULL, IFIRST(header), end, IFIRST(header),
+                           function, line);
  }
  
  #define xattr_check_inode(inode, header, end) \
@@ -388,6 +422,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
         struct inode *inode;
         int err;
  
+       /*
+        * We have to check for this corruption early as otherwise
+        * iget_locked() could wait indefinitely for the state of our
+        * parent inode.
+        */
+       if (parent->i_ino == ea_ino) {
+               ext4_error(parent->i_sb,
+                          "Parent and EA inode have the same ino %lu", ea_ino);
+               return -EFSCORRUPTED;
+       }
+
         inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
         if (IS_ERR(inode)) {
                 err = PTR_ERR(inode);
@@ -1438,6 +1483,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
         uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
         int err;
  
+       if (inode->i_sb->s_root == NULL) {
+               ext4_warning(inode->i_sb,
+                            "refuse to create EA inode when umounting");
+               WARN_ON(1);
+               return ERR_PTR(-EINVAL);
+       }
+
         /*
          * Let the next inode be the goal, so we try and allocate the EA inode
          * in the same group, or nearby one.
@@ -2567,9 +2619,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
  
         is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
         bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
-       buffer = kvmalloc(value_size, GFP_NOFS);
         b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
-       if (!is || !bs || !buffer || !b_entry_name) {
+       if (!is || !bs || !b_entry_name) {
                 error = -ENOMEM;
                 goto out;
         }
@@ -2581,12 +2632,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
  
         /* Save the entry name and the entry value */
         if (entry->e_value_inum) {
+               buffer = kvmalloc(value_size, GFP_NOFS);
+               if (!buffer) {
+                       error = -ENOMEM;
+                       goto out;
+               }
+
                 error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
                 if (error)
                         goto out;
         } else {
                 size_t value_offs = le16_to_cpu(entry->e_value_offs);
-               memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+               buffer = (void *)IFIRST(header) + value_offs;
         }
  
         memcpy(b_entry_name, entry->e_name, entry->e_name_len);
@@ -2601,25 +2658,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
         if (error)
                 goto out;
  
-       /* Remove the chosen entry from the inode */
-       error = ext4_xattr_ibody_set(handle, inode, &i, is);
-       if (error)
-               goto out;
-
         i.value = buffer;
         i.value_len = value_size;
         error = ext4_xattr_block_find(inode, &i, bs);
         if (error)
                 goto out;
  
-       /* Add entry which was removed from the inode into the block */
+       /* Move ea entry from the inode into the block */
         error = ext4_xattr_block_set(handle, inode, &i, bs);
         if (error)
                 goto out;
-       error = 0;
+
+       /* Remove the chosen entry from the inode */
+       i.value = NULL;
+       i.value_len = 0;
+       error = ext4_xattr_ibody_set(handle, inode, &i, is);
+
  out:
         kfree(b_entry_name);
-       kvfree(buffer);
+       if (entry->e_value_inum && buffer)
+               kvfree(buffer);
         if (is)
                 brelse(is->iloc.bh);
         if (bs)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c

index 6a404ac..15de138 100644 (file)
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1010,36 +1010,28 @@ repeat:
          * ie. locked but not dirty) or tune2fs (which may actually have
          * the buffer dirtied, ugh.)  */
  
-       if (buffer_dirty(bh)) {
+       if (buffer_dirty(bh) && jh->b_transaction) {
+               warn_dirty_buffer(bh);
                 /*
-                * First question: is this buffer already part of the current
-                * transaction or the existing committing transaction?
-                */
-               if (jh->b_transaction) {
-                       J_ASSERT_JH(jh,
-                               jh->b_transaction == transaction ||
-                               jh->b_transaction ==
-                                       journal->j_committing_transaction);
-                       if (jh->b_next_transaction)
-                               J_ASSERT_JH(jh, jh->b_next_transaction ==
-                                                       transaction);
-                       warn_dirty_buffer(bh);
-               }
-               /*
-                * In any case we need to clean the dirty flag and we must
-                * do it under the buffer lock to be sure we don't race
-                * with running write-out.
+                * We need to clean the dirty flag and we must do it under the
+                * buffer lock to be sure we don't race with running write-out.
                  */
                 JBUFFER_TRACE(jh, "Journalling dirty buffer");
                 clear_buffer_dirty(bh);
+               /*
+                * The buffer is going to be added to BJ_Reserved list now and
+                * nothing guarantees jbd2_journal_dirty_metadata() will be
+                * ever called for it. So we need to set jbddirty bit here to
+                * make sure the buffer is dirtied and written out when the
+                * journaling machinery is done with it.
+                */
                 set_buffer_jbddirty(bh);
         }
  
-       unlock_buffer(bh);
-
         error = -EROFS;
         if (is_handle_aborted(handle)) {
                 spin_unlock(&jh->b_state_lock);
+               unlock_buffer(bh);
                 goto out;
         }
         error = 0;
@@ -1049,8 +1041,10 @@ repeat:
          * b_next_transaction points to it
          */
         if (jh->b_transaction == transaction ||
-           jh->b_next_transaction == transaction)
+           jh->b_next_transaction == transaction) {
+               unlock_buffer(bh);
                 goto done;
+       }
  
         /*
          * this is the first time this transaction is touching this buffer,
@@ -1074,10 +1068,24 @@ repeat:
                  */
                 smp_wmb();
                 spin_lock(&journal->j_list_lock);
+               if (test_clear_buffer_dirty(bh)) {
+                       /*
+                        * Execute buffer dirty clearing and jh->b_transaction
+                        * assignment under journal->j_list_lock locked to
+                        * prevent bh being removed from checkpoint list if
+                        * the buffer is in an intermediate state (not dirty
+                        * and jh->b_transaction is NULL).
+                        */
+                       JBUFFER_TRACE(jh, "Journalling dirty buffer");
+                       set_buffer_jbddirty(bh);
+               }
                 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
                 spin_unlock(&journal->j_list_lock);
+               unlock_buffer(bh);
                 goto done;
         }
+       unlock_buffer(bh);
+
         /*
          * If there is already a copy-out version of this buffer, then we don't
          * need to make another one
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 28 Feb 2023 17:05:47 +0000 (09:05 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 28 Feb 2023 17:05:47 +0000 (09:05 -0800)
fs/ext4/ext4.h		patch \| blob \| history
fs/ext4/extents.c		patch \| blob \| history
fs/ext4/fast_commit.c		patch \| blob \| history
fs/ext4/file.c		patch \| blob \| history
fs/ext4/inode.c		patch \| blob \| history
fs/ext4/ioctl.c		patch \| blob \| history
fs/ext4/namei.c		patch \| blob \| history
fs/ext4/super.c		patch \| blob \| history
fs/ext4/xattr.c		patch \| blob \| history
fs/jbd2/transaction.c		patch \| blob \| history