ext4: fix readdir error in the case of inline_data+dir_index
authorTao Ma <boyu.mt@taobao.com>
Fri, 19 Apr 2013 21:53:09 +0000 (17:53 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Fri, 19 Apr 2013 21:53:09 +0000 (17:53 -0400)
Zach reported a problem that if inline data is enabled, we don't
tell the difference between the offset of '.' and '..'. And a
getdents will fail if the user only want to get '.' and what's worse,
if there is a conversion happens when the user calls getdents
many times, he/she may get the same entry twice.

In theory, a dir block would also fail if it is converted to a
hashed-index based dir since f_pos will become a hash value, not the
real one, but it doesn't happen.  And a deep investigation shows that
we uses a hash based solution even for a normal dir if the dir_index
feature is enabled.

So this patch just adds a new htree_inlinedir_to_tree for inline dir,
and if we find that the hash index is supported, we will do like what
we do for a dir block.

Reported-by: Zach Brown <zab@redhat.com>
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/inline.c
fs/ext4/namei.c

index d8cd1f0..f8d56e4 100644 (file)
@@ -46,7 +46,8 @@ static int is_dx_dir(struct inode *inode)
        if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
                     EXT4_FEATURE_COMPAT_DIR_INDEX) &&
            ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
-            ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+            ((inode->i_size >> sb->s_blocksize_bits) == 1) ||
+            ext4_has_inline_data(inode)))
                return 1;
 
        return 0;
@@ -115,14 +116,6 @@ static int ext4_readdir(struct file *filp,
        int ret = 0;
        int dir_has_error = 0;
 
-       if (ext4_has_inline_data(inode)) {
-               int has_inline_data = 1;
-               ret = ext4_read_inline_dir(filp, dirent, filldir,
-                                          &has_inline_data);
-               if (has_inline_data)
-                       return ret;
-       }
-
        if (is_dx_dir(inode)) {
                err = ext4_dx_readdir(filp, dirent, filldir);
                if (err != ERR_BAD_DX_DIR) {
@@ -136,6 +129,15 @@ static int ext4_readdir(struct file *filp,
                ext4_clear_inode_flag(file_inode(filp),
                                      EXT4_INODE_INDEX);
        }
+
+       if (ext4_has_inline_data(inode)) {
+               int has_inline_data = 1;
+               ret = ext4_read_inline_dir(filp, dirent, filldir,
+                                          &has_inline_data);
+               if (has_inline_data)
+                       return ret;
+       }
+
        stored = 0;
        offset = filp->f_pos & (sb->s_blocksize - 1);
 
index 779d26b..0aabb34 100644 (file)
@@ -2518,6 +2518,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle,
 extern int ext4_read_inline_dir(struct file *filp,
                                void *dirent, filldir_t filldir,
                                int *has_inline_data);
+extern int htree_inlinedir_to_tree(struct file *dir_file,
+                                  struct inode *dir, ext4_lblk_t block,
+                                  struct dx_hash_info *hinfo,
+                                  __u32 start_hash, __u32 start_minor_hash,
+                                  int *has_inline_data);
 extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
                                        const struct qstr *d_name,
                                        struct ext4_dir_entry_2 **res_dir,
@@ -2554,6 +2559,24 @@ extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
 extern int ext4_handle_dirty_dirent_node(handle_t *handle,
                                         struct inode *inode,
                                         struct buffer_head *bh);
+#define S_SHIFT 12
+static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
+       [S_IFREG >> S_SHIFT]    = EXT4_FT_REG_FILE,
+       [S_IFDIR >> S_SHIFT]    = EXT4_FT_DIR,
+       [S_IFCHR >> S_SHIFT]    = EXT4_FT_CHRDEV,
+       [S_IFBLK >> S_SHIFT]    = EXT4_FT_BLKDEV,
+       [S_IFIFO >> S_SHIFT]    = EXT4_FT_FIFO,
+       [S_IFSOCK >> S_SHIFT]   = EXT4_FT_SOCK,
+       [S_IFLNK >> S_SHIFT]    = EXT4_FT_SYMLINK,
+};
+
+static inline void ext4_set_de_type(struct super_block *sb,
+                               struct ext4_dir_entry_2 *de,
+                               umode_t mode) {
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
+               de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+}
+
 
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
index c0fd1a1..abf8b62 100644 (file)
@@ -19,7 +19,8 @@
 
 #define EXT4_XATTR_SYSTEM_DATA "data"
 #define EXT4_MIN_INLINE_DATA_SIZE      ((sizeof(__le32) * EXT4_N_BLOCKS))
-#define EXT4_INLINE_DOTDOT_SIZE        4
+#define EXT4_INLINE_DOTDOT_OFFSET      2
+#define EXT4_INLINE_DOTDOT_SIZE                4
 
 int ext4_get_inline_size(struct inode *inode)
 {
@@ -1289,6 +1290,112 @@ out:
        return ret;
 }
 
+/*
+ * This function fills a red-black tree with information from an
+ * inlined dir.  It returns the number directory entries loaded
+ * into the tree.  If there is an error it is returned in err.
+ */
+int htree_inlinedir_to_tree(struct file *dir_file,
+                           struct inode *dir, ext4_lblk_t block,
+                           struct dx_hash_info *hinfo,
+                           __u32 start_hash, __u32 start_minor_hash,
+                           int *has_inline_data)
+{
+       int err = 0, count = 0;
+       unsigned int parent_ino;
+       int pos;
+       struct ext4_dir_entry_2 *de;
+       struct inode *inode = file_inode(dir_file);
+       int ret, inline_size = 0;
+       struct ext4_iloc iloc;
+       void *dir_buf = NULL;
+       struct ext4_dir_entry_2 fake;
+
+       ret = ext4_get_inode_loc(inode, &iloc);
+       if (ret)
+               return ret;
+
+       down_read(&EXT4_I(inode)->xattr_sem);
+       if (!ext4_has_inline_data(inode)) {
+               up_read(&EXT4_I(inode)->xattr_sem);
+               *has_inline_data = 0;
+               goto out;
+       }
+
+       inline_size = ext4_get_inline_size(inode);
+       dir_buf = kmalloc(inline_size, GFP_NOFS);
+       if (!dir_buf) {
+               ret = -ENOMEM;
+               up_read(&EXT4_I(inode)->xattr_sem);
+               goto out;
+       }
+
+       ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
+       up_read(&EXT4_I(inode)->xattr_sem);
+       if (ret < 0)
+               goto out;
+
+       pos = 0;
+       parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
+       while (pos < inline_size) {
+               /*
+                * As inlined dir doesn't store any information about '.' and
+                * only the inode number of '..' is stored, we have to handle
+                * them differently.
+                */
+               if (pos == 0) {
+                       fake.inode = cpu_to_le32(inode->i_ino);
+                       fake.name_len = 1;
+                       strcpy(fake.name, ".");
+                       fake.rec_len = ext4_rec_len_to_disk(
+                                               EXT4_DIR_REC_LEN(fake.name_len),
+                                               inline_size);
+                       ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
+                       de = &fake;
+                       pos = EXT4_INLINE_DOTDOT_OFFSET;
+               } else if (pos == EXT4_INLINE_DOTDOT_OFFSET) {
+                       fake.inode = cpu_to_le32(parent_ino);
+                       fake.name_len = 2;
+                       strcpy(fake.name, "..");
+                       fake.rec_len = ext4_rec_len_to_disk(
+                                               EXT4_DIR_REC_LEN(fake.name_len),
+                                               inline_size);
+                       ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
+                       de = &fake;
+                       pos = EXT4_INLINE_DOTDOT_SIZE;
+               } else {
+                       de = (struct ext4_dir_entry_2 *)(dir_buf + pos);
+                       pos += ext4_rec_len_from_disk(de->rec_len, inline_size);
+                       if (ext4_check_dir_entry(inode, dir_file, de,
+                                        iloc.bh, dir_buf,
+                                        inline_size, pos)) {
+                               ret = count;
+                               goto out;
+                       }
+               }
+
+               ext4fs_dirhash(de->name, de->name_len, hinfo);
+               if ((hinfo->hash < start_hash) ||
+                   ((hinfo->hash == start_hash) &&
+                    (hinfo->minor_hash < start_minor_hash)))
+                       continue;
+               if (de->inode == 0)
+                       continue;
+               err = ext4_htree_store_dirent(dir_file,
+                                  hinfo->hash, hinfo->minor_hash, de);
+               if (err) {
+                       count = err;
+                       goto out;
+               }
+               count++;
+       }
+       ret = count;
+out:
+       kfree(dir_buf);
+       brelse(iloc.bh);
+       return ret;
+}
+
 int ext4_read_inline_dir(struct file *filp,
                         void *dirent, filldir_t filldir,
                         int *has_inline_data)
index 955c907..6653fc3 100644 (file)
@@ -972,6 +972,17 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
                        hinfo.hash_version +=
                                EXT4_SB(dir->i_sb)->s_hash_unsigned;
                hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+               if (ext4_has_inline_data(dir)) {
+                       int has_inline_data = 1;
+                       count = htree_inlinedir_to_tree(dir_file, dir, 0,
+                                                       &hinfo, start_hash,
+                                                       start_minor_hash,
+                                                       &has_inline_data);
+                       if (has_inline_data) {
+                               *next_hash = ~0;
+                               return count;
+                       }
+               }
                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
                                               start_hash, start_minor_hash);
                *next_hash = ~0;
@@ -1456,24 +1467,6 @@ struct dentry *ext4_get_parent(struct dentry *child)
        return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
 }
 
-#define S_SHIFT 12
-static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
-       [S_IFREG >> S_SHIFT]    = EXT4_FT_REG_FILE,
-       [S_IFDIR >> S_SHIFT]    = EXT4_FT_DIR,
-       [S_IFCHR >> S_SHIFT]    = EXT4_FT_CHRDEV,
-       [S_IFBLK >> S_SHIFT]    = EXT4_FT_BLKDEV,
-       [S_IFIFO >> S_SHIFT]    = EXT4_FT_FIFO,
-       [S_IFSOCK >> S_SHIFT]   = EXT4_FT_SOCK,
-       [S_IFLNK >> S_SHIFT]    = EXT4_FT_SYMLINK,
-};
-
-static inline void ext4_set_de_type(struct super_block *sb,
-                               struct ext4_dir_entry_2 *de,
-                               umode_t mode) {
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
-               de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
-
 /*
  * Move count entries from end of map between two memory locations.
  * Returns pointer to last entry moved.