ext4: race-condition protection for ext4_convert_unwritten_extents_endio
authorDmitry Monakhov <dmonakhov@openvz.org>
Wed, 10 Oct 2012 05:04:58 +0000 (01:04 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Wed, 10 Oct 2012 05:04:58 +0000 (01:04 -0400)
We assumed that at the time we call ext4_convert_unwritten_extents_endio()
extent in question is fully inside [map.m_lblk, map->m_len] because
it was already split during submission.  But this may not be true due to
a race between writeback vs fallocate.

If extent in question is larger than requested we will split it again.
Special precautions should being done if zeroout required because
[map.m_lblk, map->m_len] already contains valid data.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
fs/ext4/extents.c

index c278927..7011ac9 100644 (file)
@@ -52,6 +52,9 @@
 #define EXT4_EXT_MARK_UNINIT1  0x2  /* mark first half uninitialized */
 #define EXT4_EXT_MARK_UNINIT2  0x4  /* mark second half uninitialized */
 
+#define EXT4_EXT_DATA_VALID1   0x8  /* first half contains valid data */
+#define EXT4_EXT_DATA_VALID2   0x10 /* second half contains valid data */
+
 static __le32 ext4_extent_block_csum(struct inode *inode,
                                     struct ext4_extent_header *eh)
 {
@@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle,
        unsigned int ee_len, depth;
        int err = 0;
 
+       BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
+              (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
+
        ext_debug("ext4_split_extents_at: inode %lu, logical"
                "block %llu\n", inode->i_ino, (unsigned long long)split);
 
@@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle,
 
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
        if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
-               err = ext4_ext_zeroout(inode, &orig_ex);
+               if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
+                       if (split_flag & EXT4_EXT_DATA_VALID1)
+                               err = ext4_ext_zeroout(inode, ex2);
+                       else
+                               err = ext4_ext_zeroout(inode, ex);
+               } else
+                       err = ext4_ext_zeroout(inode, &orig_ex);
+
                if (err)
                        goto fix_extent_len;
                /* update the extent length and mark as initialized */
@@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle,
        uninitialized = ext4_ext_is_uninitialized(ex);
 
        if (map->m_lblk + map->m_len < ee_block + ee_len) {
-               split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
-                             EXT4_EXT_MAY_ZEROOUT : 0;
+               split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
                flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
                if (uninitialized)
                        split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
                                       EXT4_EXT_MARK_UNINIT2;
+               if (split_flag & EXT4_EXT_DATA_VALID2)
+                       split_flag1 |= EXT4_EXT_DATA_VALID1;
                err = ext4_split_extent_at(handle, inode, path,
                                map->m_lblk + map->m_len, split_flag1, flags1);
                if (err)
@@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle,
                return PTR_ERR(path);
 
        if (map->m_lblk >= ee_block) {
-               split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
-                             EXT4_EXT_MAY_ZEROOUT : 0;
+               split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
+                                           EXT4_EXT_DATA_VALID2);
                if (uninitialized)
                        split_flag1 |= EXT4_EXT_MARK_UNINIT1;
                if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 
        split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
        split_flag |= EXT4_EXT_MARK_UNINIT2;
-
+       if (flags & EXT4_GET_BLOCKS_CONVERT)
+               split_flag |= EXT4_EXT_DATA_VALID2;
        flags |= EXT4_GET_BLOCKS_PRE_IO;
        return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
 
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
-                                             struct inode *inode,
-                                             struct ext4_ext_path *path)
+                                               struct inode *inode,
+                                               struct ext4_map_blocks *map,
+                                               struct ext4_ext_path *path)
 {
        struct ext4_extent *ex;
+       ext4_lblk_t ee_block;
+       unsigned int ee_len;
        int depth;
        int err = 0;
 
        depth = ext_depth(inode);
        ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
 
        ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
                "block %llu, max_blocks %u\n", inode->i_ino,
-               (unsigned long long)le32_to_cpu(ex->ee_block),
-               ext4_ext_get_actual_len(ex));
+                 (unsigned long long)ee_block, ee_len);
+
+       /* If extent is larger than requested then split is required */
+       if (ee_block != map->m_lblk || ee_len > map->m_len) {
+               err = ext4_split_unwritten_extents(handle, inode, map, path,
+                                                  EXT4_GET_BLOCKS_CONVERT);
+               if (err < 0)
+                       goto out;
+               ext4_ext_drop_refs(path);
+               path = ext4_ext_find_extent(inode, map->m_lblk, path);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       goto out;
+               }
+               depth = ext_depth(inode);
+               ex = path[depth].p_ext;
+       }
 
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
@@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
        }
        /* IO end_io complete, convert the filled extent to written */
        if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
-               ret = ext4_convert_unwritten_extents_endio(handle, inode,
+               ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
                                                        path);
                if (ret >= 0) {
                        ext4_update_inode_fsync_trans(handle, inode, 1);