ext4: track all extent status in extent status tree
authorZheng Liu <wenqing.lz@taobao.com>
Mon, 18 Feb 2013 05:28:47 +0000 (00:28 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Mon, 18 Feb 2013 05:28:47 +0000 (00:28 -0500)
By recording the phycisal block and status, extent status tree is able
to track the status of every extents.  When we call _map_blocks
functions to lookup an extent or create a new written/unwritten/delayed
extent, this extent will be inserted into extent status tree.

We don't load all extents from disk in alloc_inode() because it costs
too much memory, and if a file is opened and closed frequently it will
takes too much time to load all extent information.  So currently when
we create/lookup an extent, this extent will be inserted into extent
status tree.  Hence, the extent status tree may not comprehensively
contain all of the extents found in the file.

Here a condition we need to take care is that an extent might contains
unwritten and delayed status simultaneously because an extent is delayed
allocated and could be allocated by fallocate.  At this time we need to
keep delayed status because later we need to update delayed reservation
space using it.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jan kara <jack@suse.cz>
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/inode.c

index fc1c037..5c31d6a 100644 (file)
@@ -2602,6 +2602,9 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
                                                  struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_range(struct inode *inode,
+                                   ext4_lblk_t lblk_start,
+                                   ext4_lblk_t lblk_end);
 extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        __u64 start, __u64 len);
index cae8ae3..be0b1b3 100644 (file)
@@ -2076,8 +2076,18 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
                        break;
                }
 
-               /* This is possible iff next == next_del == EXT_MAX_BLOCKS */
-               if (next == next_del) {
+               /*
+                * This is possible iff next == next_del == EXT_MAX_BLOCKS.
+                * we need to check next == EXT_MAX_BLOCKS because it is
+                * possible that an extent is with unwritten and delayed
+                * status due to when an extent is delayed allocated and
+                * is allocated by fallocate status tree will track both of
+                * them in a extent.
+                *
+                * So we could return a unwritten and delayed extent, and
+                * its block is equal to 'next'.
+                */
+               if (next == next_del && next == EXT_MAX_BLOCKS) {
                        flags |= FIEMAP_EXTENT_LAST;
                        if (unlikely(next_del != EXT_MAX_BLOCKS ||
                                     next != EXT_MAX_BLOCKS)) {
@@ -3522,9 +3532,9 @@ out:
  *
  * Return 1 if there is a delalloc block in the range, otherwise 0.
  */
-static int ext4_find_delalloc_range(struct inode *inode,
-                                   ext4_lblk_t lblk_start,
-                                   ext4_lblk_t lblk_end)
+int ext4_find_delalloc_range(struct inode *inode,
+                            ext4_lblk_t lblk_start,
+                            ext4_lblk_t lblk_end)
 {
        struct extent_status es;
 
index 084b821..576b586 100644 (file)
@@ -526,20 +526,26 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                retval = ext4_ind_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
        }
+       if (retval > 0) {
+               int ret;
+               unsigned long long status;
+
+               status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+                               EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+               if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+                   ext4_find_delalloc_range(inode, map->m_lblk,
+                                            map->m_lblk + map->m_len - 1))
+                       status |= EXTENT_STATUS_DELAYED;
+               ret = ext4_es_insert_extent(inode, map->m_lblk,
+                                           map->m_len, map->m_pblk, status);
+               if (ret < 0)
+                       retval = ret;
+       }
        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
                up_read((&EXT4_I(inode)->i_data_sem));
 
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-               int ret;
-               if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
-                       /* delayed alloc may be allocated by fallocate and
-                        * coverted to initialized by directIO.
-                        * we need to handle delayed extent here.
-                        */
-                       down_write((&EXT4_I(inode)->i_data_sem));
-                       goto delayed_mapped;
-               }
-               ret = check_block_validity(inode, map);
+               int ret = check_block_validity(inode, map);
                if (ret != 0)
                        return ret;
        }
@@ -608,18 +614,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                        (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
                        ext4_da_update_reserve_space(inode, retval, 1);
        }
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
-               if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-                       int ret;
-delayed_mapped:
-                       /* delayed allocation blocks has been allocated */
-                       ret = ext4_es_remove_extent(inode, map->m_lblk,
-                                                   map->m_len);
-                       if (ret < 0)
-                               retval = ret;
-               }
+       if (retval > 0) {
+               int ret;
+               unsigned long long status;
+
+               status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+                               EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+               if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+                   ext4_find_delalloc_range(inode, map->m_lblk,
+                                            map->m_lblk + map->m_len - 1))
+                       status |= EXTENT_STATUS_DELAYED;
+               ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+                                           map->m_pblk, status);
+               if (ret < 0)
+                       retval = ret;
        }
 
        up_write((&EXT4_I(inode)->i_data_sem));
@@ -1765,6 +1776,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                retval = ext4_ind_map_blocks(NULL, inode, map, 0);
 
        if (retval == 0) {
+               int ret;
                /*
                 * XXX: __block_prepare_write() unmaps passed block,
                 * is it OK?
@@ -1772,16 +1784,20 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                /* If the block was allocated from previously allocated cluster,
                 * then we dont need to reserve it again. */
                if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
-                       retval = ext4_da_reserve_space(inode, iblock);
-                       if (retval)
+                       ret = ext4_da_reserve_space(inode, iblock);
+                       if (ret) {
                                /* not enough space to reserve */
+                               retval = ret;
                                goto out_unlock;
+                       }
                }
 
-               retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
-                                              ~0, EXTENT_STATUS_DELAYED);
-               if (retval)
+               ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+                                           ~0, EXTENT_STATUS_DELAYED);
+               if (ret) {
+                       retval = ret;
                        goto out_unlock;
+               }
 
                /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
                 * and it should not appear on the bh->b_state.
@@ -1791,6 +1807,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                map_bh(bh, inode->i_sb, invalid_block);
                set_buffer_new(bh);
                set_buffer_delay(bh);
+       } else if (retval > 0) {
+               int ret;
+               unsigned long long status;
+
+               status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+                               EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+               ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+                                           map->m_pblk, status);
+               if (ret != 0)
+                       retval = ret;
        }
 
 out_unlock: