Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jan 2010 03:05:06 +0000 (19:05 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jan 2010 03:05:06 +0000 (19:05 -0800)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Drop EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE flag
  ext4: Fix quota accounting error with fallocate
  ext4: Handle -EDQUOT error on write

fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/inode.c

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af7b626..874d169 100644
@@ -361,14 +361,11 @@ struct ext4_new_group_data {
           so set the magic i_delalloc_reserved_flag after taking the
           inode allocation semaphore */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE       0x0004
-       /* Call ext4_da_update_reserve_space() after successfully 
-          allocating the blocks */
-#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE   0x0008
         /* caller is from the direct I/O path; request creation of an
         uninitialized extent if not allocated, and split the uninitialized
         extent if blocks have been preallocated already */
-#define EXT4_GET_BLOCKS_DIO                    0x0010
-#define EXT4_GET_BLOCKS_CONVERT                        0x0020
+#define EXT4_GET_BLOCKS_DIO                    0x0008
+#define EXT4_GET_BLOCKS_CONVERT                        0x0010
 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT         (EXT4_GET_BLOCKS_DIO|\
                                         EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
        /* Convert extent to initialized after direct IO complete */
@@ -1443,6 +1440,8 @@ extern int ext4_block_truncate_page(handle_t *handle,
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern int flush_aio_dio_completed_IO(struct inode *inode);
+extern void ext4_da_update_reserve_space(struct inode *inode,
+                                       int used, int quota_claim);
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
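
The ext4.h hunk above frees bit 0x0008 by dropping EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE, renumbers the direct-I/O flags down to keep the bit space dense, and exports ext4_da_update_reserve_space() with its new quota_claim argument. Below is a minimal standalone check (a sketch, not the kernel header; the values are copied from the new lines of the hunk) that the renumbered flags remain distinct single bits:

#include <assert.h>

#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
#define EXT4_GET_BLOCKS_DIO              0x0008  /* was 0x0010 */
#define EXT4_GET_BLOCKS_CONVERT          0x0010  /* was 0x0020 */

int main(void)
{
	/* no two of the renumbered flags may share a bit */
	assert(!(EXT4_GET_BLOCKS_DELALLOC_RESERVE & EXT4_GET_BLOCKS_DIO));
	assert(!(EXT4_GET_BLOCKS_DIO & EXT4_GET_BLOCKS_CONVERT));
	assert(!(EXT4_GET_BLOCKS_DELALLOC_RESERVE & EXT4_GET_BLOCKS_CONVERT));
	return 0;
}
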
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7d7b74e..765a482 100644
@@ -3132,7 +3132,19 @@ out:
                unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
                                        newblock + max_blocks,
                                        allocated - max_blocks);
+               allocated = max_blocks;
        }
+
+       /*
+        * If fallocate was done at an offset that is already delayed
+        * allocated, the block reservation and quota reservation were
+        * taken in the delayed write path.  But fallocate has already
+        * updated the quota and block count for this offset, so cancel
+        * those reservations here.
+        */
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+               ext4_da_update_reserve_space(inode, allocated, 0);
+
 map_out:
        set_buffer_mapped(bh_result);
 out1:
@@ -3368,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        /* previous routine could use block we allocated */
        newblock = ext_pblock(&newex);
        allocated = ext4_ext_get_actual_len(&newex);
+       if (allocated > max_blocks)
+               allocated = max_blocks;
        set_buffer_new(bh_result);
 
        /*
+        * Update reserved blocks/metadata blocks after successful
+        * block allocation which had been deferred till now.
+        */
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+               ext4_da_update_reserve_space(inode, allocated, 1);
+
+       /*
         * Cache the extent and update transaction to commit on fdatasync only
         * when it is _not_ an uninitialized extent.
         */
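
Both extents.c hunks above cap the mapped length to the requested max_blocks and then settle the delayed-allocation reservation; they differ only in the quota_claim argument. A freshly allocated extent still has to claim data quota (quota_claim = 1), while writing back into a fallocated extent must not charge the data blocks a second time (quota_claim = 0). A standalone sketch of that decision, with a stub standing in for the exported ext4_da_update_reserve_space() and a hypothetical helper name:

#include <stdio.h>

#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004

struct inode;	/* opaque stand-in for the kernel type */

/* stub for the function whose prototype the ext4.h hunk exports */
static void ext4_da_update_reserve_space(struct inode *inode,
					 int used, int quota_claim)
{
	(void)inode;	/* opaque in this sketch */
	printf("update reserve: used=%d quota_claim=%d\n", used, quota_claim);
}

static void settle_delalloc_reservation(struct inode *inode,
					int allocated, int max_blocks,
					int flags, int was_preallocated)
{
	/* both hunks cap the mapped length to what the caller asked for */
	if (allocated > max_blocks)
		allocated = max_blocks;

	/* only settle a reservation if one was taken at write time */
	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
		ext4_da_update_reserve_space(inode, allocated,
					     was_preallocated ? 0 : 1);
}

int main(void)
{
	/* fresh delalloc allocation vs. writeback into a fallocated range */
	settle_delalloc_reservation(NULL, 10, 8,
				    EXT4_GET_BLOCKS_DELALLOC_RESERVE, 0);
	settle_delalloc_reservation(NULL, 10, 8,
				    EXT4_GET_BLOCKS_DELALLOC_RESERVE, 1);
	return 0;
}
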
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c818972..e119524 100644
@@ -1053,11 +1053,12 @@ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
  * Called with i_data_sem down, which is important since we can call
  * ext4_discard_preallocations() from here.
  */
-static void ext4_da_update_reserve_space(struct inode *inode, int used)
+void ext4_da_update_reserve_space(struct inode *inode,
+                                       int used, int quota_claim)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       int mdb_free = 0;
+       int mdb_free = 0, allocated_meta_blocks = 0;
 
        spin_lock(&ei->i_block_reservation_lock);
        if (unlikely(used > ei->i_reserved_data_blocks)) {
@@ -1073,6 +1074,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
        ei->i_reserved_data_blocks -= used;
        used += ei->i_allocated_meta_blocks;
        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+       allocated_meta_blocks = ei->i_allocated_meta_blocks;
        ei->i_allocated_meta_blocks = 0;
        percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
 
@@ -1090,9 +1092,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
        /* Update quota subsystem */
-       vfs_dq_claim_block(inode, used);
-       if (mdb_free)
-               vfs_dq_release_reservation_block(inode, mdb_free);
+       if (quota_claim) {
+               vfs_dq_claim_block(inode, used);
+               if (mdb_free)
+                       vfs_dq_release_reservation_block(inode, mdb_free);
+       } else {
+               /*
+                * fallocate was done at an offset that is already delayed
+                * allocated, so on delayed-allocation writeback we must not
+                * claim quota for the allocated data blocks again.  However,
+                * converting the fallocated region to an initialized region
+                * may have caused a metadata allocation, so claim quota for
+                * that.
+                */
+               if (allocated_meta_blocks)
+                       vfs_dq_claim_block(inode, allocated_meta_blocks);
+               vfs_dq_release_reservation_block(inode, mdb_free + used);
+       }
 
        /*
         * If we have done all the pending block allocations and if
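
The quota_claim branch added above splits the quota settlement into two cases: with quota_claim, the whole "used" count (data blocks plus the metadata actually consumed) is claimed and only the unneeded metadata reservation mdb_free is released; without it, only the metadata blocks allocated during the conversion are claimed and the reservation for mdb_free plus used is dropped, since fallocate already charged the data blocks. A standalone model of that arithmetic (a sketch with hypothetical names mirroring the diff, not kernel code):

#include <stdio.h>

struct da_quota_update {
	int claimed;	/* reserved quota converted to used quota */
	int released;	/* reserved quota simply given back */
};

static struct da_quota_update
model_update(int used, int mdb_free, int allocated_meta_blocks,
	     int quota_claim)
{
	struct da_quota_update u;

	if (quota_claim) {
		/* normal delalloc writeback: data blocks were only
		 * reserved against quota, so claim them now */
		u.claimed = used;
		u.released = mdb_free;
	} else {
		/* writeback into a fallocated range: only the metadata
		 * allocated by the conversion is claimed; the stale
		 * reservation for the data blocks is dropped */
		u.claimed = allocated_meta_blocks;
		u.released = mdb_free + used;
	}
	return u;
}

int main(void)
{
	/* e.g. 8 data blocks written back, 1 metadata block actually
	 * used, 2 reserved metadata blocks left over */
	struct da_quota_update fresh = model_update(8 + 1, 2, 1, 1);
	struct da_quota_update falloc = model_update(8 + 1, 2, 1, 0);

	printf("fresh delalloc:  claim %d, release %d\n",
	       fresh.claimed, fresh.released);
	printf("fallocated area: claim %d, release %d\n",
	       falloc.claimed, falloc.released);
	return 0;
}
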
@@ -1292,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
                         */
                        EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
                }
-       }
 
+               /*
+                * Update reserved blocks/metadata blocks after successful
+                * block allocation, which had been deferred till now.  We
+                * don't support fallocate for non-extent files, so we can
+                * update the reserved space here.
+                */
+               if ((retval > 0) &&
+                       (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
+                       ext4_da_update_reserve_space(inode, retval, 1);
+       }
        if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 
-       /*
-        * Update reserved blocks/metadata blocks after successful
-        * block allocation which had been deferred till now.
-        */
-       if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
-               ext4_da_update_reserve_space(inode, retval);
-
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && buffer_mapped(bh)) {
                int ret = check_block_validity(inode, "file system "
@@ -1835,24 +1853,12 @@ repeat:
         * later. Real quota accounting is done at pages writeout
         * time.
         */
-       if (vfs_dq_reserve_block(inode, md_needed + 1)) {
-               /* 
-                * We tend to badly over-estimate the amount of
-                * metadata blocks which are needed, so if we have
-                * reserved any metadata blocks, try to force out the
-                * inode and see if we have any better luck.
-                */
-               if (md_reserved && retries++ <= 3)
-                       goto retry;
+       if (vfs_dq_reserve_block(inode, md_needed + 1))
                return -EDQUOT;
-       }
 
        if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
                vfs_dq_release_reservation_block(inode, md_needed + 1);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
-               retry:
-                       if (md_reserved)
-                               write_inode_now(inode, (retries == 3));
                        yield();
                        goto repeat;
                }
@@ -2213,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         * variables are updated after the blocks have been allocated.
         */
        new.b_state = 0;
-       get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
-                           EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+       get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
        if (mpd->b_state & (1 << BH_Delay))
-               get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
+               get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
        blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
                               &new, get_blocks_flags);
        if (blks < 0) {
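
The hunk above makes mpage_da_map_blocks() always request block creation but add the delayed-allocation reservation handling only when the buffer is actually delayed (BH_Delay), replacing the dropped UPDATE_RESERVE_SPACE flag. A tiny standalone model of that selection; only DELALLOC_RESERVE is taken from the ext4.h hunk, the CREATE value and the BH_Delay bit number are assumptions for the sketch:

#include <stdio.h>

#define EXT4_GET_BLOCKS_CREATE           0x0001  /* assumed, not in this diff */
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
#define BH_DELAY_BIT 0                           /* assumed bit number */

static unsigned int da_writeback_flags(unsigned long b_state)
{
	unsigned int flags = EXT4_GET_BLOCKS_CREATE;

	/* only delayed buffers carry a reservation to settle */
	if (b_state & (1UL << BH_DELAY_BIT))
		flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
	return flags;
}

int main(void)
{
	printf("delayed buffer:   0x%x\n", da_writeback_flags(1UL << BH_DELAY_BIT));
	printf("unwritten buffer: 0x%x\n", da_writeback_flags(0));
	return 0;
}
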
@@ -3032,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
                               loff_t pos, unsigned len, unsigned flags,
                               struct page **pagep, void **fsdata)
 {
-       int ret, retries = 0;
+       int ret, retries = 0, quota_retries = 0;
        struct page *page;
        pgoff_t index;
        unsigned from, to;
@@ -3091,6 +3097,22 @@ retry:
 
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
+
+       if ((ret == -EDQUOT) &&
+           EXT4_I(inode)->i_reserved_meta_blocks &&
+           (quota_retries++ < 3)) {
+               /*
+                * Since we often over-estimate the number of metadata
+                * blocks required, we may sometimes get a spurious
+                * out-of-quota error even though there would be enough
+                * space once we write the data blocks and find out
+                * how many metadata blocks were _really_ required.
+                * So try forcing the inode write to see if that
+                * helps.
+                */
+               write_inode_now(inode, (quota_retries == 3));
+               goto retry;
+       }
 out:
        return ret;
 }
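
The ext4_da_write_begin() hunk above replaces the retry loop removed from ext4_da_reserve_space() earlier in this diff: an -EDQUOT result is now retried at most three times, and only while the inode still holds reserved metadata blocks, forcing the inode out (synchronously on the final attempt) so the over-estimated metadata reservation can shrink. A standalone model of that policy with stub helpers (not kernel APIs):

#include <errno.h>
#include <stdio.h>

struct demo_inode {
	int reserved_meta_blocks;
};

/* stand-in for the write_begin path: fails while any over-reserved
 * metadata remains, succeeds once it is gone */
static int try_write_begin(struct demo_inode *inode)
{
	return inode->reserved_meta_blocks ? -EDQUOT : 0;
}

/* stand-in for write_inode_now(): pretend each forced writeback
 * releases one over-reserved metadata block */
static void write_inode_now_stub(struct demo_inode *inode, int sync)
{
	if (inode->reserved_meta_blocks)
		inode->reserved_meta_blocks--;
	printf("flush inode (sync=%d), %d meta blocks still reserved\n",
	       sync, inode->reserved_meta_blocks);
}

int main(void)
{
	struct demo_inode inode = { .reserved_meta_blocks = 2 };
	int quota_retries = 0;
	int ret;

retry:
	ret = try_write_begin(&inode);
	if (ret == -EDQUOT && inode.reserved_meta_blocks &&
	    quota_retries++ < 3) {
		/* sync writeback on the last attempt, as in the hunk */
		write_inode_now_stub(&inode, quota_retries == 3);
		goto retry;
	}
	printf("final result: %d\n", ret);
	return 0;
}
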