Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...

author Linus Torvalds <torvalds@linux-foundation.org>

Sun, 7 Oct 2012 21:36:39 +0000 (06:36 +0900)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sun, 7 Oct 2012 21:36:39 +0000 (06:36 +0900)
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 7 Oct 2012 21:36:39 +0000 (06:36 +0900)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 7 Oct 2012 21:36:39 +0000 (06:36 +0900)
diff --combined fs/buffer.c

index 58e2e7b,1fe3968..b5f0442
--- 1/fs/buffer.c
--- 2/fs/buffer.c
+++ b/fs/buffer.c
@@@ -914,7 -914,7 +914,7 @@@ link_dev_buffers(struct page *page, str
   /*
    * Initialise the state of a blockdev page's buffers.
    */ 
- -static void
+ +static sector_t
   init_page_buffers(struct page *page, struct block_device *bdev,
                         sector_t block, int size)
   {
@@@ -936,41 -936,33 +936,41 @@@
                 block++;
                 bh = bh->b_this_page;
         } while (bh != head);
+ +
+ +      /*
+ +       * Caller needs to validate requested block against end of device.
+ +       */
+ +      return end_block;
   }
   
   /*
    * Create the page-cache page that contains the requested block.
    *
- - * This is user purely for blockdev mappings.
+ + * This is used purely for blockdev mappings.
    */
- -static struct page *
+ +static int
   grow_dev_page(struct block_device *bdev, sector_t block,
- -              pgoff_t index, int size)
+ +              pgoff_t index, int size, int sizebits)
   {
         struct inode *inode = bdev->bd_inode;
         struct page *page;
         struct buffer_head *bh;
+ +      sector_t end_block;
+ +      int ret = 0;            /* Will call free_more_memory() */
   
         page = find_or_create_page(inode->i_mapping, index,
                 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
         if (!page)
- -              return NULL;
+ +              return ret;
   
         BUG_ON(!PageLocked(page));
   
         if (page_has_buffers(page)) {
                 bh = page_buffers(page);
                 if (bh->b_size == size) {
- -                      init_page_buffers(page, bdev, block, size);
- -                      return page;
+ +                      end_block = init_page_buffers(page, bdev,
+ +                                              index << sizebits, size);
+ +                      goto done;
                 }
                 if (!try_to_free_buffers(page))
                         goto failed;
@@@ -990,14 -982,14 +990,14 @@@
          */
         spin_lock(&inode->i_mapping->private_lock);
         link_dev_buffers(page, bh);
- -      init_page_buffers(page, bdev, block, size);
+ +      end_block = init_page_buffers(page, bdev, index << sizebits, size);
         spin_unlock(&inode->i_mapping->private_lock);
- -      return page;
- -
+ +done:
+ +      ret = (block < end_block) ? 1 : -ENXIO;
   failed:
         unlock_page(page);
         page_cache_release(page);
- -      return NULL;
+ +      return ret;
   }
   
   /*
@@@ -1007,6 -999,7 +1007,6 @@@
   static int
   grow_buffers(struct block_device *bdev, sector_t block, int size)
   {
- -      struct page *page;
         pgoff_t index;
         int sizebits;
   
@@@ -1030,14 -1023,22 +1030,14 @@@
                         bdevname(bdev, b));
                 return -EIO;
         }
- -      block = index << sizebits;
+ +
         /* Create a page with the proper size buffers.. */
- -      page = grow_dev_page(bdev, block, index, size);
- -      if (!page)
- -              return 0;
- -      unlock_page(page);
- -      page_cache_release(page);
- -      return 1;
+ +      return grow_dev_page(bdev, block, index, size, sizebits);
   }
   
   static struct buffer_head *
   __getblk_slow(struct block_device *bdev, sector_t block, int size)
   {
- -      int ret;
- -      struct buffer_head *bh;
- -
         /* Size must be multiple of hard sectorsize */
         if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
                         (size < 512 || size > PAGE_SIZE))) {
@@@ -1050,20 -1051,21 +1050,20 @@@
                 return NULL;
         }
   
- -retry:
- -      bh = __find_get_block(bdev, block, size);
- -      if (bh)
- -              return bh;
+ +      for (;;) {
+ +              struct buffer_head *bh;
+ +              int ret;
   
- -      ret = grow_buffers(bdev, block, size);
- -      if (ret == 0) {
- -              free_more_memory();
- -              goto retry;
- -      } else if (ret > 0) {
                 bh = __find_get_block(bdev, block, size);
                 if (bh)
                         return bh;
+ +
+ +              ret = grow_buffers(bdev, block, size);
+ +              if (ret < 0)
+ +                      return NULL;
+ +              if (ret == 0)
+ +                      free_more_memory();
         }
- -      return NULL;
   }
   
   /*
@@@ -1319,6 -1321,10 +1319,6 @@@ EXPORT_SYMBOL(__find_get_block)
    * which corresponds to the passed block_device, block and size. The
    * returned buffer has its reference count incremented.
    *
- - * __getblk() cannot fail - it just keeps trying.  If you pass it an
- - * illegal block number, __getblk() will happily return a buffer_head
- - * which represents the non-existent block.  Very weird.
- - *
    * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
    * attempt is failing.  FIXME, perhaps?
    */
@@@ -2312,12 -2318,6 +2312,6 @@@ int __block_page_mkwrite(struct vm_area
         loff_t size;
         int ret;
   
-       /*
-        * Update file times before taking page lock. We may end up failing the
-        * fault so this update may be superfluous but who really cares...
-        */
-       file_update_time(vma->vm_file);
- 
         lock_page(page);
         size = i_size_read(inode);
         if ((page->mapping != inode->i_mapping) ||
@@@ -2355,6 -2355,13 +2349,13 @@@ int block_page_mkwrite(struct vm_area_s
         struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
   
         sb_start_pagefault(sb);
+ 
+       /*
+        * Update file times before taking page lock. We may end up failing the
+        * fault so this update may be superfluous but who really cares...
+        */
+       file_update_time(vma->vm_file);
+ 
         ret = __block_page_mkwrite(vma, vmf, get_block);
         sb_end_pagefault(sb);
         return block_page_mkwrite_return(ret);
diff --combined fs/ext4/inode.c

index c862ee5,f18e786..b3c243b
--- 1/fs/ext4/inode.c
--- 2/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@@ -732,11 -732,13 +732,13 @@@ struct buffer_head *ext4_getblk(handle_
         err = ext4_map_blocks(handle, inode, &map,
                               create ? EXT4_GET_BLOCKS_CREATE : 0);
   
+       /* ensure we send some value back into *errp */
+       *errp = 0;
+ 
         if (err < 0)
                 *errp = err;
         if (err <= 0)
                 return NULL;
-       *errp = 0;
   
         bh = sb_getblk(inode->i_sb, map.m_pblk);
         if (!bh) {
@@@ -1954,9 -1956,6 +1956,6 @@@ out
         return ret;
   }
   
- static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
- static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
- 
   /*
    * Note that we don't need to start a transaction unless we're journaling data
    * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@@ -1970,7 -1969,7 +1969,7 @@@
    * This function can get called via...
    *   - ext4_da_writepages after taking page lock (have journal handle)
    *   - journal_submit_inode_data_buffers (no journal handle)
- - *   - shrink_page_list via pdflush (no journal handle)
+ + *   - shrink_page_list via the kswapd/direct reclaim (no journal handle)
    *   - grab_page_cache when doing write_begin (have journal handle)
    *
    * We don't do any block allocation in this function. If we have page with
@@@ -2463,6 -2462,16 +2462,16 @@@ static int ext4_nonda_switch(struct sup
         free_blocks  = EXT4_C2B(sbi,
                 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
         dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
+       /*
+        * Start pushing delalloc when 1/2 of free blocks are dirty.
+        */
+       if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
+           !writeback_in_progress(sb->s_bdi) &&
+           down_read_trylock(&sb->s_umount)) {
+               writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
+               up_read(&sb->s_umount);
+       }
+ 
         if (2 * free_blocks < 3 * dirty_blocks ||
                 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
                 /*
@@@ -2471,13 -2480,6 +2480,6 @@@
                  */
                 return 1;
         }
-       /*
-        * Even if we don't switch but are nearing capacity,
-        * start pushing delalloc when 1/2 of free blocks are dirty.
-        */
-       if (free_blocks < 2 * dirty_blocks)
-               writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
- 
         return 0;
   }
   
@@@ -2879,9 -2881,6 +2881,6 @@@ static void ext4_end_io_dio(struct kioc
   {
         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
           ext4_io_end_t *io_end = iocb->private;
-       struct workqueue_struct *wq;
-       unsigned long flags;
-       struct ext4_inode_info *ei;
   
         /* if not async direct IO or dio with 0 bytes write, just return */
         if (!io_end || !size)
@@@ -2910,24 -2909,14 +2909,14 @@@ out
                 io_end->iocb = iocb;
                 io_end->result = ret;
         }
-       wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
   
-       /* Add the io_end to per-inode completed aio dio list*/
-       ei = EXT4_I(io_end->inode);
-       spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-       list_add_tail(&io_end->list, &ei->i_completed_io_list);
-       spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- 
-       /* queue the work to convert unwritten extents to written */
-       queue_work(wq, &io_end->work);
+       ext4_add_complete_io(io_end);
   }
   
   static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
   {
         ext4_io_end_t *io_end = bh->b_private;
-       struct workqueue_struct *wq;
         struct inode *inode;
-       unsigned long flags;
   
         if (!test_clear_buffer_uninit(bh) || !io_end)
                 goto out;
@@@ -2946,15 -2935,7 +2935,7 @@@
          */
         inode = io_end->inode;
         ext4_set_io_unwritten_flag(inode, io_end);
- 
-       /* Add the io_end to per-inode completed io list*/
-       spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
-       list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
-       spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
- 
-       wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
-       /* queue the work to convert unwritten extents to written */
-       queue_work(wq, &io_end->work);
+       ext4_add_complete_io(io_end);
   out:
         bh->b_private = NULL;
         bh->b_end_io = NULL;
@@@ -3029,6 -3010,7 +3010,7 @@@ static ssize_t ext4_ext_direct_IO(int r
                 overwrite = *((int *)iocb->private);
   
                 if (overwrite) {
+                       atomic_inc(&inode->i_dio_count);
                         down_read(&EXT4_I(inode)->i_data_sem);
                         mutex_unlock(&inode->i_mutex);
                 }
@@@ -3054,7 -3036,7 +3036,7 @@@
                  * hook to the iocb.
                  */
                 iocb->private = NULL;
-               EXT4_I(inode)->cur_aio_dio = NULL;
+               ext4_inode_aio_set(inode, NULL);
                 if (!is_sync_kiocb(iocb)) {
                         ext4_io_end_t *io_end =
                                 ext4_init_io_end(inode, GFP_NOFS);
@@@ -3071,7 -3053,7 +3053,7 @@@
                          * is a unwritten extents needs to be converted
                          * when IO is completed.
                          */
-                       EXT4_I(inode)->cur_aio_dio = iocb->private;
+                       ext4_inode_aio_set(inode, io_end);
                 }
   
                 if (overwrite)
@@@ -3091,7 -3073,7 +3073,7 @@@
                                                  NULL,
                                                  DIO_LOCKING);
                 if (iocb->private)
-                       EXT4_I(inode)->cur_aio_dio = NULL;
+                       ext4_inode_aio_set(inode, NULL);
                 /*
                  * The io_end structure takes a reference to the inode,
                  * that structure needs to be destroyed and the
@@@ -3126,6 -3108,7 +3108,7 @@@
         retake_lock:
                 /* take i_mutex locking again if we do a ovewrite dio */
                 if (overwrite) {
+                       inode_dio_done(inode);
                         up_read(&EXT4_I(inode)->i_data_sem);
                         mutex_lock(&inode->i_mutex);
                 }
@@@ -3313,7 -3296,7 +3296,7 @@@ int ext4_discard_partial_page_buffers(h
    * handle: The journal handle
    * inode:  The files inode
    * page:   A locked page that contains the offset "from"
- - * from:   The starting byte offset (from the begining of the file)
+ + * from:   The starting byte offset (from the beginning of the file)
    *         to begin discarding
    * len:    The length of bytes to discard
    * flags:  Optional flags that may be used:
@@@ -3321,11 -3304,11 +3304,11 @@@
    *         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
    *         Only zero the regions of the page whose buffer heads
    *         have already been unmapped.  This flag is appropriate
- - *         for updateing the contents of a page whose blocks may
+ + *         for updating the contents of a page whose blocks may
    *         have already been released, and we only want to zero
    *         out the regions that correspond to those released blocks.
    *
- - * Returns zero on sucess or negative on failure.
+ + * Returns zero on success or negative on failure.
    */
   static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
                 struct inode *inode, struct page *page, loff_t from,
@@@ -3486,7 -3469,7 +3469,7 @@@ int ext4_can_truncate(struct inode *ino
    * @offset: The offset where the hole will begin
    * @len:    The length of the hole
    *
- - * Returns: 0 on sucess or negative on failure
+ + * Returns: 0 on success or negative on failure
    */
   
   int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
@@@ -4008,7 -3991,7 +3991,7 @@@ static int ext4_inode_blocks_set(handle
   
         if (i_blocks <= ~0U) {
                 /*
- -               * i_blocks can be represnted in a 32 bit variable
+ +               * i_blocks can be represented in a 32 bit variable
                  * as multiple of 512 bytes
                  */
                 raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
@@@ -4052,6 -4035,7 +4035,7 @@@ static int ext4_do_update_inode(handle_
         struct ext4_inode_info *ei = EXT4_I(inode);
         struct buffer_head *bh = iloc->bh;
         int err = 0, rc, block;
+       int need_datasync = 0;
         uid_t i_uid;
         gid_t i_gid;
   
@@@ -4102,7 -4086,10 +4086,10 @@@
                 raw_inode->i_file_acl_high =
                         cpu_to_le16(ei->i_file_acl >> 32);
         raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
-       ext4_isize_set(raw_inode, ei->i_disksize);
+       if (ei->i_disksize != ext4_isize(raw_inode)) {
+               ext4_isize_set(raw_inode, ei->i_disksize);
+               need_datasync = 1;
+       }
         if (ei->i_disksize > 0x7fffffffULL) {
                 struct super_block *sb = inode->i_sb;
                 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@@ -4155,7 -4142,7 +4142,7 @@@
                 err = rc;
         ext4_clear_inode_state(inode, EXT4_STATE_NEW);
   
-       ext4_update_inode_fsync_trans(handle, inode, 0);
+       ext4_update_inode_fsync_trans(handle, inode, need_datasync);
   out_brelse:
         brelse(bh);
         ext4_std_error(inode->i_sb, err);
@@@ -4169,7 -4156,7 +4156,7 @@@
    *
    * - Within generic_file_write() for O_SYNC files.
    *   Here, there will be no transaction running. We wait for any running
- - *   trasnaction to commit.
+ + *   transaction to commit.
    *
    * - Within sys_sync(), kupdate and such.
    *   We wait on commit, if tol to.
@@@ -4298,7 -4285,6 +4285,6 @@@ int ext4_setattr(struct dentry *dentry
         }
   
         if (attr->ia_valid & ATTR_SIZE) {
-               inode_dio_wait(inode);
   
                 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@@ -4347,8 -4333,17 +4333,17 @@@
         }
   
         if (attr->ia_valid & ATTR_SIZE) {
-               if (attr->ia_size != i_size_read(inode))
+               if (attr->ia_size != i_size_read(inode)) {
                         truncate_setsize(inode, attr->ia_size);
+                       /* Inode size will be reduced, wait for dio in flight.
+                        * Temporarily disable dioread_nolock to prevent
+                        * livelock. */
+                       if (orphan) {
+                               ext4_inode_block_unlocked_dio(inode);
+                               inode_dio_wait(inode);
+                               ext4_inode_resume_unlocked_dio(inode);
+                       }
+               }
                 ext4_truncate(inode);
         }
   
@@@ -4413,7 -4408,7 +4408,7 @@@ static int ext4_index_trans_blocks(stru
    * worse case, the indexs blocks spread over different block groups
    *
    * If datablocks are discontiguous, they are possible to spread over
- - * different block groups too. If they are contiuguous, with flexbg,
+ + * different block groups too. If they are contiguous, with flexbg,
    * they could still across block group boundary.
    *
    * Also account for superblock, inode, quota and xattr blocks
@@@ -4589,6 -4584,14 +4584,6 @@@ static int ext4_expand_extra_isize(stru
    * inode out, but prune_icache isn't a user-visible syncing function.
    * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
    * we start and wait on commits.
- - *
- - * Is this efficient/effective?  Well, we're being nice to the system
- - * by cleaning up our inodes proactively so they can be reaped
- - * without I/O.  But we are potentially leaving up to five seconds'
- - * worth of inodes floating about which prune_icache wants us to
- - * write out.  One way to fix that would be to get prune_icache()
- - * to do a write_super() to free up some memory.  It has the desired
- - * effect.
    */
   int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
   {
@@@ -4727,6 -4730,10 +4722,10 @@@ int ext4_change_inode_journal_flag(stru
                         return err;
         }
   
+       /* Wait for all existing dio workers */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+ 
         jbd2_journal_lock_updates(journal);
   
         /*
@@@ -4746,6 -4753,7 +4745,7 @@@
         ext4_set_aops(inode);
   
         jbd2_journal_unlock_updates(journal);
+       ext4_inode_resume_unlocked_dio(inode);
   
         /* Finally we can mark the inode as dirty. */
   
@@@ -4780,6 -4788,7 +4780,7 @@@ int ext4_page_mkwrite(struct vm_area_st
         int retries = 0;
   
         sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
         /* Delalloc case is easy... */
         if (test_opt(inode->i_sb, DELALLOC) &&
             !ext4_should_journal_data(inode) &&
diff --combined fs/ext4/ioctl.c

index 5439d6a,17c53a6..5747f52
--- 1/fs/ext4/ioctl.c
--- 2/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@@ -233,7 -233,7 +233,7 @@@ group_extend_out
   
         case EXT4_IOC_MOVE_EXT: {
                 struct move_extent me;
- -              struct file *donor_filp;
+ +              struct fd donor;
                 int err;
   
                 if (!(filp->f_mode & FMODE_READ) ||
@@@ -245,11 -245,11 +245,11 @@@
                         return -EFAULT;
                 me.moved_len = 0;
   
- -              donor_filp = fget(me.donor_fd);
- -              if (!donor_filp)
+ +              donor = fdget(me.donor_fd);
+ +              if (!donor.file)
                         return -EBADF;
   
- -              if (!(donor_filp->f_mode & FMODE_WRITE)) {
+ +              if (!(donor.file->f_mode & FMODE_WRITE)) {
                         err = -EBADF;
                         goto mext_out;
                 }
@@@ -266,7 -266,7 +266,7 @@@
                 if (err)
                         goto mext_out;
   
- -              err = ext4_move_extents(filp, donor_filp, me.orig_start,
+ +              err = ext4_move_extents(filp, donor.file, me.orig_start,
                                         me.donor_start, me.len, &me.moved_len);
                 mnt_drop_write_file(filp);
   
@@@ -274,7 -274,7 +274,7 @@@
                                  &me, sizeof(me)))
                         err = -EFAULT;
   mext_out:
- -              fput(donor_filp);
+ +              fdput(donor);
                 return err;
         }
   
@@@ -366,26 -366,11 +366,11 @@@ group_add_out
                         return -EOPNOTSUPP;
                 }
   
-               if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-                              EXT4_FEATURE_INCOMPAT_META_BG)) {
-                       ext4_msg(sb, KERN_ERR,
-                                "Online resizing not (yet) supported with meta_bg");
-                       return -EOPNOTSUPP;
-               }
- 
                 if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
                                    sizeof(__u64))) {
                         return -EFAULT;
                 }
   
-               if (n_blocks_count > MAX_32_NUM &&
-                   !EXT4_HAS_INCOMPAT_FEATURE(sb,
-                                              EXT4_FEATURE_INCOMPAT_64BIT)) {
-                       ext4_msg(sb, KERN_ERR,
-                                "File system only supports 32-bit block numbers");
-                       return -EOPNOTSUPP;
-               }
- 
                 err = ext4_resize_begin(sb);
                 if (err)
                         return err;
@@@ -420,13 -405,6 +405,6 @@@ resizefs_out
                 if (!blk_queue_discard(q))
                         return -EOPNOTSUPP;
   
-               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                              EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
-                       ext4_msg(sb, KERN_ERR,
-                                "FITRIM not supported with bigalloc");
-                       return -EOPNOTSUPP;
-               }
- 
                 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
                     sizeof(range)))
                         return -EFAULT;
diff --combined fs/ext4/mballoc.c

index 08778f6,a415465..f8b27bf
--- 1/fs/ext4/mballoc.c
--- 2/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@@ -24,6 -24,7 +24,7 @@@
   #include "ext4_jbd2.h"
   #include "mballoc.h"
   #include <linux/debugfs.h>
+ #include <linux/log2.h>
   #include <linux/slab.h>
   #include <trace/events/ext4.h>
   
@@@ -1338,17 -1339,17 +1339,17 @@@ static void mb_free_blocks(struct inod
         mb_check_buddy(e4b);
   }
   
- static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
+ static int mb_find_extent(struct ext4_buddy *e4b, int block,
                                 int needed, struct ext4_free_extent *ex)
   {
         int next = block;
-       int max;
+       int max, order;
         void *buddy;
   
         assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
         BUG_ON(ex == NULL);
   
-       buddy = mb_find_buddy(e4b, order, &max);
+       buddy = mb_find_buddy(e4b, 0, &max);
         BUG_ON(buddy == NULL);
         BUG_ON(block >= max);
         if (mb_test_bit(block, buddy)) {
@@@ -1358,12 -1359,9 +1359,9 @@@
                 return 0;
         }
   
-       /* FIXME dorp order completely ? */
-       if (likely(order == 0)) {
-               /* find actual order */
-               order = mb_find_order_for_block(e4b, block);
-               block = block >> order;
-       }
+       /* find actual order */
+       order = mb_find_order_for_block(e4b, block);
+       block = block >> order;
   
         ex->fe_len = 1 << order;
         ex->fe_start = block << order;
@@@ -1549,7 -1547,7 +1547,7 @@@ static void ext4_mb_check_limits(struc
                 /* recheck chunk's availability - we don't know
                  * when it was found (within this lock-unlock
                  * period or not) */
-               max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
+               max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
                 if (max >= gex->fe_len) {
                         ext4_mb_use_best_found(ac, e4b);
                         return;
@@@ -1641,7 -1639,7 +1639,7 @@@ int ext4_mb_try_best_found(struct ext4_
                 return err;
   
         ext4_lock_group(ac->ac_sb, group);
-       max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
+       max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
   
         if (max > 0) {
                 ac->ac_b_ex = ex;
@@@ -1662,17 -1660,20 +1660,20 @@@ int ext4_mb_find_by_goal(struct ext4_al
         int max;
         int err;
         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+       struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
         struct ext4_free_extent ex;
   
         if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
                 return 0;
+       if (grp->bb_free == 0)
+               return 0;
   
         err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
         if (err)
                 return err;
   
         ext4_lock_group(ac->ac_sb, group);
-       max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
+       max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
                              ac->ac_g_ex.fe_len, &ex);
   
         if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
@@@ -1788,7 -1789,7 +1789,7 @@@ void ext4_mb_complex_scan_group(struct 
                         break;
                 }
   
-               mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
+               mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
                 BUG_ON(ex.fe_len <= 0);
                 if (free < ex.fe_len) {
                         ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
@@@ -1840,7 -1841,7 +1841,7 @@@ void ext4_mb_scan_aligned(struct ext4_a
   
         while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
                 if (!mb_test_bit(i, bitmap)) {
-                       max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
+                       max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
                         if (max >= sbi->s_stripe) {
                                 ac->ac_found++;
                                 ac->ac_b_ex = ex;
@@@ -1862,6 -1863,12 +1863,12 @@@ static int ext4_mb_good_group(struct ex
   
         BUG_ON(cr < 0 || cr >= 4);
   
+       free = grp->bb_free;
+       if (free == 0)
+               return 0;
+       if (cr <= 2 && free < ac->ac_g_ex.fe_len)
+               return 0;
+ 
         /* We only do this if the grp has never been initialized */
         if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
                 int ret = ext4_mb_init_group(ac->ac_sb, group);
@@@ -1869,10 -1876,7 +1876,7 @@@
                         return 0;
         }
   
-       free = grp->bb_free;
         fragments = grp->bb_fragments;
-       if (free == 0)
-               return 0;
         if (fragments == 0)
                 return 0;
   
@@@ -2163,6 -2167,39 +2167,39 @@@ static struct kmem_cache *get_groupinfo
         return cachep;
   }
   
+ /*
+  * Allocate the top-level s_group_info array for the specified number
+  * of groups
+  */
+ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       unsigned size;
+       struct ext4_group_info ***new_groupinfo;
+ 
+       size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
+               EXT4_DESC_PER_BLOCK_BITS(sb);
+       if (size <= sbi->s_group_info_size)
+               return 0;
+ 
+       size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
+       new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
+       if (!new_groupinfo) {
+               ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
+               return -ENOMEM;
+       }
+       if (sbi->s_group_info) {
+               memcpy(new_groupinfo, sbi->s_group_info,
+                      sbi->s_group_info_size * sizeof(*sbi->s_group_info));
+               ext4_kvfree(sbi->s_group_info);
+       }
+       sbi->s_group_info = new_groupinfo;
+       sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
+       ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", 
+                  sbi->s_group_info_size);
+       return 0;
+ }
+ 
   /* Create and initialize ext4_group_info data for the given group. */
   int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
                           struct ext4_group_desc *desc)
@@@ -2195,12 -2232,11 +2232,11 @@@
                 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
         i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
   
-       meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
+       meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
         if (meta_group_info[i] == NULL) {
                 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
                 goto exit_group_info;
         }
-       memset(meta_group_info[i], 0, kmem_cache_size(cachep));
         set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
                 &(meta_group_info[i]->bb_state));
   
@@@ -2252,49 -2288,14 +2288,14 @@@ static int ext4_mb_init_backend(struct 
         ext4_group_t ngroups = ext4_get_groups_count(sb);
         ext4_group_t i;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_super_block *es = sbi->s_es;
-       int num_meta_group_infos;
-       int num_meta_group_infos_max;
-       int array_size;
+       int err;
         struct ext4_group_desc *desc;
         struct kmem_cache *cachep;
   
-       /* This is the number of blocks used by GDT */
-       num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
-                               1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
- 
-       /*
-        * This is the total number of blocks used by GDT including
-        * the number of reserved blocks for GDT.
-        * The s_group_info array is allocated with this value
-        * to allow a clean online resize without a complex
-        * manipulation of pointer.
-        * The drawback is the unused memory when no resize
-        * occurs but it's very low in terms of pages
-        * (see comments below)
-        * Need to handle this properly when META_BG resizing is allowed
-        */
-       num_meta_group_infos_max = num_meta_group_infos +
-                               le16_to_cpu(es->s_reserved_gdt_blocks);
+       err = ext4_mb_alloc_groupinfo(sb, ngroups);
+       if (err)
+               return err;
   
-       /*
-        * array_size is the size of s_group_info array. We round it
-        * to the next power of two because this approximation is done
-        * internally by kmalloc so we can have some more memory
-        * for free here (e.g. may be used for META_BG resize).
-        */
-       array_size = 1;
-       while (array_size < sizeof(*sbi->s_group_info) *
-              num_meta_group_infos_max)
-               array_size = array_size << 1;
-       /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
-        * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
-        * So a two level scheme suffices for now. */
-       sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
-       if (sbi->s_group_info == NULL) {
-               ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
-               return -ENOMEM;
-       }
         sbi->s_buddy_cache = new_inode(sb);
         if (sbi->s_buddy_cache == NULL) {
                 ext4_msg(sb, KERN_ERR, "can't get new inode");
@@@ -2322,7 -2323,7 +2323,7 @@@ err_freebuddy
         cachep = get_groupinfo_cache(sb->s_blocksize_bits);
         while (i-- > 0)
                 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
-       i = num_meta_group_infos;
+       i = sbi->s_group_info_size;
         while (i-- > 0)
                 kfree(sbi->s_group_info[i]);
         iput(sbi->s_buddy_cache);
@@@ -4008,7 -4009,6 +4009,6 @@@ ext4_mb_initialize_context(struct ext4_
         ext4_get_group_no_and_offset(sb, goal, &group, &block);
   
         /* set up allocation goals */
-       memset(ac, 0, sizeof(struct ext4_allocation_context));
         ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
         ac->ac_status = AC_STATUS_CONTINUE;
         ac->ac_sb = sb;
@@@ -4291,7 -4291,7 +4291,7 @@@ ext4_fsblk_t ext4_mb_new_blocks(handle_
                 }
         }
   
-       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+       ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
         if (!ac) {
                 ar->len = 0;
                 *errp = -ENOMEM;
@@@ -4657,6 -4657,8 +4657,8 @@@ do_more
                  * with group lock held. generate_buddy look at
                  * them with group lock_held
                  */
+               if (test_opt(sb, DISCARD))
+                       ext4_issue_discard(sb, block_group, bit, count);
                 ext4_lock_group(sb, block_group);
                 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
                 mb_free_blocks(inode, &e4b, bit, count_clusters);
@@@ -4709,7 -4711,7 +4711,7 @@@ error_return
    * ext4_group_add_blocks() -- Add given blocks to an existing group
    * @handle:                   handle to this transaction
    * @sb:                               super block
- - * @block:                    start physcial block to add to the block group
+ + * @block:                    start physical block to add to the block group
    * @count:                    number of blocks to free
    *
    * This marks the blocks as free in the bitmap and buddy.
@@@ -4988,7 -4990,8 +4990,8 @@@ int ext4_trim_fs(struct super_block *sb
   
         start = range->start >> sb->s_blocksize_bits;
         end = start + (range->len >> sb->s_blocksize_bits) - 1;
-       minlen = range->minlen >> sb->s_blocksize_bits;
+       minlen = EXT4_NUM_B2C(EXT4_SB(sb),
+                             range->minlen >> sb->s_blocksize_bits);
   
         if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
             unlikely(start >= max_blks))
@@@ -5048,6 -5051,6 +5051,6 @@@
                 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
   
   out:
-       range->len = trimmed * sb->s_blocksize;
+       range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
         return ret;
   }
diff --combined fs/ext4/super.c

index 69c55d4,982f6fc..7265a03
--- 1/fs/ext4/super.c
--- 2/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@@ -326,6 -326,11 +326,6 @@@ static void ext4_put_nojournal(handle_
   
   /*
    * Wrappers for jbd2_journal_start/end.
- - *
- - * The only special thing we need to do here is to make sure that all
- - * journal_end calls result in the superblock being marked dirty, so
- - * that sync() will call the filesystem's write_super callback if
- - * appropriate.
    */
   handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
   {
@@@ -351,6 -356,12 +351,6 @@@
         return jbd2_journal_start(journal, nblocks);
   }
   
- -/*
- - * The only special thing we need to do here is to make sure that all
- - * jbd2_journal_stop calls result in the superblock being marked dirty, so
- - * that sync() will call the filesystem's write_super callback if
- - * appropriate.
- - */
   int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
   {
         struct super_block *sb;
@@@ -420,7 -431,7 +420,7 @@@ static void __save_error_info(struct su
          */
         if (!es->s_error_count)
                 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
-       es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
+       le32_add_cpu(&es->s_error_count, 1);
   }
   
   static void save_error_info(struct super_block *sb, const char *func,
@@@ -850,7 -861,6 +850,6 @@@ static void ext4_put_super(struct super
         flush_workqueue(sbi->dio_unwritten_wq);
         destroy_workqueue(sbi->dio_unwritten_wq);
   
-       lock_super(sb);
         if (sbi->s_journal) {
                 err = jbd2_journal_destroy(sbi->s_journal);
                 sbi->s_journal = NULL;
@@@ -917,7 -927,6 +916,6 @@@
          * Now that we are completely done shutting down the
          * superblock, we need to actually destroy the kobject.
          */
-       unlock_super(sb);
         kobject_put(&sbi->s_kobj);
         wait_for_completion(&sbi->s_kobj_unregister);
         if (sbi->s_chksum_driver)
@@@ -956,11 -965,10 +954,10 @@@ static struct inode *ext4_alloc_inode(s
         ei->jinode = NULL;
         INIT_LIST_HEAD(&ei->i_completed_io_list);
         spin_lock_init(&ei->i_completed_io_lock);
-       ei->cur_aio_dio = NULL;
         ei->i_sync_tid = 0;
         ei->i_datasync_tid = 0;
         atomic_set(&ei->i_ioend_count, 0);
-       atomic_set(&ei->i_aiodio_unwritten, 0);
+       atomic_set(&ei->i_unwritten, 0);
   
         return &ei->vfs_inode;
   }
@@@ -1019,11 -1027,6 +1016,11 @@@ static int init_inodecache(void
   
   static void destroy_inodecache(void)
   {
+ +      /*
+ +       * Make sure all delayed rcu free inodes are flushed before we
+ +       * destroy cache.
+ +       */
+ +      rcu_barrier();
         kmem_cache_destroy(ext4_inode_cachep);
   }
   
@@@ -1224,6 -1227,7 +1221,7 @@@ enum 
         Opt_inode_readahead_blks, Opt_journal_ioprio,
         Opt_dioread_nolock, Opt_dioread_lock,
         Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
+       Opt_max_dir_size_kb,
   };
   
   static const match_table_t tokens = {
@@@ -1297,6 -1301,7 +1295,7 @@@
         {Opt_init_itable, "init_itable=%u"},
         {Opt_init_itable, "init_itable"},
         {Opt_noinit_itable, "noinit_itable"},
+       {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
         {Opt_removed, "check=none"},    /* mount option from ext2/3 */
         {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
         {Opt_removed, "reservation"},   /* mount option from ext2/3 */
@@@ -1477,6 -1482,7 +1476,7 @@@ static const struct mount_opts 
         {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
         {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
         {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
+       {Opt_max_dir_size_kb, 0, MOPT_GTE0},
         {Opt_err, 0, 0}
   };
   
@@@ -1592,6 -1598,8 +1592,8 @@@ static int handle_mount_opt(struct supe
                         if (!args->from)
                                 arg = EXT4_DEF_LI_WAIT_MULT;
                         sbi->s_li_wait_mult = arg;
+               } else if (token == Opt_max_dir_size_kb) {
+                       sbi->s_max_dir_size_kb = arg;
                 } else if (token == Opt_stripe) {
                         sbi->s_stripe = arg;
                 } else if (m->flags & MOPT_DATAJ) {
@@@ -1664,7 -1672,7 +1666,7 @@@ static int parse_options(char *options
                  * Initialize args struct so we know whether arg was
                  * found; some options take optional arguments.
                  */
-               args[0].to = args[0].from = 0;
+               args[0].to = args[0].from = NULL;
                 token = match_token(p, tokens, args);
                 if (handle_mount_opt(sb, p, token, args, journal_devnum,
                                      journal_ioprio, is_remount) < 0)
@@@ -1740,7 -1748,7 +1742,7 @@@ static inline void ext4_show_quota_opti
   
   static const char *token2str(int token)
   {
-       static const struct match_token *t;
+       const struct match_token *t;
   
         for (t = tokens; t->token != Opt_err; t++)
                 if (t->token == token && !strchr(t->pattern, '='))
@@@ -1823,6 -1831,8 +1825,8 @@@ static int _ext4_show_options(struct se
         if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
                        (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
                 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
+       if (nodefs || sbi->s_max_dir_size_kb)
+               SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
   
         ext4_show_quota_options(seq, sb);
         return 0;
@@@ -1914,15 -1924,45 +1918,45 @@@ done
         return res;
   }
   
+ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct flex_groups *new_groups;
+       int size;
+ 
+       if (!sbi->s_log_groups_per_flex)
+               return 0;
+ 
+       size = ext4_flex_group(sbi, ngroup - 1) + 1;
+       if (size <= sbi->s_flex_groups_allocated)
+               return 0;
+ 
+       size = roundup_pow_of_two(size * sizeof(struct flex_groups));
+       new_groups = ext4_kvzalloc(size, GFP_KERNEL);
+       if (!new_groups) {
+               ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
+                        size / (int) sizeof(struct flex_groups));
+               return -ENOMEM;
+       }
+ 
+       if (sbi->s_flex_groups) {
+               memcpy(new_groups, sbi->s_flex_groups,
+                      (sbi->s_flex_groups_allocated *
+                       sizeof(struct flex_groups)));
+               ext4_kvfree(sbi->s_flex_groups);
+       }
+       sbi->s_flex_groups = new_groups;
+       sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
+       return 0;
+ }
+ 
   static int ext4_fill_flex_info(struct super_block *sb)
   {
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         struct ext4_group_desc *gdp = NULL;
-       ext4_group_t flex_group_count;
         ext4_group_t flex_group;
         unsigned int groups_per_flex = 0;
-       size_t size;
-       int i;
+       int i, err;
   
         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
         if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
@@@ -1931,17 -1971,9 +1965,9 @@@
         }
         groups_per_flex = 1 << sbi->s_log_groups_per_flex;
   
-       /* We allocate both existing and potentially added groups */
-       flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
-                       ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
-                             EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
-       size = flex_group_count * sizeof(struct flex_groups);
-       sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
-       if (sbi->s_flex_groups == NULL) {
-               ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
-                        flex_group_count);
+       err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
+       if (err)
                 goto failed;
-       }
   
         for (i = 0; i < sbi->s_groups_count; i++) {
                 gdp = ext4_get_group_desc(sb, i, NULL);
@@@ -2144,10 -2176,12 +2170,12 @@@ static void ext4_orphan_cleanup(struct 
         }
   
         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
-               if (es->s_last_orphan)
+               /* don't clear list on RO mount w/ errors */
+               if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
                         jbd_debug(1, "Errors on filesystem, "
                                   "clearing orphan list.\n");
-               es->s_last_orphan = 0;
+                       es->s_last_orphan = 0;
+               }
                 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
                 return;
         }
@@@ -2528,6 -2562,7 +2556,7 @@@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb
   EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
   EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
   EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
+ EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
   EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
   
   static struct attribute *ext4_attrs[] = {
@@@ -2543,6 -2578,7 +2572,7 @@@
         ATTR_LIST(mb_stream_req),
         ATTR_LIST(mb_group_prealloc),
         ATTR_LIST(max_writeback_mb_bump),
+       ATTR_LIST(extent_max_zeroout_kb),
         ATTR_LIST(trigger_fs_error),
         NULL,
   };
@@@ -2550,10 -2586,12 +2580,12 @@@
   /* Features this copy of ext4 supports */
   EXT4_INFO_ATTR(lazy_itable_init);
   EXT4_INFO_ATTR(batched_discard);
+ EXT4_INFO_ATTR(meta_bg_resize);
   
   static struct attribute *ext4_feat_attrs[] = {
         ATTR_LIST(lazy_itable_init),
         ATTR_LIST(batched_discard),
+       ATTR_LIST(meta_bg_resize),
         NULL,
   };
   
@@@ -3374,7 -3412,7 +3406,7 @@@ static int ext4_fill_super(struct super
          * enable delayed allocation by default
          * Use -o nodelalloc to turn it off
          */
-       if (!IS_EXT3_SB(sb) &&
+       if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
             ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
                 set_opt(sb, DELALLOC);
   
@@@ -3743,6 -3781,7 +3775,7 @@@
   
         sbi->s_stripe = ext4_get_stripe_size(sbi);
         sbi->s_max_writeback_mb_bump = 128;
+       sbi->s_extent_max_zeroout_kb = 32;
   
         /*
          * set up enough so that it can read an inode
@@@ -4519,11 -4558,9 +4552,9 @@@ static int ext4_unfreeze(struct super_b
         if (sb->s_flags & MS_RDONLY)
                 return 0;
   
-       lock_super(sb);
         /* Reset the needs_recovery flag before the fs is unlocked. */
         EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
         ext4_commit_super(sb, 1);
-       unlock_super(sb);
         return 0;
   }
   
@@@ -4559,7 -4596,6 +4590,6 @@@ static int ext4_remount(struct super_bl
         char *orig_data = kstrdup(data, GFP_KERNEL);
   
         /* Store the original options */
-       lock_super(sb);
         old_sb_flags = sb->s_flags;
         old_opts.s_mount_opt = sbi->s_mount_opt;
         old_opts.s_mount_opt2 = sbi->s_mount_opt2;
@@@ -4701,7 -4737,6 +4731,6 @@@
         if (sbi->s_journal == NULL)
                 ext4_commit_super(sb, 1);
   
-       unlock_super(sb);
   #ifdef CONFIG_QUOTA
         /* Release old quota file names */
         for (i = 0; i < MAXQUOTAS; i++)
@@@ -4714,10 -4749,8 +4743,8 @@@
                 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                         EXT4_FEATURE_RO_COMPAT_QUOTA)) {
                         err = ext4_enable_quotas(sb);
-                       if (err) {
-                               lock_super(sb);
+                       if (err)
                                 goto restore_opts;
-                       }
                 }
         }
   #endif
@@@ -4744,7 -4777,6 +4771,6 @@@ restore_opts
                 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
         }
   #endif
-       unlock_super(sb);
         kfree(orig_data);
         return err;
   }
@@@ -4796,7 -4828,7 +4822,7 @@@ static int ext4_statfs(struct dentry *d
   
   static inline struct inode *dquot_to_inode(struct dquot *dquot)
   {
- -      return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+ +      return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
   }
   
   static int ext4_write_dquot(struct dquot *dquot)
@@@ -5269,8 -5301,10 +5295,10 @@@ static int __init ext4_init_fs(void
         if (err)
                 goto out6;
         ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
-       if (!ext4_kset)
+       if (!ext4_kset) {
+               err = -ENOMEM;
                 goto out5;
+       }
         ext4_proc_root = proc_mkdir("fs/ext4", NULL);
   
         err = ext4_init_feat_adverts();
diff --combined fs/fs-writeback.c

index 6d46c0d,5602d73..8e1d7b9
--- 1/fs/fs-writeback.c
--- 2/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@@ -63,6 -63,7 +63,7 @@@ int writeback_in_progress(struct backin
   {
         return test_bit(BDI_writeback_running, &bdi->state);
   }
+ EXPORT_SYMBOL(writeback_in_progress);
   
   static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
   {
@@@ -577,6 -578,10 +578,6 @@@ static long writeback_chunk_size(struc
   /*
    * Write a portion of b_io inodes which belong to @sb.
    *
- - * If @only_this_sb is true, then find and write all such
- - * inodes. Otherwise write only ones which go sequentially
- - * in reverse order.
- - *
    * Return the number of pages and/or inodes written.
    */
   static long writeback_sb_inodes(struct super_block *sb,
diff --combined fs/jbd2/journal.c

index e149b99,0f16edd..484b8d1
--- 1/fs/jbd2/journal.c
--- 2/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@@ -612,8 -612,8 +612,8 @@@ int jbd2_journal_start_commit(journal_
                 ret = 1;
         } else if (journal->j_committing_transaction) {
                 /*
- -               * If ext3_write_super() recently started a commit, then we
- -               * have to wait for completion of that transaction
+ +               * If commit has been started, then we have to wait for
+ +               * completion of that transaction.
                  */
                 if (ptid)
                         *ptid = journal->j_committing_transaction->t_tid;
@@@ -1354,6 -1354,11 +1354,11 @@@ static void jbd2_mark_journal_empty(jou
   
         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
         read_lock(&journal->j_state_lock);
+       /* Is it already empty? */
+       if (sb->s_start == 0) {
+               read_unlock(&journal->j_state_lock);
+               return;
+       }
         jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
                   journal->j_tail_sequence);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 7 Oct 2012 21:36:39 +0000 (06:36 +0900)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 7 Oct 2012 21:36:39 +0000 (06:36 +0900)
		1	2
fs/buffer.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/mballoc.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fs-writeback.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/jbd2/journal.c	patch \|	diff1 \|	diff2 \|	blob \| history