Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 7 Oct 2012 21:36:39 +0000 (06:36 +0900)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 7 Oct 2012 21:36:39 +0000 (06:36 +0900)
Pull ext4 updates from Ted Ts'o:
 "The big new feature added this time is supporting online resizing
  using the meta_bg feature.  This allows us to resize file systems
  which are greater than 16TB.  In addition, the speed of online
  resizing has been improved in general.

  We also fix a number of races, some of which could lead to deadlocks,
  in ext4's Asynchronous I/O and online defrag support, thanks to good
  work by Dmitry Monakhov.

   There are also a large number of more minor bug fixes and cleanups
   from a number of other ext4 contributors, quite a few of whom have
   submitted fixes for the first time."
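
For context, online resizing of a mounted ext4 filesystem is driven from
userspace through the EXT4_IOC_RESIZE_FS ioctl (normally issued by
resize2fs), whose argument is the desired size in filesystem blocks.  A
minimal sketch, assuming a 4KiB block size, that the underlying block
device has already been grown, and that the _IOW definition matches the
one in fs/ext4/ext4.h:

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>

    /* Mirrors the kernel's definition in fs/ext4/ext4.h. */
    #define EXT4_IOC_RESIZE_FS      _IOW('f', 16, uint64_t)

    int main(void)
    {
            /* 20 TiB in 4 KiB blocks; adjust for the real block size. */
            uint64_t new_blocks = (20ULL << 40) / 4096;
            int fd = open("/mnt", O_RDONLY);  /* mount point, not the device */

            if (fd < 0 || ioctl(fd, EXT4_IOC_RESIZE_FS, &new_blocks) < 0)
                    perror("EXT4_IOC_RESIZE_FS");
            else
                    puts("resize requested");
            if (fd >= 0)
                    close(fd);
            return 0;
    }

With the meta_bg changes in this pull, the same ioctl can take a file
system past the 16TB boundary that the older resize-inode scheme cannot
cross.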

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (69 commits)
  ext4: fix ext4_flush_completed_IO wait semantics
  ext4: fix mtime update in nodelalloc mode
  ext4: fix ext_remove_space for punch_hole case
  ext4: punch_hole should wait for DIO writers
  ext4: serialize truncate with overwrite DIO workers
  ext4: endless truncate due to nonlocked dio readers
  ext4: serialize unlocked dio reads with truncate
  ext4: serialize dio nonlocked reads with defrag workers
  ext4: completed_io locking cleanup
  ext4: fix unwritten counter leakage
  ext4: give i_aiodio_unwritten a more appropriate name
  ext4: ext4_inode_info diet
  ext4: convert to use leXX_add_cpu()
  ext4: ext4_bread usage audit
  fs: reserve fallocate flag codepoint
  ext4: remove redundant offset check in mext_check_arguments()
  ext4: don't clear orphan list on ro mount with errors
  jbd2: fix assertion failure in commit code due to lacking transaction credits
  ext4: release donor reference when EXT4_IOC_MOVE_EXT ioctl fails
  ext4: enable FITRIM ioctl on bigalloc file system
  ...

fs/buffer.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/super.c
fs/fs-writeback.c
fs/jbd2/journal.c

diff --combined fs/buffer.c
@@@ -914,7 -914,7 +914,7 @@@ link_dev_buffers(struct page *page, str
  /*
   * Initialise the state of a blockdev page's buffers.
   */ 
 -static void
 +static sector_t
  init_page_buffers(struct page *page, struct block_device *bdev,
                        sector_t block, int size)
  {
                block++;
                bh = bh->b_this_page;
        } while (bh != head);
 +
 +      /*
 +       * Caller needs to validate requested block against end of device.
 +       */
 +      return end_block;
  }
  
  /*
   * Create the page-cache page that contains the requested block.
   *
 - * This is user purely for blockdev mappings.
 + * This is used purely for blockdev mappings.
   */
 -static struct page *
 +static int
  grow_dev_page(struct block_device *bdev, sector_t block,
 -              pgoff_t index, int size)
 +              pgoff_t index, int size, int sizebits)
  {
        struct inode *inode = bdev->bd_inode;
        struct page *page;
        struct buffer_head *bh;
 +      sector_t end_block;
 +      int ret = 0;            /* Will call free_more_memory() */
  
        page = find_or_create_page(inode->i_mapping, index,
                (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
        if (!page)
 -              return NULL;
 +              return ret;
  
        BUG_ON(!PageLocked(page));
  
        if (page_has_buffers(page)) {
                bh = page_buffers(page);
                if (bh->b_size == size) {
 -                      init_page_buffers(page, bdev, block, size);
 -                      return page;
 +                      end_block = init_page_buffers(page, bdev,
 +                                              index << sizebits, size);
 +                      goto done;
                }
                if (!try_to_free_buffers(page))
                        goto failed;
         */
        spin_lock(&inode->i_mapping->private_lock);
        link_dev_buffers(page, bh);
 -      init_page_buffers(page, bdev, block, size);
 +      end_block = init_page_buffers(page, bdev, index << sizebits, size);
        spin_unlock(&inode->i_mapping->private_lock);
 -      return page;
 -
 +done:
 +      ret = (block < end_block) ? 1 : -ENXIO;
  failed:
        unlock_page(page);
        page_cache_release(page);
 -      return NULL;
 +      return ret;
  }
  
  /*
  static int
  grow_buffers(struct block_device *bdev, sector_t block, int size)
  {
 -      struct page *page;
        pgoff_t index;
        int sizebits;
  
                        bdevname(bdev, b));
                return -EIO;
        }
 -      block = index << sizebits;
 +
        /* Create a page with the proper size buffers.. */
 -      page = grow_dev_page(bdev, block, index, size);
 -      if (!page)
 -              return 0;
 -      unlock_page(page);
 -      page_cache_release(page);
 -      return 1;
 +      return grow_dev_page(bdev, block, index, size, sizebits);
  }
  
  static struct buffer_head *
  __getblk_slow(struct block_device *bdev, sector_t block, int size)
  {
 -      int ret;
 -      struct buffer_head *bh;
 -
        /* Size must be multiple of hard sectorsize */
        if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
                        (size < 512 || size > PAGE_SIZE))) {
                return NULL;
        }
  
 -retry:
 -      bh = __find_get_block(bdev, block, size);
 -      if (bh)
 -              return bh;
 +      for (;;) {
 +              struct buffer_head *bh;
 +              int ret;
  
 -      ret = grow_buffers(bdev, block, size);
 -      if (ret == 0) {
 -              free_more_memory();
 -              goto retry;
 -      } else if (ret > 0) {
                bh = __find_get_block(bdev, block, size);
                if (bh)
                        return bh;
 +
 +              ret = grow_buffers(bdev, block, size);
 +              if (ret < 0)
 +                      return NULL;
 +              if (ret == 0)
 +                      free_more_memory();
        }
 -      return NULL;
  }
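
Taken together, the hunks above give grow_dev_page()/grow_buffers() a
three-way result that __getblk_slow() now loops on: a negative errno
(-ENXIO) when the requested block lies past the end of the device, 0 when
the page or its buffers could not be allocated, and 1 on success.
Restated without the diff markers, the new retry loop is:

    for (;;) {
            struct buffer_head *bh;
            int ret;

            /* Fast path: the buffer may already be cached. */
            bh = __find_get_block(bdev, block, size);
            if (bh)
                    return bh;

            ret = grow_buffers(bdev, block, size);
            if (ret < 0)
                    return NULL;            /* block beyond end of device */
            if (ret == 0)
                    free_more_memory();     /* allocation failed: reclaim, retry */
    }

This is what allows the stale "__getblk() cannot fail" comment to be
deleted in the next hunk: an illegal block number now yields NULL instead
of a buffer_head for a nonexistent block.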
  
  /*
@@@ -1319,6 -1321,10 +1319,6 @@@ EXPORT_SYMBOL(__find_get_block)
   * which corresponds to the passed block_device, block and size. The
   * returned buffer has its reference count incremented.
   *
 - * __getblk() cannot fail - it just keeps trying.  If you pass it an
 - * illegal block number, __getblk() will happily return a buffer_head
 - * which represents the non-existent block.  Very weird.
 - *
   * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
   * attempt is failing.  FIXME, perhaps?
   */
@@@ -2312,12 -2318,6 +2312,6 @@@ int __block_page_mkwrite(struct vm_area
        loff_t size;
        int ret;
  
-       /*
-        * Update file times before taking page lock. We may end up failing the
-        * fault so this update may be superfluous but who really cares...
-        */
-       file_update_time(vma->vm_file);
        lock_page(page);
        size = i_size_read(inode);
        if ((page->mapping != inode->i_mapping) ||
@@@ -2355,6 -2355,13 +2349,13 @@@ int block_page_mkwrite(struct vm_area_s
        struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
  
        sb_start_pagefault(sb);
+       /*
+        * Update file times before taking page lock. We may end up failing the
+        * fault so this update may be superfluous but who really cares...
+        */
+       file_update_time(vma->vm_file);
        ret = __block_page_mkwrite(vma, vmf, get_block);
        sb_end_pagefault(sb);
        return block_page_mkwrite_return(ret);
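
The file_update_time() move is about ordering against filesystem freezing:
updating the timestamps dirties the inode, so it must happen inside the
sb_start_pagefault()/sb_end_pagefault() bracket rather than before it (and
callers of __block_page_mkwrite(), such as ext4 below, now do it
themselves).  A skeleton of the resulting pattern, where example_get_block
stands in for the filesystem's real get_block callback:

    static int example_page_mkwrite(struct vm_area_struct *vma,
                                    struct vm_fault *vmf)
    {
            struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
            int ret;

            sb_start_pagefault(sb);         /* hold off freezing while we dirty */
            file_update_time(vma->vm_file); /* dirties the inode: must be inside */
            ret = __block_page_mkwrite(vma, vmf, example_get_block);
            sb_end_pagefault(sb);
            return block_page_mkwrite_return(ret);
    }
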
diff --combined fs/ext4/inode.c
@@@ -732,11 -732,13 +732,13 @@@ struct buffer_head *ext4_getblk(handle_
        err = ext4_map_blocks(handle, inode, &map,
                              create ? EXT4_GET_BLOCKS_CREATE : 0);
  
+       /* ensure we send some value back into *errp */
+       *errp = 0;
        if (err < 0)
                *errp = err;
        if (err <= 0)
                return NULL;
-       *errp = 0;
  
        bh = sb_getblk(inode->i_sb, map.m_pblk);
        if (!bh) {
@@@ -1954,9 -1956,6 +1956,6 @@@ out
        return ret;
  }
  
- static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
- static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
  /*
   * Note that we don't need to start a transaction unless we're journaling data
   * because we should have holes filled from ext4_page_mkwrite(). We even don't
   * This function can get called via...
   *   - ext4_da_writepages after taking page lock (have journal handle)
   *   - journal_submit_inode_data_buffers (no journal handle)
 - *   - shrink_page_list via pdflush (no journal handle)
 + *   - shrink_page_list via the kswapd/direct reclaim (no journal handle)
   *   - grab_page_cache when doing write_begin (have journal handle)
   *
   * We don't do any block allocation in this function. If we have page with
@@@ -2463,6 -2462,16 +2462,16 @@@ static int ext4_nonda_switch(struct sup
        free_blocks  = EXT4_C2B(sbi,
                percpu_counter_read_positive(&sbi->s_freeclusters_counter));
        dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
+       /*
+        * Start pushing delalloc when 1/2 of free blocks are dirty.
+        */
+       if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
+           !writeback_in_progress(sb->s_bdi) &&
+           down_read_trylock(&sb->s_umount)) {
+               writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
+               up_read(&sb->s_umount);
+       }
        if (2 * free_blocks < 3 * dirty_blocks ||
                free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
                /*
                 */
                return 1;
        }
-       /*
-        * Even if we don't switch but are nearing capacity,
-        * start pushing delalloc when 1/2 of free blocks are dirty.
-        */
-       if (free_blocks < 2 * dirty_blocks)
-               writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
        return 0;
  }
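
To make the two thresholds concrete: with free_blocks = 1200 and
dirty_blocks = 700, the writeback kick fires (2 * 700 = 1400 > 1200, i.e.
more than half of the free blocks are dirty) while delayed allocation
stays enabled (2 * 1200 = 2400 >= 3 * 700 = 2100, ignoring the
EXT4_FREECLUSTERS_WATERMARK clause); only around dirty_blocks = 900 does
2 * free_blocks < 3 * dirty_blocks hold and the switch to non-delalloc
happen.  The down_read_trylock(&sb->s_umount) guard simply skips the kick
when an unmount or remount already holds that lock.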
  
@@@ -2879,9 -2881,6 +2881,6 @@@ static void ext4_end_io_dio(struct kioc
  {
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
          ext4_io_end_t *io_end = iocb->private;
-       struct workqueue_struct *wq;
-       unsigned long flags;
-       struct ext4_inode_info *ei;
  
        /* if not async direct IO or dio with 0 bytes write, just return */
        if (!io_end || !size)
                io_end->iocb = iocb;
                io_end->result = ret;
        }
-       wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
  
-       /* Add the io_end to per-inode completed aio dio list*/
-       ei = EXT4_I(io_end->inode);
-       spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-       list_add_tail(&io_end->list, &ei->i_completed_io_list);
-       spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-       /* queue the work to convert unwritten extents to written */
-       queue_work(wq, &io_end->work);
+       ext4_add_complete_io(io_end);
  }
  
  static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
  {
        ext4_io_end_t *io_end = bh->b_private;
-       struct workqueue_struct *wq;
        struct inode *inode;
-       unsigned long flags;
  
        if (!test_clear_buffer_uninit(bh) || !io_end)
                goto out;
         */
        inode = io_end->inode;
        ext4_set_io_unwritten_flag(inode, io_end);
-       /* Add the io_end to per-inode completed io list*/
-       spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
-       list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
-       spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-       wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
-       /* queue the work to convert unwritten extents to written */
-       queue_work(wq, &io_end->work);
+       ext4_add_complete_io(io_end);
  out:
        bh->b_private = NULL;
        bh->b_end_io = NULL;
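
Both completion paths now funnel through ext4_add_complete_io(), which
centralizes the list manipulation and workqueue kick that used to be
open-coded at each call site.  The sketch below is reconstructed from the
inline code deleted above; the real helper lives in fs/ext4/page-io.c and
may, for example, queue the work item only when the list transitions from
empty:

    static void ext4_add_complete_io(ext4_io_end_t *io_end)
    {
            struct ext4_inode_info *ei = EXT4_I(io_end->inode);
            struct workqueue_struct *wq =
                    EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
            unsigned long flags;

            /* Add the io_end to the per-inode completed-IO list. */
            spin_lock_irqsave(&ei->i_completed_io_lock, flags);
            list_add_tail(&io_end->list, &ei->i_completed_io_list);
            spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);

            /* Queue the work that converts unwritten extents to written. */
            queue_work(wq, &io_end->work);
    }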
@@@ -3029,6 -3010,7 +3010,7 @@@ static ssize_t ext4_ext_direct_IO(int r
                overwrite = *((int *)iocb->private);
  
                if (overwrite) {
+                       atomic_inc(&inode->i_dio_count);
                        down_read(&EXT4_I(inode)->i_data_sem);
                        mutex_unlock(&inode->i_mutex);
                }
                 * hook to the iocb.
                 */
                iocb->private = NULL;
-               EXT4_I(inode)->cur_aio_dio = NULL;
+               ext4_inode_aio_set(inode, NULL);
                if (!is_sync_kiocb(iocb)) {
                        ext4_io_end_t *io_end =
                                ext4_init_io_end(inode, GFP_NOFS);
                         * is an unwritten extent that needs to be converted
                         * when IO is completed.
                         */
-                       EXT4_I(inode)->cur_aio_dio = iocb->private;
+                       ext4_inode_aio_set(inode, io_end);
                }
  
                if (overwrite)
                                                 NULL,
                                                 DIO_LOCKING);
                if (iocb->private)
-                       EXT4_I(inode)->cur_aio_dio = NULL;
+                       ext4_inode_aio_set(inode, NULL);
                /*
                 * The io_end structure takes a reference to the inode,
                 * that structure needs to be destroyed and the
        retake_lock:
                /* take i_mutex locking again if we do an overwrite dio */
                if (overwrite) {
+                       inode_dio_done(inode);
                        up_read(&EXT4_I(inode)->i_data_sem);
                        mutex_lock(&inode->i_mutex);
                }
@@@ -3313,7 -3296,7 +3296,7 @@@ int ext4_discard_partial_page_buffers(h
   * handle: The journal handle
   * inode:  The files inode
   * page:   A locked page that contains the offset "from"
 - * from:   The starting byte offset (from the begining of the file)
 + * from:   The starting byte offset (from the beginning of the file)
   *         to begin discarding
   * len:    The length of bytes to discard
   * flags:  Optional flags that may be used:
   *         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
   *         Only zero the regions of the page whose buffer heads
   *         have already been unmapped.  This flag is appropriate
 - *         for updateing the contents of a page whose blocks may
 + *         for updating the contents of a page whose blocks may
   *         have already been released, and we only want to zero
   *         out the regions that correspond to those released blocks.
   *
 - * Returns zero on sucess or negative on failure.
 + * Returns zero on success or negative on failure.
   */
  static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
                struct inode *inode, struct page *page, loff_t from,
@@@ -3486,7 -3469,7 +3469,7 @@@ int ext4_can_truncate(struct inode *ino
   * @offset: The offset where the hole will begin
   * @len:    The length of the hole
   *
 - * Returns: 0 on sucess or negative on failure
 + * Returns: 0 on success or negative on failure
   */
  
  int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
@@@ -4008,7 -3991,7 +3991,7 @@@ static int ext4_inode_blocks_set(handle
  
        if (i_blocks <= ~0U) {
                /*
 -               * i_blocks can be represnted in a 32 bit variable
 +               * i_blocks can be represented in a 32 bit variable
                 * as multiple of 512 bytes
                 */
                raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
@@@ -4052,6 -4035,7 +4035,7 @@@ static int ext4_do_update_inode(handle_
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct buffer_head *bh = iloc->bh;
        int err = 0, rc, block;
+       int need_datasync = 0;
        uid_t i_uid;
        gid_t i_gid;
  
                raw_inode->i_file_acl_high =
                        cpu_to_le16(ei->i_file_acl >> 32);
        raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
-       ext4_isize_set(raw_inode, ei->i_disksize);
+       if (ei->i_disksize != ext4_isize(raw_inode)) {
+               ext4_isize_set(raw_inode, ei->i_disksize);
+               need_datasync = 1;
+       }
        if (ei->i_disksize > 0x7fffffffULL) {
                struct super_block *sb = inode->i_sb;
                if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
                err = rc;
        ext4_clear_inode_state(inode, EXT4_STATE_NEW);
  
-       ext4_update_inode_fsync_trans(handle, inode, 0);
+       ext4_update_inode_fsync_trans(handle, inode, need_datasync);
  out_brelse:
        brelse(bh);
        ext4_std_error(inode->i_sb, err);
   *
   * - Within generic_file_write() for O_SYNC files.
   *   Here, there will be no transaction running. We wait for any running
 - *   trasnaction to commit.
 + *   transaction to commit.
   *
   * - Within sys_sync(), kupdate and such.
   *   We wait on commit, if told to.
@@@ -4298,7 -4285,6 +4285,6 @@@ int ext4_setattr(struct dentry *dentry
        }
  
        if (attr->ia_valid & ATTR_SIZE) {
-               inode_dio_wait(inode);
  
                if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        }
  
        if (attr->ia_valid & ATTR_SIZE) {
-               if (attr->ia_size != i_size_read(inode))
+               if (attr->ia_size != i_size_read(inode)) {
                        truncate_setsize(inode, attr->ia_size);
+                       /* Inode size will be reduced, wait for dio in flight.
+                        * Temporarily disable dioread_nolock to prevent
+                        * livelock. */
+                       if (orphan) {
+                               ext4_inode_block_unlocked_dio(inode);
+                               inode_dio_wait(inode);
+                               ext4_inode_resume_unlocked_dio(inode);
+                       }
+               }
                ext4_truncate(inode);
        }
  
@@@ -4413,7 -4408,7 +4408,7 @@@ static int ext4_index_trans_blocks(stru
    * worst case, the index blocks spread over different block groups
   *
   * If datablocks are discontiguous, they are possible to spread over
 - * different block groups too. If they are contiuguous, with flexbg,
 + * different block groups too. If they are contiguous, with flexbg,
   * they could still across block group boundary.
   *
   * Also account for superblock, inode, quota and xattr blocks
@@@ -4589,6 -4584,14 +4584,6 @@@ static int ext4_expand_extra_isize(stru
   * inode out, but prune_icache isn't a user-visible syncing function.
   * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
   * we start and wait on commits.
 - *
 - * Is this efficient/effective?  Well, we're being nice to the system
 - * by cleaning up our inodes proactively so they can be reaped
 - * without I/O.  But we are potentially leaving up to five seconds'
 - * worth of inodes floating about which prune_icache wants us to
 - * write out.  One way to fix that would be to get prune_icache()
 - * to do a write_super() to free up some memory.  It has the desired
 - * effect.
   */
  int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
  {
@@@ -4727,6 -4730,10 +4722,10 @@@ int ext4_change_inode_journal_flag(stru
                        return err;
        }
  
+       /* Wait for all existing dio workers */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
        jbd2_journal_lock_updates(journal);
  
        /*
        ext4_set_aops(inode);
  
        jbd2_journal_unlock_updates(journal);
+       ext4_inode_resume_unlocked_dio(inode);
  
        /* Finally we can mark the inode as dirty. */
  
@@@ -4780,6 -4788,7 +4780,7 @@@ int ext4_page_mkwrite(struct vm_area_st
        int retries = 0;
  
        sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
        /* Delalloc case is easy... */
        if (test_opt(inode->i_sb, DELALLOC) &&
            !ext4_should_journal_data(inode) &&
diff --combined fs/ext4/ioctl.c
@@@ -233,7 -233,7 +233,7 @@@ group_extend_out
  
        case EXT4_IOC_MOVE_EXT: {
                struct move_extent me;
 -              struct file *donor_filp;
 +              struct fd donor;
                int err;
  
                if (!(filp->f_mode & FMODE_READ) ||
                        return -EFAULT;
                me.moved_len = 0;
  
 -              donor_filp = fget(me.donor_fd);
 -              if (!donor_filp)
 +              donor = fdget(me.donor_fd);
 +              if (!donor.file)
                        return -EBADF;
  
 -              if (!(donor_filp->f_mode & FMODE_WRITE)) {
 +              if (!(donor.file->f_mode & FMODE_WRITE)) {
                        err = -EBADF;
                        goto mext_out;
                }
                if (err)
                        goto mext_out;
  
 -              err = ext4_move_extents(filp, donor_filp, me.orig_start,
 +              err = ext4_move_extents(filp, donor.file, me.orig_start,
                                        me.donor_start, me.len, &me.moved_len);
                mnt_drop_write_file(filp);
  
                                 &me, sizeof(me)))
                        err = -EFAULT;
  mext_out:
 -              fput(donor_filp);
 +              fdput(donor);
                return err;
        }
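
The fget()/fput() to fdget()/fdput() conversion here moves to the
lighter-weight descriptor-lookup API: struct fd bundles the struct file
pointer with a flag recording whether a reference was actually taken (none
is needed when the file table is not shared), and fdput() drops the
reference only in that case.  The usage pattern, with fd_number standing
in for any descriptor:

    struct fd f = fdget(fd_number);

    if (!f.file)
            return -EBADF;  /* no such open descriptor */
    /* ... use f.file exactly as the old struct file * ... */
    fdput(f);               /* releases the reference only if one was taken */
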
  
@@@ -366,26 -366,11 +366,11 @@@ group_add_out
                        return -EOPNOTSUPP;
                }
  
-               if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-                              EXT4_FEATURE_INCOMPAT_META_BG)) {
-                       ext4_msg(sb, KERN_ERR,
-                                "Online resizing not (yet) supported with meta_bg");
-                       return -EOPNOTSUPP;
-               }
                if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
                                   sizeof(__u64))) {
                        return -EFAULT;
                }
  
-               if (n_blocks_count > MAX_32_NUM &&
-                   !EXT4_HAS_INCOMPAT_FEATURE(sb,
-                                              EXT4_FEATURE_INCOMPAT_64BIT)) {
-                       ext4_msg(sb, KERN_ERR,
-                                "File system only supports 32-bit block numbers");
-                       return -EOPNOTSUPP;
-               }
                err = ext4_resize_begin(sb);
                if (err)
                        return err;
@@@ -420,13 -405,6 +405,6 @@@ resizefs_out
                if (!blk_queue_discard(q))
                        return -EOPNOTSUPP;
  
-               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                              EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
-                       ext4_msg(sb, KERN_ERR,
-                                "FITRIM not supported with bigalloc");
-                       return -EOPNOTSUPP;
-               }
                if (copy_from_user(&range, (struct fstrim_range __user *)arg,
                    sizeof(range)))
                        return -EFAULT;
diff --combined fs/ext4/mballoc.c
@@@ -24,6 -24,7 +24,7 @@@
  #include "ext4_jbd2.h"
  #include "mballoc.h"
  #include <linux/debugfs.h>
+ #include <linux/log2.h>
  #include <linux/slab.h>
  #include <trace/events/ext4.h>
  
@@@ -1338,17 -1339,17 +1339,17 @@@ static void mb_free_blocks(struct inod
        mb_check_buddy(e4b);
  }
  
- static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
+ static int mb_find_extent(struct ext4_buddy *e4b, int block,
                                int needed, struct ext4_free_extent *ex)
  {
        int next = block;
-       int max;
+       int max, order;
        void *buddy;
  
        assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
        BUG_ON(ex == NULL);
  
-       buddy = mb_find_buddy(e4b, order, &max);
+       buddy = mb_find_buddy(e4b, 0, &max);
        BUG_ON(buddy == NULL);
        BUG_ON(block >= max);
        if (mb_test_bit(block, buddy)) {
                return 0;
        }
  
-       /* FIXME dorp order completely ? */
-       if (likely(order == 0)) {
-               /* find actual order */
-               order = mb_find_order_for_block(e4b, block);
-               block = block >> order;
-       }
+       /* find actual order */
+       order = mb_find_order_for_block(e4b, block);
+       block = block >> order;
  
        ex->fe_len = 1 << order;
        ex->fe_start = block << order;
@@@ -1549,7 -1547,7 +1547,7 @@@ static void ext4_mb_check_limits(struc
                /* recheck chunk's availability - we don't know
                 * when it was found (within this lock-unlock
                 * period or not) */
-               max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
+               max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
                if (max >= gex->fe_len) {
                        ext4_mb_use_best_found(ac, e4b);
                        return;
@@@ -1641,7 -1639,7 +1639,7 @@@ int ext4_mb_try_best_found(struct ext4_
                return err;
  
        ext4_lock_group(ac->ac_sb, group);
-       max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
+       max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
  
        if (max > 0) {
                ac->ac_b_ex = ex;
@@@ -1662,17 -1660,20 +1660,20 @@@ int ext4_mb_find_by_goal(struct ext4_al
        int max;
        int err;
        struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+       struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
        struct ext4_free_extent ex;
  
        if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
                return 0;
+       if (grp->bb_free == 0)
+               return 0;
  
        err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
        if (err)
                return err;
  
        ext4_lock_group(ac->ac_sb, group);
-       max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
+       max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
                             ac->ac_g_ex.fe_len, &ex);
  
        if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
@@@ -1788,7 -1789,7 +1789,7 @@@ void ext4_mb_complex_scan_group(struct 
                        break;
                }
  
-               mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
+               mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
                BUG_ON(ex.fe_len <= 0);
                if (free < ex.fe_len) {
                        ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
@@@ -1840,7 -1841,7 +1841,7 @@@ void ext4_mb_scan_aligned(struct ext4_a
  
        while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
                if (!mb_test_bit(i, bitmap)) {
-                       max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
+                       max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
                        if (max >= sbi->s_stripe) {
                                ac->ac_found++;
                                ac->ac_b_ex = ex;
@@@ -1862,6 -1863,12 +1863,12 @@@ static int ext4_mb_good_group(struct ex
  
        BUG_ON(cr < 0 || cr >= 4);
  
+       free = grp->bb_free;
+       if (free == 0)
+               return 0;
+       if (cr <= 2 && free < ac->ac_g_ex.fe_len)
+               return 0;
        /* We only do this if the grp has never been initialized */
        if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
                int ret = ext4_mb_init_group(ac->ac_sb, group);
                        return 0;
        }
  
-       free = grp->bb_free;
        fragments = grp->bb_fragments;
-       if (free == 0)
-               return 0;
        if (fragments == 0)
                return 0;
  
@@@ -2163,6 -2167,39 +2167,39 @@@ static struct kmem_cache *get_groupinfo
        return cachep;
  }
  
+ /*
+  * Allocate the top-level s_group_info array for the specified number
+  * of groups
+  */
+ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       unsigned size;
+       struct ext4_group_info ***new_groupinfo;
+       size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
+               EXT4_DESC_PER_BLOCK_BITS(sb);
+       if (size <= sbi->s_group_info_size)
+               return 0;
+       size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
+       new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
+       if (!new_groupinfo) {
+               ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
+               return -ENOMEM;
+       }
+       if (sbi->s_group_info) {
+               memcpy(new_groupinfo, sbi->s_group_info,
+                      sbi->s_group_info_size * sizeof(*sbi->s_group_info));
+               ext4_kvfree(sbi->s_group_info);
+       }
+       sbi->s_group_info = new_groupinfo;
+       sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
+       ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", 
+                  sbi->s_group_info_size);
+       return 0;
+ }
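
As a concrete sizing example (assuming 4KiB blocks with 32-byte group
descriptors, so EXT4_DESC_PER_BLOCK = 128, and 8-byte pointers): a
filesystem with 81,920 groups needs 81920 / 128 = 640 second-level
pointers, or 5,120 bytes, which roundup_pow_of_two() grows to 8,192 bytes.
That leaves room for 1,024 pointers, so s_group_info_size becomes 1024 and
the top-level array will not need reallocating until the group count
exceeds 1024 * 128 = 131,072.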
  /* Create and initialize ext4_group_info data for the given group. */
  int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
                          struct ext4_group_desc *desc)
                sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
        i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
  
-       meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
+       meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
        if (meta_group_info[i] == NULL) {
                ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
                goto exit_group_info;
        }
-       memset(meta_group_info[i], 0, kmem_cache_size(cachep));
        set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
                &(meta_group_info[i]->bb_state));
  
@@@ -2252,49 -2288,14 +2288,14 @@@ static int ext4_mb_init_backend(struct 
        ext4_group_t ngroups = ext4_get_groups_count(sb);
        ext4_group_t i;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_super_block *es = sbi->s_es;
-       int num_meta_group_infos;
-       int num_meta_group_infos_max;
-       int array_size;
+       int err;
        struct ext4_group_desc *desc;
        struct kmem_cache *cachep;
  
-       /* This is the number of blocks used by GDT */
-       num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
-                               1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
-       /*
-        * This is the total number of blocks used by GDT including
-        * the number of reserved blocks for GDT.
-        * The s_group_info array is allocated with this value
-        * to allow a clean online resize without a complex
-        * manipulation of pointer.
-        * The drawback is the unused memory when no resize
-        * occurs but it's very low in terms of pages
-        * (see comments below)
-        * Need to handle this properly when META_BG resizing is allowed
-        */
-       num_meta_group_infos_max = num_meta_group_infos +
-                               le16_to_cpu(es->s_reserved_gdt_blocks);
+       err = ext4_mb_alloc_groupinfo(sb, ngroups);
+       if (err)
+               return err;
  
-       /*
-        * array_size is the size of s_group_info array. We round it
-        * to the next power of two because this approximation is done
-        * internally by kmalloc so we can have some more memory
-        * for free here (e.g. may be used for META_BG resize).
-        */
-       array_size = 1;
-       while (array_size < sizeof(*sbi->s_group_info) *
-              num_meta_group_infos_max)
-               array_size = array_size << 1;
-       /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
-        * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
-        * So a two level scheme suffices for now. */
-       sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
-       if (sbi->s_group_info == NULL) {
-               ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
-               return -ENOMEM;
-       }
        sbi->s_buddy_cache = new_inode(sb);
        if (sbi->s_buddy_cache == NULL) {
                ext4_msg(sb, KERN_ERR, "can't get new inode");
@@@ -2322,7 -2323,7 +2323,7 @@@ err_freebuddy
        cachep = get_groupinfo_cache(sb->s_blocksize_bits);
        while (i-- > 0)
                kmem_cache_free(cachep, ext4_get_group_info(sb, i));
-       i = num_meta_group_infos;
+       i = sbi->s_group_info_size;
        while (i-- > 0)
                kfree(sbi->s_group_info[i]);
        iput(sbi->s_buddy_cache);
@@@ -4008,7 -4009,6 +4009,6 @@@ ext4_mb_initialize_context(struct ext4_
        ext4_get_group_no_and_offset(sb, goal, &group, &block);
  
        /* set up allocation goals */
-       memset(ac, 0, sizeof(struct ext4_allocation_context));
        ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
        ac->ac_status = AC_STATUS_CONTINUE;
        ac->ac_sb = sb;
@@@ -4291,7 -4291,7 +4291,7 @@@ ext4_fsblk_t ext4_mb_new_blocks(handle_
                }
        }
  
-       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+       ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
        if (!ac) {
                ar->len = 0;
                *errp = -ENOMEM;
@@@ -4657,6 -4657,8 +4657,8 @@@ do_more
                 * with group lock held. generate_buddy look at
                 * them with group lock_held
                 */
+               if (test_opt(sb, DISCARD))
+                       ext4_issue_discard(sb, block_group, bit, count);
                ext4_lock_group(sb, block_group);
                mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
                mb_free_blocks(inode, &e4b, bit, count_clusters);
@@@ -4709,7 -4711,7 +4711,7 @@@ error_return
   * ext4_group_add_blocks() -- Add given blocks to an existing group
   * @handle:                   handle to this transaction
   * @sb:                               super block
 - * @block:                    start physcial block to add to the block group
 + * @block:                    start physical block to add to the block group
   * @count:                    number of blocks to free
   *
   * This marks the blocks as free in the bitmap and buddy.
@@@ -4988,7 -4990,8 +4990,8 @@@ int ext4_trim_fs(struct super_block *sb
  
        start = range->start >> sb->s_blocksize_bits;
        end = start + (range->len >> sb->s_blocksize_bits) - 1;
-       minlen = range->minlen >> sb->s_blocksize_bits;
+       minlen = EXT4_NUM_B2C(EXT4_SB(sb),
+                             range->minlen >> sb->s_blocksize_bits);
  
        if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
            unlikely(start >= max_blks))
                atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
  
  out:
-       range->len = trimmed * sb->s_blocksize;
+       range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
        return ret;
  }
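
The FITRIM fix converts the caller's minimum extent length into cluster
units and reports the trimmed total back in bytes.  On a bigalloc
filesystem with 4KiB blocks and 16 blocks per cluster, a requested minlen
of 1MiB becomes 256 blocks, which EXT4_NUM_B2C rounds up to 16 clusters;
a trimmed count of 10 clusters is then reported as
EXT4_C2B(sbi, 10) << s_blocksize_bits = 10 * 16 * 4096 = 655,360 bytes.
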
diff --combined fs/ext4/super.c
@@@ -326,6 -326,11 +326,6 @@@ static void ext4_put_nojournal(handle_
  
  /*
   * Wrappers for jbd2_journal_start/end.
 - *
 - * The only special thing we need to do here is to make sure that all
 - * journal_end calls result in the superblock being marked dirty, so
 - * that sync() will call the filesystem's write_super callback if
 - * appropriate.
   */
  handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
  {
        return jbd2_journal_start(journal, nblocks);
  }
  
 -/*
 - * The only special thing we need to do here is to make sure that all
 - * jbd2_journal_stop calls result in the superblock being marked dirty, so
 - * that sync() will call the filesystem's write_super callback if
 - * appropriate.
 - */
  int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
  {
        struct super_block *sb;
@@@ -420,7 -431,7 +420,7 @@@ static void __save_error_info(struct su
         */
        if (!es->s_error_count)
                mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
-       es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
+       le32_add_cpu(&es->s_error_count, 1);
  }
  
  static void save_error_info(struct super_block *sb, const char *func,
@@@ -850,7 -861,6 +850,6 @@@ static void ext4_put_super(struct super
        flush_workqueue(sbi->dio_unwritten_wq);
        destroy_workqueue(sbi->dio_unwritten_wq);
  
-       lock_super(sb);
        if (sbi->s_journal) {
                err = jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
-       unlock_super(sb);
        kobject_put(&sbi->s_kobj);
        wait_for_completion(&sbi->s_kobj_unregister);
        if (sbi->s_chksum_driver)
@@@ -956,11 -965,10 +954,10 @@@ static struct inode *ext4_alloc_inode(s
        ei->jinode = NULL;
        INIT_LIST_HEAD(&ei->i_completed_io_list);
        spin_lock_init(&ei->i_completed_io_lock);
-       ei->cur_aio_dio = NULL;
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
        atomic_set(&ei->i_ioend_count, 0);
-       atomic_set(&ei->i_aiodio_unwritten, 0);
+       atomic_set(&ei->i_unwritten, 0);
  
        return &ei->vfs_inode;
  }
@@@ -1019,11 -1027,6 +1016,11 @@@ static int init_inodecache(void
  
  static void destroy_inodecache(void)
  {
 +      /*
 +       * Make sure all delayed rcu free inodes are flushed before we
 +       * destroy cache.
 +       */
 +      rcu_barrier();
        kmem_cache_destroy(ext4_inode_cachep);
  }
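
The rcu_barrier() addition closes an unload race: inodes are freed through
call_rcu(), so callbacks that release objects back into the cache can
still be pending when the cache is destroyed.  The general shape of the
fix, as a sketch with a hypothetical cache:

    /* Hypothetical teardown mirroring the destroy_inodecache() change. */
    static void example_destroy_cache(struct kmem_cache *cachep)
    {
            /*
             * Wait for every pending call_rcu() callback -- including
             * those freeing objects back into this cache -- to complete.
             */
            rcu_barrier();
            kmem_cache_destroy(cachep);
    }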
  
@@@ -1224,6 -1227,7 +1221,7 @@@ enum 
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
+       Opt_max_dir_size_kb,
  };
  
  static const match_table_t tokens = {
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
+       {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
@@@ -1477,6 -1482,7 +1476,7 @@@ static const struct mount_opts 
        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
+       {Opt_max_dir_size_kb, 0, MOPT_GTE0},
        {Opt_err, 0, 0}
  };
  
@@@ -1592,6 -1598,8 +1592,8 @@@ static int handle_mount_opt(struct supe
                        if (!args->from)
                                arg = EXT4_DEF_LI_WAIT_MULT;
                        sbi->s_li_wait_mult = arg;
+               } else if (token == Opt_max_dir_size_kb) {
+                       sbi->s_max_dir_size_kb = arg;
                } else if (token == Opt_stripe) {
                        sbi->s_stripe = arg;
                } else if (m->flags & MOPT_DATAJ) {
@@@ -1664,7 -1672,7 +1666,7 @@@ static int parse_options(char *options
                 * Initialize args struct so we know whether arg was
                 * found; some options take optional arguments.
                 */
-               args[0].to = args[0].from = 0;
+               args[0].to = args[0].from = NULL;
                token = match_token(p, tokens, args);
                if (handle_mount_opt(sb, p, token, args, journal_devnum,
                                     journal_ioprio, is_remount) < 0)
@@@ -1740,7 -1748,7 +1742,7 @@@ static inline void ext4_show_quota_opti
  
  static const char *token2str(int token)
  {
-       static const struct match_token *t;
+       const struct match_token *t;
  
        for (t = tokens; t->token != Opt_err; t++)
                if (t->token == token && !strchr(t->pattern, '='))
@@@ -1823,6 -1831,8 +1825,8 @@@ static int _ext4_show_options(struct se
        if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
+       if (nodefs || sbi->s_max_dir_size_kb)
+               SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
  
        ext4_show_quota_options(seq, sb);
        return 0;
@@@ -1914,15 -1924,45 +1918,45 @@@ done
        return res;
  }
  
+ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct flex_groups *new_groups;
+       int size;
+       if (!sbi->s_log_groups_per_flex)
+               return 0;
+       size = ext4_flex_group(sbi, ngroup - 1) + 1;
+       if (size <= sbi->s_flex_groups_allocated)
+               return 0;
+       size = roundup_pow_of_two(size * sizeof(struct flex_groups));
+       new_groups = ext4_kvzalloc(size, GFP_KERNEL);
+       if (!new_groups) {
+               ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
+                        size / (int) sizeof(struct flex_groups));
+               return -ENOMEM;
+       }
+       if (sbi->s_flex_groups) {
+               memcpy(new_groups, sbi->s_flex_groups,
+                      (sbi->s_flex_groups_allocated *
+                       sizeof(struct flex_groups)));
+               ext4_kvfree(sbi->s_flex_groups);
+       }
+       sbi->s_flex_groups = new_groups;
+       sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
+       return 0;
+ }
  static int ext4_fill_flex_info(struct super_block *sb)
  {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_group_desc *gdp = NULL;
-       ext4_group_t flex_group_count;
        ext4_group_t flex_group;
        unsigned int groups_per_flex = 0;
-       size_t size;
-       int i;
+       int i, err;
  
        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
        if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
        }
        groups_per_flex = 1 << sbi->s_log_groups_per_flex;
  
-       /* We allocate both existing and potentially added groups */
-       flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
-                       ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
-                             EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
-       size = flex_group_count * sizeof(struct flex_groups);
-       sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
-       if (sbi->s_flex_groups == NULL) {
-               ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
-                        flex_group_count);
+       err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
+       if (err)
                goto failed;
-       }
  
        for (i = 0; i < sbi->s_groups_count; i++) {
                gdp = ext4_get_group_desc(sb, i, NULL);
@@@ -2144,10 -2176,12 +2170,12 @@@ static void ext4_orphan_cleanup(struct 
        }
  
        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
-               if (es->s_last_orphan)
+               /* don't clear list on RO mount w/ errors */
+               if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
                        jbd_debug(1, "Errors on filesystem, "
                                  "clearing orphan list.\n");
-               es->s_last_orphan = 0;
+                       es->s_last_orphan = 0;
+               }
                jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
                return;
        }
@@@ -2528,6 -2562,7 +2556,7 @@@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb
  EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
  EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
  EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
+ EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
  EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
  
  static struct attribute *ext4_attrs[] = {
        ATTR_LIST(mb_stream_req),
        ATTR_LIST(mb_group_prealloc),
        ATTR_LIST(max_writeback_mb_bump),
+       ATTR_LIST(extent_max_zeroout_kb),
        ATTR_LIST(trigger_fs_error),
        NULL,
  };
  /* Features this copy of ext4 supports */
  EXT4_INFO_ATTR(lazy_itable_init);
  EXT4_INFO_ATTR(batched_discard);
+ EXT4_INFO_ATTR(meta_bg_resize);
  
  static struct attribute *ext4_feat_attrs[] = {
        ATTR_LIST(lazy_itable_init),
        ATTR_LIST(batched_discard),
+       ATTR_LIST(meta_bg_resize),
        NULL,
  };
  
@@@ -3374,7 -3412,7 +3406,7 @@@ static int ext4_fill_super(struct super
         * enable delayed allocation by default
         * Use -o nodelalloc to turn it off
         */
-       if (!IS_EXT3_SB(sb) &&
+       if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
                set_opt(sb, DELALLOC);
  
  
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_max_writeback_mb_bump = 128;
+       sbi->s_extent_max_zeroout_kb = 32;
  
        /*
         * set up enough so that it can read an inode
@@@ -4519,11 -4558,9 +4552,9 @@@ static int ext4_unfreeze(struct super_b
        if (sb->s_flags & MS_RDONLY)
                return 0;
  
-       lock_super(sb);
        /* Reset the needs_recovery flag before the fs is unlocked. */
        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        ext4_commit_super(sb, 1);
-       unlock_super(sb);
        return 0;
  }
  
@@@ -4559,7 -4596,6 +4590,6 @@@ static int ext4_remount(struct super_bl
        char *orig_data = kstrdup(data, GFP_KERNEL);
  
        /* Store the original options */
-       lock_super(sb);
        old_sb_flags = sb->s_flags;
        old_opts.s_mount_opt = sbi->s_mount_opt;
        old_opts.s_mount_opt2 = sbi->s_mount_opt2;
        if (sbi->s_journal == NULL)
                ext4_commit_super(sb, 1);
  
-       unlock_super(sb);
  #ifdef CONFIG_QUOTA
        /* Release old quota file names */
        for (i = 0; i < MAXQUOTAS; i++)
                else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_QUOTA)) {
                        err = ext4_enable_quotas(sb);
-                       if (err) {
-                               lock_super(sb);
+                       if (err)
                                goto restore_opts;
-                       }
                }
        }
  #endif
@@@ -4744,7 -4777,6 +4771,6 @@@ restore_opts
                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
        }
  #endif
-       unlock_super(sb);
        kfree(orig_data);
        return err;
  }
@@@ -4796,7 -4828,7 +4822,7 @@@ static int ext4_statfs(struct dentry *d
  
  static inline struct inode *dquot_to_inode(struct dquot *dquot)
  {
 -      return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
 +      return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
  }
  
  static int ext4_write_dquot(struct dquot *dquot)
@@@ -5269,8 -5301,10 +5295,10 @@@ static int __init ext4_init_fs(void
        if (err)
                goto out6;
        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
-       if (!ext4_kset)
+       if (!ext4_kset) {
+               err = -ENOMEM;
                goto out5;
+       }
        ext4_proc_root = proc_mkdir("fs/ext4", NULL);
  
        err = ext4_init_feat_adverts();
diff --combined fs/fs-writeback.c
@@@ -63,6 -63,7 +63,7 @@@ int writeback_in_progress(struct backin
  {
        return test_bit(BDI_writeback_running, &bdi->state);
  }
+ EXPORT_SYMBOL(writeback_in_progress);
  
  static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
  {
@@@ -577,6 -578,10 +578,6 @@@ static long writeback_chunk_size(struc
  /*
   * Write a portion of b_io inodes which belong to @sb.
   *
 - * If @only_this_sb is true, then find and write all such
 - * inodes. Otherwise write only ones which go sequentially
 - * in reverse order.
 - *
   * Return the number of pages and/or inodes written.
   */
  static long writeback_sb_inodes(struct super_block *sb,
diff --combined fs/jbd2/journal.c
@@@ -612,8 -612,8 +612,8 @@@ int jbd2_journal_start_commit(journal_
                ret = 1;
        } else if (journal->j_committing_transaction) {
                /*
 -               * If ext3_write_super() recently started a commit, then we
 -               * have to wait for completion of that transaction
 +               * If commit has been started, then we have to wait for
 +               * completion of that transaction.
                 */
                if (ptid)
                        *ptid = journal->j_committing_transaction->t_tid;
@@@ -1354,6 -1354,11 +1354,11 @@@ static void jbd2_mark_journal_empty(jou
  
        BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
        read_lock(&journal->j_state_lock);
+       /* Is it already empty? */
+       if (sb->s_start == 0) {
+               read_unlock(&journal->j_state_lock);
+               return;
+       }
        jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
                  journal->j_tail_sequence);