Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 3 Oct 2021 20:56:53 +0000 (13:56 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 3 Oct 2021 20:56:53 +0000 (13:56 -0700)
Pull ext4 fixes from Ted Ts'o:
 "Fix a number of ext4 bugs in fast_commit, inline data, and delayed
  allocation.

  Also fix error handling code paths in ext4_dx_readdir() and
  ext4_fill_super().

  Finally, avoid grabbing a journal handle in the delayed allocation
  write in the common cases where we are overwriting a pre-existing
  block or appending to an inode"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: recheck buffer uptodate bit under buffer lock
  ext4: fix potential infinite loop in ext4_dx_readdir()
  ext4: flush s_error_work before journal destroy in ext4_fill_super
  ext4: fix loff_t overflow in ext4_max_bitmap_size()
  ext4: fix reserved space counter leakage
  ext4: limit the number of blocks in one ADD_RANGE TLV
  ext4: enforce buffer head state assertion in ext4_da_map_blocks
  ext4: remove extent cache entries when truncating inline data
  ext4: drop unnecessary journal handle in delalloc write
  ext4: factor out write end code of inline file
  ext4: correct the error path of ext4_write_inline_data_end()
  ext4: check and update i_disksize properly
  ext4: add error checking to ext4_ext_replay_set_iblocks()

1  2 
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/super.c

diff --combined fs/ext4/ext4.h
@@@ -1093,6 -1093,15 +1093,6 @@@ struct ext4_inode_info 
         * by other means, so we have i_data_sem.
         */
        struct rw_semaphore i_data_sem;
 -      /*
 -       * i_mmap_sem is for serializing page faults with truncate / punch hole
 -       * operations. We have to make sure that new page cannot be faulted in
 -       * a section of the inode that is being punched. We cannot easily use
 -       * i_data_sem for this since we need protection for the whole punch
 -       * operation and i_data_sem ranks below transaction start so we have
 -       * to occasionally drop it.
 -       */
 -      struct rw_semaphore i_mmap_sem;
        struct inode vfs_inode;
        struct jbd2_inode *jinode;
  
@@@ -3037,6 -3046,7 +3037,6 @@@ extern int ext4_chunk_trans_blocks(stru
  extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
                             loff_t lstart, loff_t lend);
  extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
 -extern vm_fault_t ext4_filemap_fault(struct vm_fault *vmf);
  extern qsize_t *ext4_get_reserved_space(struct inode *inode);
  extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
  extern void ext4_da_release_space(struct inode *inode, int to_free);
@@@ -3593,9 -3603,6 +3593,6 @@@ extern int ext4_da_write_inline_data_be
                                           unsigned flags,
                                           struct page **pagep,
                                           void **fsdata);
- extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
-                                        unsigned len, unsigned copied,
-                                        struct page *page);
  extern int ext4_try_add_inline_entry(handle_t *handle,
                                     struct ext4_filename *fname,
                                     struct inode *dir, struct inode *inode);
diff --combined fs/ext4/extents.c
@@@ -4479,7 -4479,6 +4479,7 @@@ static long ext4_zero_range(struct fil
                            loff_t len, int mode)
  {
        struct inode *inode = file_inode(file);
 +      struct address_space *mapping = file->f_mapping;
        handle_t *handle = NULL;
        unsigned int max_blocks;
        loff_t new_size = 0;
                 * Prevent page faults from reinstantiating pages we have
                 * released from page cache.
                 */
 -              down_write(&EXT4_I(inode)->i_mmap_sem);
 +              filemap_invalidate_lock(mapping);
  
                ret = ext4_break_layouts(inode);
                if (ret) {
 -                      up_write(&EXT4_I(inode)->i_mmap_sem);
 +                      filemap_invalidate_unlock(mapping);
                        goto out_mutex;
                }
  
                ret = ext4_update_disksize_before_punch(inode, offset, len);
                if (ret) {
 -                      up_write(&EXT4_I(inode)->i_mmap_sem);
 +                      filemap_invalidate_unlock(mapping);
                        goto out_mutex;
                }
                /* Now release the pages and zero block aligned part of pages */
  
                ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                             flags);
 -              up_write(&EXT4_I(inode)->i_mmap_sem);
 +              filemap_invalidate_unlock(mapping);
                if (ret)
                        goto out_mutex;
        }
@@@ -5227,7 -5226,6 +5227,7 @@@ out
  static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
  {
        struct super_block *sb = inode->i_sb;
 +      struct address_space *mapping = inode->i_mapping;
        ext4_lblk_t punch_start, punch_stop;
        handle_t *handle;
        unsigned int credits;
         * Prevent page faults from reinstantiating pages we have released from
         * page cache.
         */
 -      down_write(&EXT4_I(inode)->i_mmap_sem);
 +      filemap_invalidate_lock(mapping);
  
        ret = ext4_break_layouts(inode);
        if (ret)
         * Write tail of the last page before removed range since it will get
         * removed from the page cache below.
         */
 -      ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
 +      ret = filemap_write_and_wait_range(mapping, ioffset, offset);
        if (ret)
                goto out_mmap;
        /*
         * Write data that will be shifted to preserve them when discarding
         * page cache below. We are also protected from pages becoming dirty
 -       * by i_mmap_sem.
 +       * by i_rwsem and invalidate_lock.
         */
 -      ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
 +      ret = filemap_write_and_wait_range(mapping, offset + len,
                                           LLONG_MAX);
        if (ret)
                goto out_mmap;
@@@ -5357,7 -5355,7 +5357,7 @@@ out_stop
        ext4_journal_stop(handle);
        ext4_fc_stop_ineligible(sb);
  out_mmap:
 -      up_write(&EXT4_I(inode)->i_mmap_sem);
 +      filemap_invalidate_unlock(mapping);
  out_mutex:
        inode_unlock(inode);
        return ret;
  static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
  {
        struct super_block *sb = inode->i_sb;
 +      struct address_space *mapping = inode->i_mapping;
        handle_t *handle;
        struct ext4_ext_path *path;
        struct ext4_extent *extent;
         * Prevent page faults from reinstantiating pages we have released from
         * page cache.
         */
 -      down_write(&EXT4_I(inode)->i_mmap_sem);
 +      filemap_invalidate_lock(mapping);
  
        ret = ext4_break_layouts(inode);
        if (ret)
@@@ -5534,7 -5531,7 +5534,7 @@@ out_stop
        ext4_journal_stop(handle);
        ext4_fc_stop_ineligible(sb);
  out_mmap:
 -      up_write(&EXT4_I(inode)->i_mmap_sem);
 +      filemap_invalidate_unlock(mapping);
  out_mutex:
        inode_unlock(inode);
        return ret;
@@@ -5916,7 -5913,7 +5916,7 @@@ void ext4_ext_replay_shrink_inode(struc
  }
  
  /* Check if *cur is a hole and if it is, skip it */
- static void skip_hole(struct inode *inode, ext4_lblk_t *cur)
+ static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
  {
        int ret;
        struct ext4_map_blocks map;
        map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
  
        ret = ext4_map_blocks(NULL, inode, &map, 0);
+       if (ret < 0)
+               return ret;
        if (ret != 0)
-               return;
+               return 0;
        *cur = *cur + map.m_len;
+       return 0;
  }
  
  /* Count number of blocks used by this inode and update i_blocks */
@@@ -5976,7 -5976,9 +5979,9 @@@ int ext4_ext_replay_set_iblocks(struct 
         * iblocks by total number of differences found.
         */
        cur = 0;
-       skip_hole(inode, &cur);
+       ret = skip_hole(inode, &cur);
+       if (ret < 0)
+               goto out;
        path = ext4_find_extent(inode, cur, NULL, 0);
        if (IS_ERR(path))
                goto out;
                }
                cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
                                        ext4_ext_get_actual_len(ex));
-               skip_hole(inode, &cur);
+               ret = skip_hole(inode, &cur);
+               if (ret < 0) {
+                       ext4_ext_drop_refs(path);
+                       kfree(path);
+                       break;
+               }
                path2 = ext4_find_extent(inode, cur, NULL, 0);
                if (IS_ERR(path2)) {
                        ext4_ext_drop_refs(path);
diff --combined fs/ext4/inode.c
@@@ -1284,22 -1284,14 +1284,14 @@@ static int ext4_write_end(struct file *
        loff_t old_size = inode->i_size;
        int ret = 0, ret2;
        int i_size_changed = 0;
-       int inline_data = ext4_has_inline_data(inode);
        bool verity = ext4_verity_in_progress(inode);
  
        trace_ext4_write_end(inode, pos, len, copied);
-       if (inline_data) {
-               ret = ext4_write_inline_data_end(inode, pos, len,
-                                                copied, page);
-               if (ret < 0) {
-                       unlock_page(page);
-                       put_page(page);
-                       goto errout;
-               }
-               copied = ret;
-       } else
-               copied = block_write_end(file, mapping, pos,
-                                        len, copied, page, fsdata);
+       if (ext4_has_inline_data(inode))
+               return ext4_write_inline_data_end(inode, pos, len, copied, page);
+       copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
        /*
         * it's important to update i_size while still holding page lock:
         * page writeout could otherwise come in and zero beyond i_size.
         * ordering of page lock and transaction start for journaling
         * filesystems.
         */
-       if (i_size_changed || inline_data)
+       if (i_size_changed)
                ret = ext4_mark_inode_dirty(handle, inode);
  
        if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
                 * inode->i_size. So truncate them
                 */
                ext4_orphan_add(handle, inode);
- errout:
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
@@@ -1395,7 -1387,6 +1387,6 @@@ static int ext4_journalled_write_end(st
        int partial = 0;
        unsigned from, to;
        int size_changed = 0;
-       int inline_data = ext4_has_inline_data(inode);
        bool verity = ext4_verity_in_progress(inode);
  
        trace_ext4_journalled_write_end(inode, pos, len, copied);
  
        BUG_ON(!ext4_handle_valid(handle));
  
-       if (inline_data) {
-               ret = ext4_write_inline_data_end(inode, pos, len,
-                                                copied, page);
-               if (ret < 0) {
-                       unlock_page(page);
-                       put_page(page);
-                       goto errout;
-               }
-               copied = ret;
-       } else if (unlikely(copied < len) && !PageUptodate(page)) {
+       if (ext4_has_inline_data(inode))
+               return ext4_write_inline_data_end(inode, pos, len, copied, page);
+       if (unlikely(copied < len) && !PageUptodate(page)) {
                copied = 0;
                ext4_journalled_zero_new_buffers(handle, inode, page, from, to);
        } else {
        if (old_size < pos && !verity)
                pagecache_isize_extended(inode, old_size, pos);
  
-       if (size_changed || inline_data) {
+       if (size_changed) {
                ret2 = ext4_mark_inode_dirty(handle, inode);
                if (!ret)
                        ret = ret2;
                 */
                ext4_orphan_add(handle, inode);
  
- errout:
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
@@@ -1644,6 -1628,7 +1628,7 @@@ static int ext4_insert_delayed_block(st
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        int ret;
        bool allocated = false;
+       bool reserved = false;
  
        /*
         * If the cluster containing lblk is shared with a delayed,
                ret = ext4_da_reserve_space(inode);
                if (ret != 0)   /* ENOSPC */
                        goto errout;
+               reserved = true;
        } else {   /* bigalloc */
                if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
                        if (!ext4_es_scan_clu(inode,
                                        ret = ext4_da_reserve_space(inode);
                                        if (ret != 0)   /* ENOSPC */
                                                goto errout;
+                                       reserved = true;
                                } else {
                                        allocated = true;
                                }
        }
  
        ret = ext4_es_insert_delayed_block(inode, lblk, allocated);
+       if (ret && reserved)
+               ext4_da_release_space(inode, 1);
  
  errout:
        return ret;
@@@ -1722,13 -1711,16 +1711,16 @@@ static int ext4_da_map_blocks(struct in
                }
  
                /*
-                * Delayed extent could be allocated by fallocate.
-                * So we need to check it.
+                * the buffer head associated with a delayed and not unwritten
+                * block found in the extent status cache must contain an
+                * invalid block number and have its BH_New and BH_Delay bits
+                * set, reflecting the state assigned when the block was
+                * initially delayed allocated
                 */
-               if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
-                       map_bh(bh, inode->i_sb, invalid_block);
-                       set_buffer_new(bh);
-                       set_buffer_delay(bh);
+               if (ext4_es_is_delonly(&es)) {
+                       BUG_ON(bh->b_blocknr != invalid_block);
+                       BUG_ON(!buffer_new(bh));
+                       BUG_ON(!buffer_delay(bh));
                        return 0;
                }
  
@@@ -2932,19 -2924,6 +2924,6 @@@ static int ext4_nonda_switch(struct sup
        return 0;
  }
  
- /* We always reserve for an inode update; the superblock could be there too */
- static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
- {
-       if (likely(ext4_has_feature_large_file(inode->i_sb)))
-               return 1;
-       if (pos + len <= 0x7fffffffULL)
-               return 1;
-       /* We might need to update the superblock to set LARGE_FILE */
-       return 2;
- }
  static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
                               loff_t pos, unsigned len, unsigned flags,
                               struct page **pagep, void **fsdata)
        struct page *page;
        pgoff_t index;
        struct inode *inode = mapping->host;
-       handle_t *handle;
  
        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
                return -EIO;
                        return 0;
        }
  
-       /*
-        * grab_cache_page_write_begin() can take a long time if the
-        * system is thrashing due to memory pressure, or if the page
-        * is being written back.  So grab it first before we start
-        * the transaction handle.  This also allows us to allocate
-        * the page (if needed) without using GFP_NOFS.
-        */
- retry_grab:
+ retry:
        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
-       unlock_page(page);
  
-       /*
-        * With delayed allocation, we don't log the i_disksize update
-        * if there is delayed block allocation. But we still need
-        * to journalling the i_disksize update if writes to the end
-        * of file which has an already mapped buffer.
-        */
- retry_journal:
-       handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
-                               ext4_da_write_credits(inode, pos, len));
-       if (IS_ERR(handle)) {
-               put_page(page);
-               return PTR_ERR(handle);
-       }
-       lock_page(page);
-       if (page->mapping != mapping) {
-               /* The page got truncated from under us */
-               unlock_page(page);
-               put_page(page);
-               ext4_journal_stop(handle);
-               goto retry_grab;
-       }
        /* In case writeback began while the page was unlocked */
        wait_for_stable_page(page);
  
  #endif
        if (ret < 0) {
                unlock_page(page);
-               ext4_journal_stop(handle);
+               put_page(page);
                /*
                 * block_write_begin may have instantiated a few blocks
                 * outside i_size.  Trim these off again. Don't need
-                * i_size_read because we hold i_mutex.
+                * i_size_read because we hold inode lock.
                 */
                if (pos + len > inode->i_size)
                        ext4_truncate_failed_write(inode);
  
                if (ret == -ENOSPC &&
                    ext4_should_retry_alloc(inode->i_sb, &retries))
-                       goto retry_journal;
-               put_page(page);
+                       goto retry;
                return ret;
        }
  
@@@ -3075,8 -3021,6 +3021,6 @@@ static int ext4_da_write_end(struct fil
                             struct page *page, void *fsdata)
  {
        struct inode *inode = mapping->host;
-       int ret = 0, ret2;
-       handle_t *handle = ext4_journal_current_handle();
        loff_t new_i_size;
        unsigned long start, end;
        int write_mode = (int)(unsigned long)fsdata;
                                      len, copied, page, fsdata);
  
        trace_ext4_da_write_end(inode, pos, len, copied);
+       if (write_mode != CONVERT_INLINE_DATA &&
+           ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
+           ext4_has_inline_data(inode))
+               return ext4_write_inline_data_end(inode, pos, len, copied, page);
        start = pos & (PAGE_SIZE - 1);
        end = start + copied - 1;
  
        /*
-        * generic_write_end() will run mark_inode_dirty() if i_size
-        * changes.  So let's piggyback the i_disksize mark_inode_dirty
-        * into that.
+        * Since we are holding inode lock, we are sure i_disksize <=
+        * i_size. We also know that if i_disksize < i_size, there are
+        * delalloc writes pending in the range upto i_size. If the end of
+        * the current write is <= i_size, there's no need to touch
+        * i_disksize since writeback will push i_disksize upto i_size
+        * eventually. If the end of the current write is > i_size and
+        * inside an allocated block (ext4_da_should_update_i_disksize()
+        * check), we need to update i_disksize here as neither
+        * ext4_writepage() nor certain ext4_writepages() paths not
+        * allocating blocks update i_disksize.
+        *
+        * Note that we defer inode dirtying to generic_write_end() /
+        * ext4_da_write_inline_data_end().
         */
        new_i_size = pos + copied;
-       if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
-               if (ext4_has_inline_data(inode) ||
-                   ext4_da_should_update_i_disksize(page, end)) {
-                       ext4_update_i_disksize(inode, new_i_size);
-                       /* We need to mark inode dirty even if
-                        * new_i_size is less that inode->i_size
-                        * bu greater than i_disksize.(hint delalloc)
-                        */
-                       ret = ext4_mark_inode_dirty(handle, inode);
-               }
-       }
+       if (copied && new_i_size > inode->i_size &&
+           ext4_da_should_update_i_disksize(page, end))
+               ext4_update_i_disksize(inode, new_i_size);
  
-       if (write_mode != CONVERT_INLINE_DATA &&
-           ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
-           ext4_has_inline_data(inode))
-               ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied,
-                                                    page);
-       else
-               ret2 = generic_write_end(file, mapping, pos, len, copied,
-                                                       page, fsdata);
-       copied = ret2;
-       if (ret2 < 0)
-               ret = ret2;
-       ret2 = ext4_journal_stop(handle);
-       if (unlikely(ret2 && !ret))
-               ret = ret2;
-       return ret ? ret : copied;
+       return generic_write_end(file, mapping, pos, len, copied, page, fsdata);
  }
  
  /*
@@@ -3957,19 -3893,20 +3893,19 @@@ int ext4_update_disksize_before_punch(s
        return ret;
  }
  
 -static void ext4_wait_dax_page(struct ext4_inode_info *ei)
 +static void ext4_wait_dax_page(struct inode *inode)
  {
 -      up_write(&ei->i_mmap_sem);
 +      filemap_invalidate_unlock(inode->i_mapping);
        schedule();
 -      down_write(&ei->i_mmap_sem);
 +      filemap_invalidate_lock(inode->i_mapping);
  }
  
  int ext4_break_layouts(struct inode *inode)
  {
 -      struct ext4_inode_info *ei = EXT4_I(inode);
        struct page *page;
        int error;
  
 -      if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem)))
 +      if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
                return -EINVAL;
  
        do {
                error = ___wait_var_event(&page->_refcount,
                                atomic_read(&page->_refcount) == 1,
                                TASK_INTERRUPTIBLE, 0, 0,
 -                              ext4_wait_dax_page(ei));
 +                              ext4_wait_dax_page(inode));
        } while (error == 0);
  
        return error;
@@@ -4011,9 -3948,9 +3947,9 @@@ int ext4_punch_hole(struct inode *inode
  
        ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
        if (ext4_has_inline_data(inode)) {
 -              down_write(&EXT4_I(inode)->i_mmap_sem);
 +              filemap_invalidate_lock(mapping);
                ret = ext4_convert_inline_data(inode);
 -              up_write(&EXT4_I(inode)->i_mmap_sem);
 +              filemap_invalidate_unlock(mapping);
                if (ret)
                        return ret;
        }
         * Prevent page faults from reinstantiating pages we have released from
         * page cache.
         */
 -      down_write(&EXT4_I(inode)->i_mmap_sem);
 +      filemap_invalidate_lock(mapping);
  
        ret = ext4_break_layouts(inode);
        if (ret)
  out_stop:
        ext4_journal_stop(handle);
  out_dio:
 -      up_write(&EXT4_I(inode)->i_mmap_sem);
 +      filemap_invalidate_unlock(mapping);
  out_mutex:
        inode_unlock(inode);
        return ret;
@@@ -4340,6 -4277,12 +4276,12 @@@ static int __ext4_get_inode_loc(struct 
                goto has_buffer;
  
        lock_buffer(bh);
+       if (ext4_buffer_uptodate(bh)) {
+               /* Someone brought it uptodate while we waited */
+               unlock_buffer(bh);
+               goto has_buffer;
+       }
        /*
         * If we have all information of the inode in memory and this
         * is the only valid inode in the block, we need not read the
@@@ -5443,11 -5386,11 +5385,11 @@@ int ext4_setattr(struct user_namespace 
                        inode_dio_wait(inode);
                }
  
 -              down_write(&EXT4_I(inode)->i_mmap_sem);
 +              filemap_invalidate_lock(inode->i_mapping);
  
                rc = ext4_break_layouts(inode);
                if (rc) {
 -                      up_write(&EXT4_I(inode)->i_mmap_sem);
 +                      filemap_invalidate_unlock(inode->i_mapping);
                        goto err_out;
                }
  
                                error = rc;
                }
  out_mmap_sem:
 -              up_write(&EXT4_I(inode)->i_mmap_sem);
 +              filemap_invalidate_unlock(inode->i_mapping);
        }
  
        if (!error) {
@@@ -6002,10 -5945,10 +5944,10 @@@ int ext4_change_inode_journal_flag(stru
         * data (and journalled aops don't know how to handle these cases).
         */
        if (val) {
 -              down_write(&EXT4_I(inode)->i_mmap_sem);
 +              filemap_invalidate_lock(inode->i_mapping);
                err = filemap_write_and_wait(inode->i_mapping);
                if (err < 0) {
 -                      up_write(&EXT4_I(inode)->i_mmap_sem);
 +                      filemap_invalidate_unlock(inode->i_mapping);
                        return err;
                }
        }
        percpu_up_write(&sbi->s_writepages_rwsem);
  
        if (val)
 -              up_write(&EXT4_I(inode)->i_mmap_sem);
 +              filemap_invalidate_unlock(inode->i_mapping);
  
        /* Finally we can mark the inode as dirty. */
  
@@@ -6083,7 -6026,7 +6025,7 @@@ vm_fault_t ext4_page_mkwrite(struct vm_
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
  
 -      down_read(&EXT4_I(inode)->i_mmap_sem);
 +      filemap_invalidate_lock_shared(mapping);
  
        err = ext4_convert_inline_data(inode);
        if (err)
@@@ -6198,7 -6141,7 +6140,7 @@@ retry_alloc
  out_ret:
        ret = block_page_mkwrite_return(err);
  out:
 -      up_read(&EXT4_I(inode)->i_mmap_sem);
 +      filemap_invalidate_unlock_shared(mapping);
        sb_end_pagefault(inode->i_sb);
        return ret;
  out_error:
        ext4_journal_stop(handle);
        goto out;
  }
 -
 -vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
 -{
 -      struct inode *inode = file_inode(vmf->vma->vm_file);
 -      vm_fault_t ret;
 -
 -      down_read(&EXT4_I(inode)->i_mmap_sem);
 -      ret = filemap_fault(vmf);
 -      up_read(&EXT4_I(inode)->i_mmap_sem);
 -
 -      return ret;
 -}
diff --combined fs/ext4/super.c
@@@ -89,9 -89,12 +89,9 @@@ static struct inode *ext4_get_journal_i
  /*
   * Lock ordering
   *
 - * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
 - * i_mmap_rwsem (inode->i_mmap_rwsem)!
 - *
   * page fault path:
 - * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
 - *   page lock -> i_data_sem (rw)
 + * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
 + *   -> page lock -> i_data_sem (rw)
   *
   * buffered write path:
   * sb_start_write -> i_mutex -> mmap_lock
   *   i_data_sem (rw)
   *
   * truncate:
 - * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
 - * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
 + * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
 + *   page lock
 + * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
   *   i_data_sem (rw)
   *
   * direct IO:
@@@ -658,7 -660,7 +658,7 @@@ static void ext4_handle_error(struct su
                 * constraints, it may not be safe to do it right here so we
                 * defer superblock flushing to a workqueue.
                 */
-               if (continue_fs)
+               if (continue_fs && journal)
                        schedule_work(&EXT4_SB(sb)->s_error_work);
                else
                        ext4_commit_super(sb);
@@@ -1350,6 -1352,12 +1350,12 @@@ static void ext4_destroy_inode(struct i
                                true);
                dump_stack();
        }
+       if (EXT4_I(inode)->i_reserved_data_blocks)
+               ext4_msg(inode->i_sb, KERN_ERR,
+                        "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
+                        inode->i_ino, EXT4_I(inode),
+                        EXT4_I(inode)->i_reserved_data_blocks);
  }
  
  static void init_once(void *foo)
        INIT_LIST_HEAD(&ei->i_orphan);
        init_rwsem(&ei->xattr_sem);
        init_rwsem(&ei->i_data_sem);
 -      init_rwsem(&ei->i_mmap_sem);
        inode_init_once(&ei->vfs_inode);
        ext4_fc_init_inode(&ei->vfs_inode);
  }
@@@ -3021,17 -3030,17 +3027,17 @@@ static loff_t ext4_max_size(int blkbits
   */
  static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
  {
-       loff_t res = EXT4_NDIR_BLOCKS;
+       unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
        int meta_blocks;
-       loff_t upper_limit;
-       /* This is calculated to be the largest file size for a dense, block
+       /*
+        * This is calculated to be the largest file size for a dense, block
         * mapped file such that the file's total number of 512-byte sectors,
         * including data and all indirect blocks, does not exceed (2^48 - 1).
         *
         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
         * number of 512-byte sectors of the file.
         */
        if (!has_huge_files) {
                /*
                 * !has_huge_files or implies that the inode i_block field
        if (res > MAX_LFS_FILESIZE)
                res = MAX_LFS_FILESIZE;
  
-       return res;
+       return (loff_t)res;
  }
  
  static ext4_fsblk_t descriptor_loc(struct super_block *sb,
@@@ -4287,8 -4296,7 +4293,8 @@@ static int ext4_fill_super(struct super
                goto failed_mount;
        }
  
 -      if (bdev_dax_supported(sb->s_bdev, blocksize))
 +      if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
 +                      bdev_nr_sectors(sb->s_bdev)))
                set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
  
        if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
@@@ -5042,12 -5050,15 +5048,15 @@@ failed_mount_wq
        sbi->s_ea_block_cache = NULL;
  
        if (sbi->s_journal) {
+               /* flush s_error_work before journal destroy. */
+               flush_work(&sbi->s_error_work);
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
  failed_mount3a:
        ext4_es_unregister_shrinker(sbi);
  failed_mount3:
+       /* flush s_error_work before sbi destroy */
        flush_work(&sbi->s_error_work);
        del_timer_sync(&sbi->s_err_report);
        ext4_stop_mmpd(sbi);