Merge branch 'delalloc-buffer-write' into dev
author Theodore Ts'o <tytso@mit.edu>
Sun, 5 Sep 2021 03:46:32 +0000 (23:46 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Thu, 9 Sep 2021 14:47:06 +0000 (10:47 -0400)
Fix a bug in how we update i_disksize, and the error path in
inline_data_end.  Finally, drop an unnecessary creation of a journal
handle which was only needed for inline data, which can give us a
large performance gain in delayed allocation writes.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
1  2 
fs/ext4/ext4.h
fs/ext4/inline.c
fs/ext4/inode.c

diff --cc fs/ext4/ext4.h
Simple merge
diff --cc fs/ext4/inline.c
@@@ -733,45 -729,76 +733,83 @@@ convert
  int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
                               unsigned copied, struct page *page)
  {
-       int ret, no_expand;
+       handle_t *handle = ext4_journal_current_handle();
+       int no_expand;
        void *kaddr;
        struct ext4_iloc iloc;
+       int ret = 0, ret2;
+       if (unlikely(copied < len) && !PageUptodate(page))
+               copied = 0;
  
-       if (unlikely(copied < len)) {
-               if (!PageUptodate(page)) {
-                       copied = 0;
+       if (likely(copied)) {
+               ret = ext4_get_inode_loc(inode, &iloc);
+               if (ret) {
+                       unlock_page(page);
+                       put_page(page);
+                       ext4_std_error(inode->i_sb, ret);
                        goto out;
                }
-       }
+               ext4_write_lock_xattr(inode, &no_expand);
+               BUG_ON(!ext4_has_inline_data(inode));
  
-       ret = ext4_get_inode_loc(inode, &iloc);
-       if (ret) {
-               ext4_std_error(inode->i_sb, ret);
-               copied = 0;
-               goto out;
-       }
++              /*
++               * ei->i_inline_off may have changed since
++               * ext4_write_begin() called
++               * ext4_try_to_write_inline_data()
++               */
++              (void) ext4_find_inline_data_nolock(inode);
 +
-       ext4_write_lock_xattr(inode, &no_expand);
-       BUG_ON(!ext4_has_inline_data(inode));
+               kaddr = kmap_atomic(page);
+               ext4_write_inline_data(inode, &iloc, kaddr, pos, copied);
+               kunmap_atomic(kaddr);
+               SetPageUptodate(page);
+               /* clear page dirty so that writepages wouldn't work for us. */
+               ClearPageDirty(page);
  
-       /*
-        * ei->i_inline_off may have changed since ext4_write_begin()
-        * called ext4_try_to_write_inline_data()
-        */
-       (void) ext4_find_inline_data_nolock(inode);
+               ext4_write_unlock_xattr(inode, &no_expand);
+               brelse(iloc.bh);
  
-       kaddr = kmap_atomic(page);
-       ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
-       kunmap_atomic(kaddr);
-       SetPageUptodate(page);
-       /* clear page dirty so that writepages wouldn't work for us. */
-       ClearPageDirty(page);
+               /*
+                * It's important to update i_size while still holding page
+                * lock: page writeout could otherwise come in and zero
+                * beyond i_size.
+                */
+               ext4_update_inode_size(inode, pos + copied);
+       }
+       unlock_page(page);
+       put_page(page);
  
-       ext4_write_unlock_xattr(inode, &no_expand);
-       brelse(iloc.bh);
-       mark_inode_dirty(inode);
+       /*
+        * Don't mark the inode dirty under page lock. First, it unnecessarily
+        * makes the holding time of page lock longer. Second, it forces lock
+        * ordering of page lock and transaction start for journaling
+        * filesystems.
+        */
+       if (likely(copied))
+               mark_inode_dirty(inode);
  out:
-       return copied;
+       /*
+        * If we didn't copy as much data as expected, we need to trim back
+        * size of xattr containing inline data.
+        */
+       if (pos + len > inode->i_size && ext4_can_truncate(inode))
+               ext4_orphan_add(handle, inode);
+       ret2 = ext4_journal_stop(handle);
+       if (!ret)
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+               ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+                * is removed from the orphan list in that case.
+                */
+               if (inode->i_nlink)
+                       ext4_orphan_del(NULL, inode);
+       }
+       return ret ? ret : copied;
  }
  
  struct buffer_head *
diff --cc fs/ext4/inode.c
@@@ -1404,24 -1392,18 +1395,18 @@@ static int ext4_journalled_write_end(st
  
        BUG_ON(!ext4_handle_valid(handle));
  
-       if (inline_data) {
-               ret = ext4_write_inline_data_end(inode, pos, len,
-                                                copied, page);
-               if (ret < 0) {
-                       unlock_page(page);
-                       put_page(page);
-                       goto errout;
-               }
-               copied = ret;
-       } else if (unlikely(copied < len) && !PageUptodate(page)) {
+       if (ext4_has_inline_data(inode))
+               return ext4_write_inline_data_end(inode, pos, len, copied, page);
+       if (unlikely(copied < len) && !PageUptodate(page)) {
                copied = 0;
 -              ext4_journalled_zero_new_buffers(handle, page, from, to);
 +              ext4_journalled_zero_new_buffers(handle, inode, page, from, to);
        } else {
                if (unlikely(copied < len))
 -                      ext4_journalled_zero_new_buffers(handle, page,
 +                      ext4_journalled_zero_new_buffers(handle, inode, page,
                                                         from + copied, to);
 -              ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
 -                                           from + copied, &partial,
 +              ret = ext4_walk_page_buffers(handle, inode, page_buffers(page),
 +                                           from, from + copied, &partial,
                                             write_end_fn);
                if (!partial)
                        SetPageUptodate(page);