xfs,iomap: move delalloc punching to iomap

author Dave Chinner <dchinner@redhat.com>

Wed, 23 Nov 2022 01:44:38 +0000 (12:44 +1100)

committer Dave Chinner <david@fromorbit.com>

Wed, 23 Nov 2022 01:44:38 +0000 (12:44 +1100)
author Dave Chinner <dchinner@redhat.com>
Wed, 23 Nov 2022 01:44:38 +0000 (12:44 +1100)
committer Dave Chinner <david@fromorbit.com>
Wed, 23 Nov 2022 01:44:38 +0000 (12:44 +1100)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c

index 91ee0b3..734b761 100644 (file)
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -832,6 +832,66 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
  }
  EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
  
+/*
+ * When a short write occurs, the filesystem may need to remove reserved space
+ * that was allocated in ->iomap_begin from it's ->iomap_end method. For
+ * filesystems that use delayed allocation, we need to punch out delalloc
+ * extents from the range that are not dirty in the page cache. As the write can
+ * race with page faults, there can be dirty pages over the delalloc extent
+ * outside the range of a short write but still within the delalloc extent
+ * allocated for this iomap.
+ *
+ * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
+ * simplify range iterations, but converts them back to {offset,len} tuples for
+ * the punch callback.
+ */
+int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
+               struct iomap *iomap, loff_t pos, loff_t length,
+               ssize_t written,
+               int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+{
+       loff_t                  start_byte;
+       loff_t                  end_byte;
+       int                     blocksize = i_blocksize(inode);
+       int                     error = 0;
+
+       if (iomap->type != IOMAP_DELALLOC)
+               return 0;
+
+       /* If we didn't reserve the blocks, we're not allowed to punch them. */
+       if (!(iomap->flags & IOMAP_F_NEW))
+               return 0;
+
+       /*
+        * start_byte refers to the first unused block after a short write. If
+        * nothing was written, round offset down to point at the first block in
+        * the range.
+        */
+       if (unlikely(!written))
+               start_byte = round_down(pos, blocksize);
+       else
+               start_byte = round_up(pos + written, blocksize);
+       end_byte = round_up(pos + length, blocksize);
+
+       /* Nothing to do if we've written the entire delalloc extent */
+       if (start_byte >= end_byte)
+               return 0;
+
+       /*
+        * Lock the mapping to avoid races with page faults re-instantiating
+        * folios and dirtying them via ->page_mkwrite between the page cache
+        * truncation and the delalloc extent removal. Failing to do this can
+        * leave dirty pages with no space reservation in the cache.
+        */
+       filemap_invalidate_lock(inode->i_mapping);
+       truncate_pagecache_range(inode, start_byte, end_byte - 1);
+       error = punch(inode, start_byte, end_byte - start_byte);
+       filemap_invalidate_unlock(inode->i_mapping);
+
+       return error;
+}
+EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
+
  static loff_t iomap_unshare_iter(struct iomap_iter *iter)
  {
         struct iomap *iomap = &iter->iomap;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index 7bb55db..ea96e8a 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1123,12 +1123,12 @@ out_unlock:
  static int
  xfs_buffered_write_delalloc_punch(
         struct inode            *inode,
-       loff_t                  start_byte,
-       loff_t                  end_byte)
+       loff_t                  offset,
+       loff_t                  length)
  {
         struct xfs_mount        *mp = XFS_M(inode->i_sb);
-       xfs_fileoff_t           start_fsb = XFS_B_TO_FSBT(mp, start_byte);
-       xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, end_byte);
+       xfs_fileoff_t           start_fsb = XFS_B_TO_FSBT(mp, offset);
+       xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, offset + length);
  
         return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb,
                                 end_fsb - start_fsb);
@@ -1143,13 +1143,9 @@ xfs_buffered_write_iomap_end(
         unsigned                flags,
         struct iomap            *iomap)
  {
-       struct xfs_mount        *mp = XFS_M(inode->i_sb);
-       loff_t                  start_byte;
-       loff_t                  end_byte;
-       int                     error = 0;
  
-       if (iomap->type != IOMAP_DELALLOC)
-               return 0;
+       struct xfs_mount        *mp = XFS_M(inode->i_sb);
+       int                     error;
  
         /*
          * Behave as if the write failed if drop writes is enabled. Set the NEW
@@ -1160,35 +1156,8 @@ xfs_buffered_write_iomap_end(
                 written = 0;
         }
  
-       /* If we didn't reserve the blocks, we're not allowed to punch them. */
-       if (!(iomap->flags & IOMAP_F_NEW))
-               return 0;
-
-       /*
-        * start_fsb refers to the first unused block after a short write. If
-        * nothing was written, round offset down to point at the first block in
-        * the range.
-        */
-       if (unlikely(!written))
-               start_byte = round_down(offset, mp->m_sb.sb_blocksize);
-       else
-               start_byte = round_up(offset + written, mp->m_sb.sb_blocksize);
-       end_byte = round_up(offset + length, mp->m_sb.sb_blocksize);
-
-       /* Nothing to do if we've written the entire delalloc extent */
-       if (start_byte >= end_byte)
-               return 0;
-
-       /*
-        * Lock the mapping to avoid races with page faults re-instantiating
-        * folios and dirtying them via ->page_mkwrite between the page cache
-        * truncation and the delalloc extent removal. Failing to do this can
-        * leave dirty pages with no space reservation in the cache.
-        */
-       filemap_invalidate_lock(inode->i_mapping);
-       truncate_pagecache_range(inode, start_byte, end_byte - 1);
-       error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte);
-       filemap_invalidate_unlock(inode->i_mapping);
+       error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
+                       length, written, &xfs_buffered_write_delalloc_punch);
         if (error && !xfs_is_shutdown(mp)) {
                 xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
                         __func__, XFS_I(inode)->i_ino);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h

index 238a030..0698c4b 100644 (file)
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -226,6 +226,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
  
  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
                 const struct iomap_ops *ops);
+int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
+               struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
+               int (*punch)(struct inode *inode, loff_t pos, loff_t length));
+
  int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
  void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
  bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
author	Dave Chinner <dchinner@redhat.com>
	Wed, 23 Nov 2022 01:44:38 +0000 (12:44 +1100)
committer	Dave Chinner <david@fromorbit.com>
	Wed, 23 Nov 2022 01:44:38 +0000 (12:44 +1100)
fs/iomap/buffered-io.c		patch \| blob \| history
fs/xfs/xfs_iomap.c		patch \| blob \| history
include/linux/iomap.h		patch \| blob \| history