typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
/*
- * Structure allocated for each folio to track per-block uptodate state
+ * Structure allocated for each folio to track per-block uptodate, dirty state
* and I/O completions.
*/
struct iomap_folio_state {
atomic_t read_bytes_pending;
atomic_t write_bytes_pending;
spinlock_t state_lock;
+
+ /*
+ * Each block has two bits in this bitmap:
+ * Bits [0..blocks_per_folio) has the uptodate status.
+ * Bits [b_p_f...(2*b_p_f)) has the dirty status.
+ */
unsigned long state[];
};
folio_mark_uptodate(folio);
}
+static inline bool ifs_block_is_dirty(struct folio *folio,
+ struct iomap_folio_state *ifs, int block)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+
+ return test_bit(block + blks_per_folio, ifs->state);
+}
+
+static void ifs_clear_range_dirty(struct folio *folio,
+ struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+ unsigned int first_blk = (off >> inode->i_blkbits);
+ unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+ unsigned int nr_blks = last_blk - first_blk + 1;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks);
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len)
+{
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (ifs)
+ ifs_clear_range_dirty(folio, ifs, off, len);
+}
+
+static void ifs_set_range_dirty(struct folio *folio,
+ struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+ unsigned int first_blk = (off >> inode->i_blkbits);
+ unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+ unsigned int nr_blks = last_blk - first_blk + 1;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks);
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len)
+{
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (ifs)
+ ifs_set_range_dirty(folio, ifs, off, len);
+}
+
static struct iomap_folio_state *ifs_alloc(struct inode *inode,
struct folio *folio, unsigned int flags)
{
else
gfp = GFP_NOFS | __GFP_NOFAIL;
- ifs = kzalloc(struct_size(ifs, state, BITS_TO_LONGS(nr_blocks)),
- gfp);
- if (ifs) {
- spin_lock_init(&ifs->state_lock);
- if (folio_test_uptodate(folio))
- bitmap_fill(ifs->state, nr_blocks);
- folio_attach_private(folio, ifs);
- }
+ /*
+ * ifs->state tracks two sets of state flags when the
+ * filesystem block size is smaller than the folio size.
+ * The first state tracks per-block uptodate and the
+ * second tracks per-block dirty state.
+ */
+ ifs = kzalloc(struct_size(ifs, state,
+ BITS_TO_LONGS(2 * nr_blocks)), gfp);
+ if (!ifs)
+ return ifs;
+
+ spin_lock_init(&ifs->state_lock);
+ if (folio_test_uptodate(folio))
+ bitmap_set(ifs->state, 0, nr_blocks);
+ if (folio_test_dirty(folio))
+ bitmap_set(ifs->state, nr_blocks, nr_blocks);
+ folio_attach_private(folio, ifs);
+
return ifs;
}
}
EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+ struct inode *inode = mapping->host;
+ size_t len = folio_size(folio);
+
+ ifs_alloc(inode, folio, 0);
+ iomap_set_range_dirty(folio, 0, len);
+ return filemap_dirty_folio(mapping, folio);
+}
+EXPORT_SYMBOL_GPL(iomap_dirty_folio);
+
static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
if (unlikely(copied < len && !folio_test_uptodate(folio)))
return 0;
iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
+ iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied);
filemap_dirty_folio(inode->i_mapping, folio);
return copied;
}
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
+static int iomap_write_delalloc_ifs_punch(struct inode *inode,
+ struct folio *folio, loff_t start_byte, loff_t end_byte,
+ iomap_punch_t punch)
+{
+ unsigned int first_blk, last_blk, i;
+ loff_t last_byte;
+ u8 blkbits = inode->i_blkbits;
+ struct iomap_folio_state *ifs;
+ int ret = 0;
+
+ /*
+ * When we have per-block dirty tracking, there can be
+ * blocks within a folio which are marked uptodate
+ * but not dirty. In that case it is necessary to punch
+ * out such blocks to avoid leaking any delalloc blocks.
+ */
+ ifs = folio->private;
+ if (!ifs)
+ return ret;
+
+ last_byte = min_t(loff_t, end_byte - 1,
+ folio_pos(folio) + folio_size(folio) - 1);
+ first_blk = offset_in_folio(folio, start_byte) >> blkbits;
+ last_blk = offset_in_folio(folio, last_byte) >> blkbits;
+ for (i = first_blk; i <= last_blk; i++) {
+ if (!ifs_block_is_dirty(folio, ifs, i)) {
+ ret = punch(inode, folio_pos(folio) + (i << blkbits),
+ 1 << blkbits);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+
static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
iomap_punch_t punch)
return ret;
}
+ /* Punch non-dirty blocks within folio */
+ ret = iomap_write_delalloc_ifs_punch(inode, folio, start_byte,
+ end_byte, punch);
+ if (ret)
+ return ret;
+
/*
* Make sure the next punch start is correctly bound to
* the end of this data range, not the end of the folio.
struct writeback_control *wbc, struct inode *inode,
struct folio *folio, u64 end_pos)
{
- struct iomap_folio_state *ifs = ifs_alloc(inode, folio, 0);
+ struct iomap_folio_state *ifs = folio->private;
struct iomap_ioend *ioend, *next;
unsigned len = i_blocksize(inode);
unsigned nblocks = i_blocks_per_folio(inode, folio);
int error = 0, count = 0, i;
LIST_HEAD(submit_list);
+ WARN_ON_ONCE(end_pos <= pos);
+
+ if (!ifs && nblocks > 1) {
+ ifs = ifs_alloc(inode, folio, 0);
+ iomap_set_range_dirty(folio, 0, end_pos - pos);
+ }
+
WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
/*
* invalid, grab a new one.
*/
for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
- if (ifs && !ifs_block_is_uptodate(ifs, i))
+ if (ifs && !ifs_block_is_dirty(folio, ifs, i))
continue;
error = wpc->ops->map_blocks(wpc, inode, pos);
}
}
+ /*
+ * We can have dirty bits set past end of file in page_mkwrite path
+ * while mapping the last partial folio. Hence it's better to clear
+ * all the dirty bits in the folio here.
+ */
+ iomap_clear_range_dirty(folio, 0, folio_size(folio));
folio_start_writeback(folio);
folio_unlock(folio);