fs/buffer.c: make block-size be per-page and protected by the page lock
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 29 Nov 2012 18:21:43 +0000 (10:21 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 29 Nov 2012 18:47:20 +0000 (10:47 -0800)
This makes the buffer size handling be a per-page thing, which allows us
to not have to worry about locking too much when changing the buffer
size.  If a page doesn't have buffers, we still need to read the block
size from the inode, but we can do that with ACCESS_ONCE(), so that even
if the size is changing, we get a consistent value.

This doesn't convert all functions - many of the buffer functions are
used purely by filesystems, which in turn results in the buffer size
being fixed at mount-time.  So they don't have the same consistency
issues that the raw device access can have.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/buffer.c

index b5f0442..28a74ff 100644 (file)
@@ -1553,6 +1553,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+       return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+       BUG_ON(!PageLocked(page));
+
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+       return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
  *
  *     Mapped  Uptodate        Meaning
@@ -1589,19 +1611,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
        sector_t block;
        sector_t last_block;
        struct buffer_head *bh, *head;
-       const unsigned blocksize = 1 << inode->i_blkbits;
+       unsigned int blocksize, bbits;
        int nr_underway = 0;
        int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
                        WRITE_SYNC : WRITE);
 
-       BUG_ON(!PageLocked(page));
-
-       last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-       if (!page_has_buffers(page)) {
-               create_empty_buffers(page, blocksize,
+       head = create_page_buffers(page, inode,
                                        (1 << BH_Dirty)|(1 << BH_Uptodate));
-       }
 
        /*
         * Be very careful.  We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1629,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
         * handle that here by just cleaning them.
         */
 
-       block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       head = page_buffers(page);
        bh = head;
+       blocksize = bh->b_size;
+       bbits = block_size_bits(blocksize);
+
+       block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+       last_block = (i_size_read(inode) - 1) >> bbits;
 
        /*
         * Get all the dirty buffers mapped to disk addresses and
@@ -1806,12 +1825,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
        BUG_ON(to > PAGE_CACHE_SIZE);
        BUG_ON(from > to);
 
-       blocksize = 1 << inode->i_blkbits;
-       if (!page_has_buffers(page))
-               create_empty_buffers(page, blocksize, 0);
-       head = page_buffers(page);
+       head = create_page_buffers(page, inode, 0);
+       blocksize = head->b_size;
+       bbits = block_size_bits(blocksize);
 
-       bbits = inode->i_blkbits;
        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
 
        for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1898,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
        unsigned blocksize;
        struct buffer_head *bh, *head;
 
-       blocksize = 1 << inode->i_blkbits;
+       bh = head = page_buffers(page);
+       blocksize = bh->b_size;
 
-       for(bh = head = page_buffers(page), block_start = 0;
-           bh != head || !block_start;
-           block_start=block_end, bh = bh->b_this_page) {
+       block_start = 0;
+       do {
                block_end = block_start + blocksize;
                if (block_end <= from || block_start >= to) {
                        if (!buffer_uptodate(bh))
@@ -1895,7 +1912,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
                        mark_buffer_dirty(bh);
                }
                clear_buffer_new(bh);
-       }
+
+               block_start = block_end;
+               bh = bh->b_this_page;
+       } while (bh != head);
 
        /*
         * If this is a partial write which happened to make all buffers
@@ -2020,7 +2040,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
                                        unsigned long from)
 {
-       struct inode *inode = page->mapping->host;
        unsigned block_start, block_end, blocksize;
        unsigned to;
        struct buffer_head *bh, *head;
@@ -2029,13 +2048,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
        if (!page_has_buffers(page))
                return 0;
 
-       blocksize = 1 << inode->i_blkbits;
+       head = page_buffers(page);
+       blocksize = head->b_size;
        to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
        to = from + to;
        if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
                return 0;
 
-       head = page_buffers(page);
        bh = head;
        block_start = 0;
        do {
@@ -2068,18 +2087,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
        struct inode *inode = page->mapping->host;
        sector_t iblock, lblock;
        struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-       unsigned int blocksize;
+       unsigned int blocksize, bbits;
        int nr, i;
        int fully_mapped = 1;
 
-       BUG_ON(!PageLocked(page));
-       blocksize = 1 << inode->i_blkbits;
-       if (!page_has_buffers(page))
-               create_empty_buffers(page, blocksize, 0);
-       head = page_buffers(page);
+       head = create_page_buffers(page, inode, 0);
+       blocksize = head->b_size;
+       bbits = block_size_bits(blocksize);
 
-       iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+       iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+       lblock = (i_size_read(inode)+blocksize-1) >> bbits;
        bh = head;
        nr = 0;
        i = 0;