Btrfs: properly honor wbc->nr_to_write changes
authorChris Mason <chris.mason@oracle.com>
Fri, 18 Sep 2009 20:03:16 +0000 (16:03 -0400)
committerChris Mason <chris.mason@oracle.com>
Fri, 18 Sep 2009 20:08:46 +0000 (16:08 -0400)
When btrfs fills a delayed allocation, it tries to increase
the wbc nr_to_write to cover a big part of allocation.  The
theory is that we're doing contiguous IO and writing a few
more blocks will save seeks overall at a very low cost.

The problem is that extent_write_cache_pages could ignore
the new higher nr_to_write if nr_to_write had already gone
down to zero.  We fix that by rechecking the nr_to_write
for every page that is processed in the pagevec.

This updates the math around bumping the nr_to_write value
to make sure we don't leave a tiny amount of IO hanging
around for the very end of a new extent.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/extent_io.c

index a102422..7e16c6d 100644 (file)
@@ -2182,7 +2182,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        delalloc_end = 0;
        page_started = 0;
        if (!epd->extent_locked) {
-               u64 delalloc_to_write;
+               u64 delalloc_to_write = 0;
                /*
                 * make sure the wbc mapping index is at least updated
                 * to this page.
@@ -2202,16 +2202,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        tree->ops->fill_delalloc(inode, page, delalloc_start,
                                                 delalloc_end, &page_started,
                                                 &nr_written);
-                       delalloc_to_write = (delalloc_end -
-                                       max_t(u64, page_offset(page),
-                                             delalloc_start) + 1) >>
-                                       PAGE_CACHE_SHIFT;
-                       if (wbc->nr_to_write < delalloc_to_write) {
-                               wbc->nr_to_write = min_t(long, 8192,
-                                                delalloc_to_write);
-                       }
+                       /*
+                        * delalloc_end is already one less than the total
+                        * length, so we don't subtract one from
+                        * PAGE_CACHE_SIZE
+                        */
+                       delalloc_to_write += (delalloc_end - delalloc_start +
+                                             PAGE_CACHE_SIZE) >>
+                                             PAGE_CACHE_SHIFT;
                        delalloc_start = delalloc_end + 1;
                }
+               if (wbc->nr_to_write < delalloc_to_write) {
+                       int thresh = 8192;
+
+                       if (delalloc_to_write < thresh * 2)
+                               thresh = delalloc_to_write;
+                       wbc->nr_to_write = min_t(u64, delalloc_to_write,
+                                                thresh);
+               }
 
                /* did the fill delalloc function already unlock and start
                 * the IO?
@@ -2388,6 +2396,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 {
        int ret = 0;
        int done = 0;
+       int nr_to_write_done = 0;
        struct pagevec pvec;
        int nr_pages;
        pgoff_t index;
@@ -2407,7 +2416,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                scanned = 1;
        }
 retry:
-       while (!done && (index <= end) &&
+       while (!done && !nr_to_write_done && (index <= end) &&
               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                              PAGECACHE_TAG_DIRTY, min(end - index,
                                  (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2458,8 +2467,15 @@ retry:
                                unlock_page(page);
                                ret = 0;
                        }
-                       if (ret || wbc->nr_to_write <= 0)
+                       if (ret)
                                done = 1;
+
+                       /*
+                        * the filesystem may choose to bump up nr_to_write.
+                        * We have to make sure to honor the new nr_to_write
+                        * at any time
+                        */
+                       nr_to_write_done = wbc->nr_to_write <= 0;
                }
                pagevec_release(&pvec);
                cond_resched();