ceph: implement writeback livelock avoidance using page tagging
author Xiubo Li <xiubli@redhat.com>
Wed, 8 Mar 2023 02:21:44 +0000 (10:21 +0800)
committer Ilya Dryomov <idryomov@gmail.com>
Sun, 30 Apr 2023 10:37:28 +0000 (12:37 +0200)
While mapped IOs continue, if we try to flush a file's buffers,
we can see that fsync() won't complete until the IOs finish.

This is analogous to Jan Kara's commit f446daaea9d4 ("mm: implement
writeback livelock avoidance using page tagging"): we try to avoid
writeback livelocks when some process steadily creates dirty pages
in a mapping we are writing out.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/addr.c

index d5335f4..6bb251a 100644 (file)
@@ -808,6 +808,7 @@ static int ceph_writepages_start(struct address_space *mapping,
        bool should_loop, range_whole = false;
        bool done = false;
        bool caching = ceph_is_cache_enabled(inode);
+       xa_mark_t tag;
 
        if (wbc->sync_mode == WB_SYNC_NONE &&
            fsc->write_congested)
@@ -834,6 +835,11 @@ static int ceph_writepages_start(struct address_space *mapping,
        start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
        index = start_index;
 
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
+               tag = PAGECACHE_TAG_TOWRITE;
+       } else {
+               tag = PAGECACHE_TAG_DIRTY;
+       }
 retry:
        /* find oldest snap context with dirty data */
        snapc = get_oldest_context(inode, &ceph_wbc, NULL);
@@ -872,6 +878,9 @@ retry:
                dout(" non-head snapc, range whole\n");
        }
 
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+               tag_pages_for_writeback(mapping, index, end);
+
        ceph_put_snap_context(last_snapc);
        last_snapc = snapc;
 
@@ -888,7 +897,7 @@ retry:
 
 get_more_pages:
                nr_folios = filemap_get_folios_tag(mapping, &index,
-                               end, PAGECACHE_TAG_DIRTY, &fbatch);
+                                                  end, tag, &fbatch);
                dout("pagevec_lookup_range_tag got %d\n", nr_folios);
                if (!nr_folios && !locked_pages)
                        break;