From 3480373ebdf7625ee29bee6508c9fc4ae70c00bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 27 Mar 2023 09:49:51 +0900
Subject: [PATCH] btrfs, block: move REQ_CGROUP_PUNT to btrfs

REQ_CGROUP_PUNT is a bit annoying as it is hard to follow and adds a
branch to the bio submission hot path. To fix this, export
blkcg_punt_bio_submit and let btrfs call it directly. Add a new
REQ_FS_PRIVATE flag for btrfs to indicate to its own low-level bio
submission code that a punt to the cgroup submission helper is
required.

Reviewed-by: Jens Axboe
Signed-off-by: Christoph Hellwig
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 block/blk-cgroup.c        | 31 +++++++++++++++++--------------
 block/blk-cgroup.h        | 12 ------------
 block/blk-core.c          |  3 ---
 fs/btrfs/bio.c            | 12 ++++++++----
 fs/btrfs/bio.h            |  3 +++
 fs/btrfs/extent_io.c      |  2 +-
 fs/btrfs/inode.c          |  2 +-
 include/linux/bio.h       |  5 +++++
 include/linux/blk_types.h | 18 +++++-------------
 9 files changed, 40 insertions(+), 48 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bd50b55..9f5f326 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1688,24 +1688,27 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
-bool __blkcg_punt_bio_submit(struct bio *bio)
+/*
+ * When a shared kthread issues a bio for a cgroup, doing so synchronously can
+ * lead to priority inversions as the kthread can be trapped waiting for that
+ * cgroup.  Use this helper instead of submit_bio to punt the actual issuing to
+ * a dedicated per-blkcg work item to avoid such priority inversions.
+ */
+void blkcg_punt_bio_submit(struct bio *bio)
 {
 	struct blkcg_gq *blkg = bio->bi_blkg;
 
-	/* consume the flag first */
-	bio->bi_opf &= ~REQ_CGROUP_PUNT;
-
-	/* never bounce for the root cgroup */
-	if (!blkg->parent)
-		return false;
-
-	spin_lock_bh(&blkg->async_bio_lock);
-	bio_list_add(&blkg->async_bios, bio);
-	spin_unlock_bh(&blkg->async_bio_lock);
-
-	queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
-	return true;
+	if (blkg->parent) {
+		spin_lock_bh(&blkg->async_bio_lock);
+		bio_list_add(&blkg->async_bios, bio);
+		spin_unlock_bh(&blkg->async_bio_lock);
+		queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+	} else {
+		/* never bounce for the root cgroup */
+		submit_bio(bio);
+	}
 }
+EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
 
 /*
  * Scale the accumulated delay based on how long it has been since we updated
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 9c50787..64758ab 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -375,16 +375,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 	if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css),	\
 				    (p_blkg)->q)))
 
-bool __blkcg_punt_bio_submit(struct bio *bio);
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio)
-{
-	if (bio->bi_opf & REQ_CGROUP_PUNT)
-		return __blkcg_punt_bio_submit(bio);
-	else
-		return false;
-}
-
 static inline void blkcg_bio_issue_init(struct bio *bio)
 {
 	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
@@ -506,8 +496,6 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return
 static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
 static inline void blkg_get(struct blkcg_gq *blkg) { }
 static inline void blkg_put(struct blkcg_gq *blkg) { }
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
 static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline void blk_cgroup_bio_start(struct bio *bio) { }
 static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
diff --git a/block/blk-core.c b/block/blk-core.c
index 42926e6..478978d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -830,9 +830,6 @@ EXPORT_SYMBOL(submit_bio_noacct);
  */
 void submit_bio(struct bio *bio)
 {
-	if (blkcg_punt_bio_submit(bio))
-		return;
-
 	if (bio_op(bio) == REQ_OP_READ) {
 		task_io_account_read(bio->bi_iter.bi_size);
 		count_vm_events(PGPGIN, bio_sectors(bio));
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index afd2f90..ed5aa8a 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -435,7 +435,11 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
 		dev->devid, bio->bi_iter.bi_size);
 
 	btrfsic_check_bio(bio);
-	submit_bio(bio);
+
+	if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
+		blkcg_punt_bio_submit(bio);
+	else
+		submit_bio(bio);
 }
 
 static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
@@ -551,10 +555,10 @@ static void run_one_async_done(struct btrfs_work *work)
 
 	/*
 	 * All of the bios that pass through here are from async helpers.
-	 * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
-	 * This changes nothing when cgroups aren't in use.
+	 * Use REQ_BTRFS_CGROUP_PUNT to issue them from the owning cgroup's
+	 * context.  This changes nothing when cgroups aren't in use.
 	 */
-	bio->bi_opf |= REQ_CGROUP_PUNT;
+	bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT;
 	__btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
 }
 
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index dbf125f..8edf3c3 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -88,6 +88,9 @@ static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
 /* Bio only refers to one ordered extent. */
 #define REQ_BTRFS_ONE_ORDERED			REQ_DRV
 
+/* Submit using blkcg_punt_bio_submit. */
+#define REQ_BTRFS_CGROUP_PUNT			REQ_FS_PRIVATE
+
 void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
 int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 			    u64 length, u64 logical, struct page *page,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f5702b1..f40e4a0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2538,7 +2538,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 	struct btrfs_bio_ctrl bio_ctrl = {
 		.wbc = &wbc_writepages,
 		/* We're called from an async helper function */
-		.opf = REQ_OP_WRITE | REQ_CGROUP_PUNT |
+		.opf = REQ_OP_WRITE | REQ_BTRFS_CGROUP_PUNT |
 		       wbc_to_write_flags(&wbc_writepages),
 		.extent_locked = 1,
 	};
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5c216ca..93e16a4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1616,7 +1616,7 @@ static int cow_file_range_async(struct btrfs_inode *inode,
 		if (blkcg_css != blkcg_root_css) {
 			css_get(blkcg_css);
 			async_chunk[i].blkcg_css = blkcg_css;
-			async_chunk[i].write_flags |= REQ_CGROUP_PUNT;
+			async_chunk[i].write_flags |= REQ_BTRFS_CGROUP_PUNT;
 		} else {
 			async_chunk[i].blkcg_css = NULL;
 		}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d766be7..b3e7529 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -500,6 +500,7 @@ void bio_associate_blkg(struct bio *bio);
 void bio_associate_blkg_from_css(struct bio *bio,
 				 struct cgroup_subsys_state *css);
 void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void blkcg_punt_bio_submit(struct bio *bio);
 #else	/* CONFIG_BLK_CGROUP */
 static inline void bio_associate_blkg(struct bio *bio) { }
 static inline void bio_associate_blkg_from_css(struct bio *bio,
@@ -507,6 +508,10 @@ static inline void bio_associate_blkg_from_css(struct bio *bio,
 { }
 static inline void bio_clone_blkg_association(struct bio *dst,
 					      struct bio *src) { }
+static inline void blkcg_punt_bio_submit(struct bio *bio)
+{
+	submit_bio(bio);
+}
 #endif	/* CONFIG_BLK_CGROUP */
 
 static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 99be590..fb88439 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -404,18 +404,11 @@ enum req_flag_bits {
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NOWAIT,		/* Don't wait if request will block */
-	/*
-	 * When a shared kthread needs to issue a bio for a cgroup, doing
-	 * so synchronously can lead to priority inversions as the kthread
-	 * can be trapped waiting for that cgroup.  CGROUP_PUNT flag makes
-	 * submit_bio() punt the actual issuing to a dedicated per-blkcg
-	 * work item to avoid such priority inversions.
-	 */
-	__REQ_CGROUP_PUNT,
 	__REQ_POLLED,		/* caller polls for completion using bio_poll */
 	__REQ_ALLOC_CACHE,	/* allocate IO from cache if available */
 	__REQ_SWAP,		/* swap I/O */
 	__REQ_DRV,		/* for driver use */
+	__REQ_FS_PRIVATE,	/* for file system (submitter) use */
 
 	/*
 	 * Command specific flags, keep last:
@@ -443,14 +436,13 @@ enum req_flag_bits {
 #define REQ_RAHEAD	(__force blk_opf_t)(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND	(__force blk_opf_t)(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT	(__force blk_opf_t)(1ULL << __REQ_NOWAIT)
-#define REQ_CGROUP_PUNT	(__force blk_opf_t)(1ULL << __REQ_CGROUP_PUNT)
-
-#define REQ_NOUNMAP	(__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
 #define REQ_POLLED	(__force blk_opf_t)(1ULL << __REQ_POLLED)
 #define REQ_ALLOC_CACHE	(__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE)
-
-#define REQ_DRV		(__force blk_opf_t)(1ULL << __REQ_DRV)
 #define REQ_SWAP	(__force blk_opf_t)(1ULL << __REQ_SWAP)
+#define REQ_DRV		(__force blk_opf_t)(1ULL << __REQ_DRV)
+#define REQ_FS_PRIVATE	(__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
+
+#define REQ_NOUNMAP	(__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
-- 
2.7.4
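
For other filesystems wanting the same behaviour, the consumer-side pattern
this patch establishes is: claim the new REQ_FS_PRIVATE bit under a private
name, tag bios built in shared async helpers, and test the bit at the
filesystem's single low-level submission point. Below is a minimal sketch of
that pattern, modelled on the btrfs hunks above; the "myfs" names and the
REQ_MYFS_CGROUP_PUNT alias are hypothetical, and only blkcg_punt_bio_submit(),
submit_bio(), bi_opf, and REQ_FS_PRIVATE are real interfaces touched by this
patch.

#include <linux/bio.h>
#include <linux/blk_types.h>

/* Claim the fs-private flag bit for cgroup punting, as btrfs does. */
#define REQ_MYFS_CGROUP_PUNT	REQ_FS_PRIVATE

/*
 * Single low-level submission choke point.  Checking the private flag here
 * keeps the branch out of the generic submit_bio() hot path, which is the
 * point of this patch.
 */
static void myfs_submit_bio(struct bio *bio)
{
	if (bio->bi_opf & REQ_MYFS_CGROUP_PUNT)
		blkcg_punt_bio_submit(bio);	/* issue from per-blkcg worker */
	else
		submit_bio(bio);		/* issue directly */
}

/* Bios built in shared async helpers are tagged so the owning cgroup's
 * context issues them, mirroring run_one_async_done() above. */
static void myfs_submit_async_write(struct bio *bio)
{
	bio->bi_opf |= REQ_MYFS_CGROUP_PUNT;
	myfs_submit_bio(bio);
}

Note that, as in btrfs_submit_dev_bio() above, the flag is not cleared before
submission: REQ_FS_PRIVATE is owned by the submitter, so lower layers simply
ignore it.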