Merge branch 'for-6.5/block-late' into block-6.5

author Jens Axboe <axboe@kernel.dk>

Wed, 28 Jun 2023 22:08:19 +0000 (16:08 -0600)

committer Jens Axboe <axboe@kernel.dk>

Wed, 28 Jun 2023 22:08:19 +0000 (16:08 -0600)
author Jens Axboe <axboe@kernel.dk>
Wed, 28 Jun 2023 22:08:19 +0000 (16:08 -0600)
committer Jens Axboe <axboe@kernel.dk>
Wed, 28 Jun 2023 22:08:19 +0000 (16:08 -0600)
diff --combined block/blk-cgroup.c

index aaf9903,c8b28ec..fc49be6
--- 1/block/blk-cgroup.c
--- 2/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@@ -34,8 -34,6 +34,8 @@@
   #include "blk-ioprio.h"
   #include "blk-throttle.h"
   
+ +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu);
+ +
   /*
    * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
    * blkcg_pol_register_mutex nests outside of it and synchronizes entire
@@@ -58,8 -56,6 +58,8 @@@ static LIST_HEAD(all_blkcgs);         /* prote
   
   bool blkcg_debug_stats = false;
   
+ +static DEFINE_RAW_SPINLOCK(blkg_stat_lock);
+ +
   #define BLKG_DESTROY_BATCH_SIZE  64
   
   /*
@@@ -167,20 -163,10 +167,20 @@@ static void blkg_free(struct blkcg_gq *
   static void __blkg_release(struct rcu_head *rcu)
   {
         struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+ +      struct blkcg *blkcg = blkg->blkcg;
+ +      int cpu;
   
   #ifdef CONFIG_BLK_CGROUP_PUNT_BIO
         WARN_ON(!bio_list_empty(&blkg->async_bios));
   #endif
+ +      /*
+ +       * Flush all the non-empty percpu lockless lists before releasing
+ +       * us, given these stats belong to us.
+ +       *
+ +       * blkg_stat_lock is for serializing blkg stat updates.
+ +       */
+ +      for_each_possible_cpu(cpu)
+ +              __blkcg_rstat_flush(blkcg, cpu);
   
         /* release the blkcg and parent blkg refs this blkg has been holding */
         css_put(&blkg->blkcg->css);
@@@ -979,12 -965,16 +979,12 @@@ static void blkcg_iostat_update(struct 
         u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
   }
   
- -static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+ +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
   {
- -      struct blkcg *blkcg = css_to_blkcg(css);
         struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
         struct llist_node *lnode;
         struct blkg_iostat_set *bisc, *next_bisc;
- -
- -      /* Root-level stats are sourced from system-wide IO stats */
- -      if (!cgroup_parent(css->cgroup))
- -              return;
+ +      unsigned long flags;
   
         rcu_read_lock();
   
@@@ -993,14 -983,6 +993,14 @@@
                 goto out;
   
         /*
+ +       * Covers concurrent parent blkg updates from blkg_release().
+ +       *
+ +       * When flushing from cgroup, cgroup_rstat_lock is always held, so
+ +       * this lock won't cause contention most of the time.
+ +       */
+ +      raw_spin_lock_irqsave(&blkg_stat_lock, flags);
+ +
+ +      /*
          * Iterate only the iostat_cpu's queued in the lockless list.
          */
         llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) {
@@@ -1023,19 -1005,13 +1023,19 @@@
                 if (parent && parent->parent)
                         blkcg_iostat_update(parent, &blkg->iostat.cur,
                                             &blkg->iostat.last);
- -              percpu_ref_put(&blkg->refcnt);
         }
- -
+ +      raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);
   out:
         rcu_read_unlock();
   }
   
+ +static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+ +{
+ +      /* Root-level stats are sourced from system-wide IO stats */
+ +      if (cgroup_parent(css->cgroup))
+ +              __blkcg_rstat_flush(css_to_blkcg(css), cpu);
+ +}
+ +
   /*
    * We source root cgroup stats from the system-wide stats to avoid
    * tracking the same information twice and incurring overhead when no
@@@ -2086,6 -2062,9 +2086,9 @@@ void blk_cgroup_bio_start(struct bio *b
         struct blkg_iostat_set *bis;
         unsigned long flags;
   
+       if (!cgroup_subsys_on_dfl(io_cgrp_subsys))
+               return;
+ 
         /* Root-level stats are sourced from system-wide IO stats */
         if (!cgroup_parent(blkcg->css.cgroup))
                 return;
@@@ -2113,11 -2092,11 +2116,10 @@@
   
                 llist_add(&bis->lnode, lhead);
                 WRITE_ONCE(bis->lqueued, true);
- -              percpu_ref_get(&bis->blkg->refcnt);
         }
   
         u64_stats_update_end_irqrestore(&bis->sync, flags);
-       if (cgroup_subsys_on_dfl(io_cgrp_subsys))
-               cgroup_rstat_updated(blkcg->css.cgroup, cpu);
+       cgroup_rstat_updated(blkcg->css.cgroup, cpu);
         put_cpu();
   }
   
diff --combined block/blk-mq.c

index decb6ab,32e50bc..98eb31f
--- 1/block/blk-mq.c
--- 2/block/blk-mq.c
+++ b/block/blk-mq.c
@@@ -688,10 -688,6 +688,10 @@@ static void __blk_mq_free_request(struc
         blk_crypto_free_request(rq);
         blk_pm_mark_last_busy(rq);
         rq->mq_hctx = NULL;
+ +
+ +      if (rq->rq_flags & RQF_MQ_INFLIGHT)
+ +              __blk_mq_dec_active_requests(hctx);
+ +
         if (rq->tag != BLK_MQ_NO_TAG)
                 blk_mq_put_tag(hctx->tags, ctx, rq->tag);
         if (sched_tag != BLK_MQ_NO_TAG)
@@@ -703,11 -699,15 +703,11 @@@
   void blk_mq_free_request(struct request *rq)
   {
         struct request_queue *q = rq->q;
- -      struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
   
         if ((rq->rq_flags & RQF_USE_SCHED) &&
             q->elevator->type->ops.finish_request)
                 q->elevator->type->ops.finish_request(rq);
   
- -      if (rq->rq_flags & RQF_MQ_INFLIGHT)
- -              __blk_mq_dec_active_requests(hctx);
- -
         if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
                 laptop_io_completion(q->disk->bdi);
   
@@@ -1280,7 -1280,11 +1280,11 @@@ static void blk_add_rq_to_plug(struct b
   
         if (!plug->multiple_queues && last && last->q != rq->q)
                 plug->multiple_queues = true;
-       if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
+       /*
+        * Any request allocated from sched tags can't be issued to
+        * ->queue_rqs() directly
+        */
+       if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
                 plug->has_elevator = true;
         rq->rq_next = NULL;
         rq_list_add(&plug->mq_list, rq);
diff --combined block/blk-wbt.c

index 7a87506,9f7c99c..0bb6131
--- 1/block/blk-wbt.c
--- 2/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@@ -146,7 -146,7 +146,7 @@@ enum 
   static inline bool rwb_enabled(struct rq_wb *rwb)
   {
         return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
-                     rwb->wb_normal != 0;
+                     rwb->enable_state != WBT_STATE_OFF_MANUAL;
   }
   
   static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
@@@ -201,15 -201,6 +201,6 @@@ static void wbt_rqw_done(struct rq_wb *
         inflight = atomic_dec_return(&rqw->inflight);
   
         /*
-        * wbt got disabled with IO in flight. Wake up any potential
-        * waiters, we don't have to do more than that.
-        */
-       if (unlikely(!rwb_enabled(rwb))) {
-               rwb_wake_all(rwb);
-               return;
-       }
- 
-       /*
          * For discards, our limit is always the background. For writes, if
          * the device does write back caching, drop further down before we
          * wake people up.
@@@ -503,8 -494,7 +494,7 @@@ bool wbt_disabled(struct request_queue 
   {
         struct rq_qos *rqos = wbt_rq_qos(q);
   
-       return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT ||
-              RQWB(rqos)->enable_state == WBT_STATE_OFF_MANUAL;
+       return !rqos || !rwb_enabled(RQWB(rqos));
   }
   
   u64 wbt_get_min_lat(struct request_queue *q)
@@@ -545,13 -535,6 +535,6 @@@ static inline unsigned int get_limit(st
   {
         unsigned int limit;
   
-       /*
-        * If we got disabled, just return UINT_MAX. This ensures that
-        * we'll properly inc a new IO, and dec+wakeup at the end.
-        */
-       if (!rwb_enabled(rwb))
-               return UINT_MAX;
- 
         if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
                 return rwb->wb_background;
   
@@@ -730,16 -713,14 +713,16 @@@ void wbt_enable_default(struct gendisk 
   {
         struct request_queue *q = disk->queue;
         struct rq_qos *rqos;
- -      bool disable_flag = q->elevator &&
- -                  test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
+ +      bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
+ +
+ +      if (q->elevator &&
+ +          test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
+ +              enable = false;
   
         /* Throttling already enabled? */
         rqos = wbt_rq_qos(q);
         if (rqos) {
- -              if (!disable_flag &&
- -                  RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+ +              if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
                         RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
                 return;
         }
@@@ -748,7 -729,7 +731,7 @@@
         if (!blk_queue_registered(q))
                 return;
   
- -      if (queue_is_mq(q) && !disable_flag)
+ +      if (queue_is_mq(q) && enable)
                 wbt_init(disk);
   }
   EXPORT_SYMBOL_GPL(wbt_enable_default);
author	Jens Axboe <axboe@kernel.dk>
	Wed, 28 Jun 2023 22:08:19 +0000 (16:08 -0600)
committer	Jens Axboe <axboe@kernel.dk>
	Wed, 28 Jun 2023 22:08:19 +0000 (16:08 -0600)
		1	2
block/blk-cgroup.c	patch \|	diff1 \|	diff2 \|	blob \| history
block/blk-mq.c	patch \|	diff1 \|	diff2 \|	blob \| history
block/blk-wbt.c	patch \|	diff1 \|	diff2 \|	blob \| history