Merge branch 'block-5.7' into for-5.8/block
author Jens Axboe <axboe@kernel.dk>
Sat, 9 May 2020 22:13:58 +0000 (16:13 -0600)
committer Jens Axboe <axboe@kernel.dk>
Sat, 9 May 2020 22:13:58 +0000 (16:13 -0600)
Pull in block-5.7 fixes for 5.8. Mostly to resolve a conflict with
the blk-iocost changes, but we also need the base of the bdi
use-after-free fix as well, since we build on top of it.

* block-5.7:
  nvme: fix possible hang when ns scanning fails during error recovery
  nvme-pci: fix "slimmer CQ head update"
  bdi: add a ->dev_name field to struct backing_dev_info
  bdi: use bdi_dev_name() to get device name
  bdi: move bdi_dev_name out of line
  vboxsf: don't use the source name in the bdi name
  iocost: protect iocg->abs_vdebt with iocg->waitq.lock
  block: remove the bd_openers checks in blk_drop_partitions
  nvme: prevent double free in nvme_alloc_ns() error handling
  null_blk: Cleanup zoned device initialization
  null_blk: Fix zoned command handling
  block: remove unused header
  blk-iocost: Fix error on iocost_ioc_vrate_adj
  bdev: Reduce time holding bd_mutex in sync in blkdev_close()
  buffer: remove useless comment and WB_REASON_FREE_MORE_MEM, reason.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
1  2 
block/blk-cgroup.c
block/blk-iocost.c
block/partitions/core.c
fs/block_dev.c
fs/buffer.c
tools/cgroup/iocost_monitor.py

Simple merge
@@@ -1217,15 -1213,21 +1214,21 @@@ static bool iocg_kick_delay(struct ioc_
        u64 vtime = atomic64_read(&iocg->vtime);
        u64 vmargin = ioc->margin_us * now->vrate;
        u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
 -      u64 expires, oexpires;
 +      u64 delta_ns, expires, oexpires;
        u32 hw_inuse;
  
+       lockdep_assert_held(&iocg->waitq.lock);
        /* debt-adjust vtime */
        current_hweight(iocg, NULL, &hw_inuse);
-       vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
+       vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
  
-       /* clear or maintain depending on the overage */
-       if (time_before_eq64(vtime, now->vnow)) {
+       /*
+        * Clear or maintain depending on the overage. Non-zero vdebt is what
+        * guarantees that @iocg is online and future iocg_kick_delay() will
+        * clear use_delay. Don't leave it on when there's no vdebt.
+        */
+       if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
                blkcg_clear_delay(blkg);
                return false;
        }
@@@ -1254,9 -1261,12 +1257,12 @@@ static enum hrtimer_restart iocg_delay_
  {
        struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
        struct ioc_now now;
+       unsigned long flags;
  
+       spin_lock_irqsave(&iocg->waitq.lock, flags);
        ioc_now(iocg->ioc, &now);
 -      iocg_kick_delay(iocg, &now, 0);
 +      iocg_kick_delay(iocg, &now);
+       spin_unlock_irqrestore(&iocg->waitq.lock, flags);
  
        return HRTIMER_NORESTART;
  }
@@@ -1370,11 -1380,10 +1376,10 @@@ static void ioc_timer_fn(struct timer_l
  
                spin_lock(&iocg->waitq.lock);
  
-               if (waitqueue_active(&iocg->waitq) ||
-                   atomic64_read(&iocg->abs_vdebt)) {
+               if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
                        /* might be oversleeping vtime / hweight changes, kick */
                        iocg_kick_waitq(iocg, &now);
 -                      iocg_kick_delay(iocg, &now, 0);
 +                      iocg_kick_delay(iocg, &now);
                } else if (iocg_is_idle(iocg)) {
                        /* no waiter and idle, deactivate */
                        iocg->last_inuse = iocg->inuse;
@@@ -1747,20 -1730,42 +1751,42 @@@ static void ioc_rqos_throttle(struct rq
        }
  
        /*
-        * We're over budget.  If @bio has to be issued regardless,
-        * remember the abs_cost instead of advancing vtime.
-        * iocg_kick_waitq() will pay off the debt before waking more IOs.
+        * We activated above but w/o any synchronization. Deactivation is
+        * synchronized with waitq.lock and we won't get deactivated as long
+        * as we're waiting or have debt, so we're good if we're activated
+        * here. In the unlikely case that we aren't, just issue the IO.
+        */
+       spin_lock_irq(&iocg->waitq.lock);
+       if (unlikely(list_empty(&iocg->active_list))) {
+               spin_unlock_irq(&iocg->waitq.lock);
+               iocg_commit_bio(iocg, bio, cost);
+               return;
+       }
+       /*
+        * We're over budget. If @bio has to be issued regardless, remember
+        * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
+        * off the debt before waking more IOs.
+        *
         * This way, the debt is continuously paid off each period with the
-        * actual budget available to the cgroup.  If we just wound vtime,
-        * we would incorrectly use the current hw_inuse for the entire
-        * amount which, for example, can lead to the cgroup staying
-        * blocked for a long time even with substantially raised hw_inuse.
+        * actual budget available to the cgroup. If we just wound vtime, we
+        * would incorrectly use the current hw_inuse for the entire amount
+        * which, for example, can lead to the cgroup staying blocked for a
+        * long time even with substantially raised hw_inuse.
+        *
+        * An iocg with vdebt should stay online so that the timer can keep
+        * deducting its vdebt and [de]activate use_delay mechanism
+        * accordingly. We don't want to race against the timer trying to
+        * clear them and leave @iocg inactive w/ dangling use_delay heavily
+        * penalizing the cgroup and its descendants.
         */
        if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
-               atomic64_add(abs_cost, &iocg->abs_vdebt);
+               iocg->abs_vdebt += abs_cost;
 -              if (iocg_kick_delay(iocg, &now, cost))
 +              if (iocg_kick_delay(iocg, &now))
                        blkcg_schedule_throttle(rqos->q,
                                        (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+               spin_unlock_irq(&iocg->waitq.lock);
                return;
        }
  
@@@ -1841,15 -1833,28 +1854,28 @@@ static void ioc_rqos_merge(struct rq_qo
                iocg->cursor = bio_end;
  
        /*
-        * Charge if there's enough vtime budget and the existing request
-        * has cost assigned.  Otherwise, account it as debt.  See debt
-        * handling in ioc_rqos_throttle() for details.
+        * Charge if there's enough vtime budget and the existing request has
+        * cost assigned.
         */
        if (rq->bio && rq->bio->bi_iocost_cost &&
-           time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
+           time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
                iocg_commit_bio(iocg, bio, cost);
-       else
-               atomic64_add(abs_cost, &iocg->abs_vdebt);
+               return;
+       }
+       /*
+        * Otherwise, account it as debt if @iocg is online, which it should
+        * be for the vast majority of cases. See debt handling in
+        * ioc_rqos_throttle() for details.
+        */
+       spin_lock_irqsave(&iocg->waitq.lock, flags);
+       if (likely(!list_empty(&iocg->active_list))) {
+               iocg->abs_vdebt += abs_cost;
 -              iocg_kick_delay(iocg, &now, cost);
++              iocg_kick_delay(iocg, &now);
+       } else {
+               iocg_commit_bio(iocg, bio, cost);
+       }
+       spin_unlock_irqrestore(&iocg->waitq.lock, flags);
  }
  
  static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
@@@ -607,18 -492,19 +607,18 @@@ int blk_drop_partitions(struct block_de
  {
        struct disk_part_iter piter;
        struct hd_struct *part;
 -      int res;
  
 -      if (!disk_part_scan_enabled(disk))
 +      if (!disk_part_scan_enabled(bdev->bd_disk))
                return 0;
-       if (bdev->bd_part_count || bdev->bd_openers > 1)
+       if (bdev->bd_part_count)
                return -EBUSY;
 -      res = invalidate_partition(disk, 0);
 -      if (res)
 -              return res;
  
 -      disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
 +      sync_blockdev(bdev);
 +      invalidate_bdev(bdev);
 +
 +      disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY);
        while ((part = disk_part_iter_next(&piter)))
 -              delete_partition(disk, part->partno);
 +              delete_partition(bdev->bd_disk, part);
        disk_part_iter_exit(&piter);
  
        return 0;
diff --cc fs/block_dev.c
Simple merge
diff --cc fs/buffer.c
Simple merge
Simple merge