Merge tag 'riscv-for-linus-5.18-rc4' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/kernel/linux-starfive.git] / block / blk-core.c
index d93e3bb..937bb6b 100644 (file)
@@ -34,7 +34,6 @@
 #include <linux/delay.h>
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
-#include <linux/blk-cgroup.h>
 #include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
@@ -49,7 +48,9 @@
 #include "blk.h"
 #include "blk-mq-sched.h"
 #include "blk-pm.h"
+#include "blk-cgroup.h"
 #include "blk-throttle.h"
+#include "blk-rq-qos.h"
 
 struct dentry *blk_debugfs_root;
 
@@ -122,7 +123,6 @@ static const char *const blk_op_name[] = {
        REQ_OP_NAME(ZONE_CLOSE),
        REQ_OP_NAME(ZONE_FINISH),
        REQ_OP_NAME(ZONE_APPEND),
-       REQ_OP_NAME(WRITE_SAME),
        REQ_OP_NAME(WRITE_ZEROES),
        REQ_OP_NAME(DRV_IN),
        REQ_OP_NAME(DRV_OUT),
@@ -164,6 +164,7 @@ static const struct {
        [BLK_STS_RESOURCE]      = { -ENOMEM,    "kernel resource" },
        [BLK_STS_DEV_RESOURCE]  = { -EBUSY,     "device resource" },
        [BLK_STS_AGAIN]         = { -EAGAIN,    "nonblocking retry" },
+       [BLK_STS_OFFLINE]       = { -ENODEV,    "device offline" },
 
        /* device mapper special case, should not leak out: */
        [BLK_STS_DM_REQUEUE]    = { -EREMCHG, "dm internal retry" },
@@ -284,13 +285,6 @@ void blk_queue_start_drain(struct request_queue *q)
        wake_up_all(&q->mq_freeze_wq);
 }
 
-void blk_set_queue_dying(struct request_queue *q)
-{
-       blk_queue_flag_set(QUEUE_FLAG_DYING, q);
-       blk_queue_start_drain(q);
-}
-EXPORT_SYMBOL_GPL(blk_set_queue_dying);
-
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -308,7 +302,8 @@ void blk_cleanup_queue(struct request_queue *q)
        WARN_ON_ONCE(blk_queue_registered(q));
 
        /* mark @q DYING, no new request or merges will be allowed afterwards */
-       blk_set_queue_dying(q);
+       blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+       blk_queue_start_drain(q);
 
        blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
        blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
@@ -320,6 +315,9 @@ void blk_cleanup_queue(struct request_queue *q)
         */
        blk_freeze_queue(q);
 
+       /* cleanup rq qos structures for queue without disk */
+       rq_qos_exit(q);
+
        blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
 
        blk_sync_queue(q);
@@ -341,8 +339,6 @@ void blk_cleanup_queue(struct request_queue *q)
                blk_mq_sched_free_rqs(q);
        mutex_unlock(&q->sysfs_lock);
 
-       percpu_ref_exit(&q->q_usage_counter);
-
        /* @q is and will stay empty, shutdown and put */
        blk_put_queue(q);
 }
@@ -475,9 +471,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
        timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
        INIT_WORK(&q->timeout_work, blk_timeout_work);
        INIT_LIST_HEAD(&q->icq_list);
-#ifdef CONFIG_BLK_CGROUP
-       INIT_LIST_HEAD(&q->blkg_list);
-#endif
 
        kobject_init(&q->kobj, &blk_queue_ktype);
 
@@ -498,17 +491,12 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
                                PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
                goto fail_stats;
 
-       if (blkcg_init_queue(q))
-               goto fail_ref;
-
        blk_queue_dma_alignment(q, 511);
        blk_set_default_limits(&q->limits);
        q->nr_requests = BLKDEV_DEFAULT_RQ;
 
        return q;
 
-fail_ref:
-       percpu_ref_exit(&q->q_usage_counter);
 fail_stats:
        blk_free_queue_stats(q->stats);
 fail_split:
@@ -542,17 +530,6 @@ bool blk_get_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_get_queue);
 
-static void handle_bad_sector(struct bio *bio, sector_t maxsector)
-{
-       char b[BDEVNAME_SIZE];
-
-       pr_info_ratelimited("%s: attempt to access beyond end of device\n"
-                           "%s: rw=%d, want=%llu, limit=%llu\n",
-                           current->comm,
-                           bio_devname(bio, b), bio->bi_opf,
-                           bio_end_sector(bio), maxsector);
-}
-
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 
 static DECLARE_FAULT_ATTR(fail_make_request);
@@ -582,14 +559,10 @@ late_initcall(fail_make_request_debugfs);
 static inline bool bio_check_ro(struct bio *bio)
 {
        if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
-               char b[BDEVNAME_SIZE];
-
                if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
                        return false;
-
-               WARN_ONCE(1,
-                      "Trying to write to read-only block-device %s (partno %d)\n",
-                       bio_devname(bio, b), bio->bi_bdev->bd_partno);
+               pr_warn("Trying to write to read-only block-device %pg\n",
+                       bio->bi_bdev);
                /* Older lvm-tools actually trigger this */
                return false;
        }
@@ -618,7 +591,11 @@ static inline int bio_check_eod(struct bio *bio)
        if (nr_sectors && maxsector &&
            (nr_sectors > maxsector ||
             bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
-               handle_bad_sector(bio, maxsector);
+               pr_info_ratelimited("%s: attempt to access beyond end of device\n"
+                                   "%pg: rw=%d, want=%llu, limit=%llu\n",
+                                   current->comm,
+                                   bio->bi_bdev, bio->bi_opf,
+                                   bio_end_sector(bio), maxsector);
                return -EIO;
        }
        return 0;
@@ -678,134 +655,19 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
        return BLK_STS_OK;
 }
 
-noinline_for_stack bool submit_bio_checks(struct bio *bio)
-{
-       struct block_device *bdev = bio->bi_bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
-       blk_status_t status = BLK_STS_IOERR;
-       struct blk_plug *plug;
-
-       might_sleep();
-
-       plug = blk_mq_plug(q, bio);
-       if (plug && plug->nowait)
-               bio->bi_opf |= REQ_NOWAIT;
-
-       /*
-        * For a REQ_NOWAIT based request, return -EOPNOTSUPP
-        * if queue does not support NOWAIT.
-        */
-       if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
-               goto not_supported;
-
-       if (should_fail_bio(bio))
-               goto end_io;
-       if (unlikely(bio_check_ro(bio)))
-               goto end_io;
-       if (!bio_flagged(bio, BIO_REMAPPED)) {
-               if (unlikely(bio_check_eod(bio)))
-                       goto end_io;
-               if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
-                       goto end_io;
-       }
-
-       /*
-        * Filter flush bio's early so that bio based drivers without flush
-        * support don't have to worry about them.
-        */
-       if (op_is_flush(bio->bi_opf) &&
-           !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
-               bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
-               if (!bio_sectors(bio)) {
-                       status = BLK_STS_OK;
-                       goto end_io;
-               }
-       }
-
-       if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-               bio_clear_polled(bio);
-
-       switch (bio_op(bio)) {
-       case REQ_OP_DISCARD:
-               if (!blk_queue_discard(q))
-                       goto not_supported;
-               break;
-       case REQ_OP_SECURE_ERASE:
-               if (!blk_queue_secure_erase(q))
-                       goto not_supported;
-               break;
-       case REQ_OP_WRITE_SAME:
-               if (!q->limits.max_write_same_sectors)
-                       goto not_supported;
-               break;
-       case REQ_OP_ZONE_APPEND:
-               status = blk_check_zone_append(q, bio);
-               if (status != BLK_STS_OK)
-                       goto end_io;
-               break;
-       case REQ_OP_ZONE_RESET:
-       case REQ_OP_ZONE_OPEN:
-       case REQ_OP_ZONE_CLOSE:
-       case REQ_OP_ZONE_FINISH:
-               if (!blk_queue_is_zoned(q))
-                       goto not_supported;
-               break;
-       case REQ_OP_ZONE_RESET_ALL:
-               if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
-                       goto not_supported;
-               break;
-       case REQ_OP_WRITE_ZEROES:
-               if (!q->limits.max_write_zeroes_sectors)
-                       goto not_supported;
-               break;
-       default:
-               break;
-       }
-
-       if (blk_throtl_bio(bio))
-               return false;
-
-       blk_cgroup_bio_start(bio);
-       blkcg_bio_issue_init(bio);
-
-       if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-               trace_block_bio_queue(bio);
-               /* Now that enqueuing has been traced, we need to trace
-                * completion as well.
-                */
-               bio_set_flag(bio, BIO_TRACE_COMPLETION);
-       }
-       return true;
-
-not_supported:
-       status = BLK_STS_NOTSUPP;
-end_io:
-       bio->bi_status = status;
-       bio_endio(bio);
-       return false;
-}
-
-static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
-{
-       if (blk_crypto_bio_prep(&bio)) {
-               if (likely(bio_queue_enter(bio) == 0)) {
-                       disk->fops->submit_bio(bio);
-                       blk_queue_exit(disk->queue);
-               }
-       }
-}
-
 static void __submit_bio(struct bio *bio)
 {
        struct gendisk *disk = bio->bi_bdev->bd_disk;
 
-       if (unlikely(!submit_bio_checks(bio)))
+       if (unlikely(!blk_crypto_bio_prep(&bio)))
                return;
 
-       if (!disk->fops->submit_bio)
+       if (!disk->fops->submit_bio) {
                blk_mq_submit_bio(bio);
-       else
-               __submit_bio_fops(disk, bio);
+       } else if (likely(bio_queue_enter(bio) == 0)) {
+               disk->fops->submit_bio(bio);
+               blk_queue_exit(disk->queue);
+       }
 }
 
 /*
@@ -825,7 +687,7 @@ static void __submit_bio(struct bio *bio)
  *
  * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
  * bio_list_on_stack[1] contains bios that were submitted before the current
- *     ->submit_bio_bio, but that haven't been processed yet.
+ *     ->submit_bio, but that haven't been processed yet.
  */
 static void __submit_bio_noacct(struct bio *bio)
 {
@@ -884,16 +746,7 @@ static void __submit_bio_noacct_mq(struct bio *bio)
        current->bio_list = NULL;
 }
 
-/**
- * submit_bio_noacct - re-submit a bio to the block device layer for I/O
- * @bio:  The bio describing the location in memory and on the device.
- *
- * This is a version of submit_bio() that shall only be used for I/O that is
- * resubmitted to lower level drivers by stacking block drivers.  All file
- * systems and other upper level users of the block layer should use
- * submit_bio() instead.
- */
-void submit_bio_noacct(struct bio *bio)
+void submit_bio_noacct_nocheck(struct bio *bio)
 {
        /*
         * We only want one ->submit_bio to be active at a time, else stack
@@ -908,6 +761,118 @@ void submit_bio_noacct(struct bio *bio)
        else
                __submit_bio_noacct(bio);
 }
+
+/**
+ * submit_bio_noacct - re-submit a bio to the block device layer for I/O
+ * @bio:  The bio describing the location in memory and on the device.
+ *
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers.  All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
+ */
+void submit_bio_noacct(struct bio *bio)
+{
+       struct block_device *bdev = bio->bi_bdev;
+       struct request_queue *q = bdev_get_queue(bdev);
+       blk_status_t status = BLK_STS_IOERR;
+       struct blk_plug *plug;
+
+       might_sleep();
+
+       plug = blk_mq_plug(q, bio);
+       if (plug && plug->nowait)
+               bio->bi_opf |= REQ_NOWAIT;
+
+       /*
+        * For a REQ_NOWAIT based request, end the bio with BLK_STS_NOTSUPP
+        * if queue does not support NOWAIT.
+        */
+       if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
+               goto not_supported;
+
+       if (should_fail_bio(bio))
+               goto end_io;
+       if (unlikely(bio_check_ro(bio)))
+               goto end_io;
+       if (!bio_flagged(bio, BIO_REMAPPED)) {
+               if (unlikely(bio_check_eod(bio)))
+                       goto end_io;
+               if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
+                       goto end_io;
+       }
+
+       /*
+        * Filter flush bio's early so that bio based drivers without flush
+        * support don't have to worry about them.
+        */
+       if (op_is_flush(bio->bi_opf) &&
+           !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
+               bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
+               if (!bio_sectors(bio)) {
+                       status = BLK_STS_OK;
+                       goto end_io;
+               }
+       }
+
+       if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+               bio_clear_polled(bio);
+
+       switch (bio_op(bio)) {
+       case REQ_OP_DISCARD:
+               if (!blk_queue_discard(q))
+                       goto not_supported;
+               break;
+       case REQ_OP_SECURE_ERASE:
+               if (!blk_queue_secure_erase(q))
+                       goto not_supported;
+               break;
+       case REQ_OP_ZONE_APPEND:
+               status = blk_check_zone_append(q, bio);
+               if (status != BLK_STS_OK)
+                       goto end_io;
+               break;
+       case REQ_OP_ZONE_RESET:
+       case REQ_OP_ZONE_OPEN:
+       case REQ_OP_ZONE_CLOSE:
+       case REQ_OP_ZONE_FINISH:
+               if (!blk_queue_is_zoned(q))
+                       goto not_supported;
+               break;
+       case REQ_OP_ZONE_RESET_ALL:
+               if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
+                       goto not_supported;
+               break;
+       case REQ_OP_WRITE_ZEROES:
+               if (!q->limits.max_write_zeroes_sectors)
+                       goto not_supported;
+               break;
+       default:
+               break;
+       }
+
+       if (blk_throtl_bio(bio))
+               return;
+
+       blk_cgroup_bio_start(bio);
+       blkcg_bio_issue_init(bio);
+
+       if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+               trace_block_bio_queue(bio);
+               /* Now that enqueuing has been traced, we need to trace
+                * completion as well.
+                */
+               bio_set_flag(bio, BIO_TRACE_COMPLETION);
+       }
+       submit_bio_noacct_nocheck(bio);
+       return;
+
+not_supported:
+       status = BLK_STS_NOTSUPP;
+end_io:
+       bio->bi_status = status;
+       bio_endio(bio);
+}
 EXPORT_SYMBOL(submit_bio_noacct);
 
 /**
@@ -933,13 +898,7 @@ void submit_bio(struct bio *bio)
         * go through the normal accounting stuff before submission.
         */
        if (bio_has_data(bio)) {
-               unsigned int count;
-
-               if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
-                       count = queue_logical_block_size(
-                                       bdev_get_queue(bio->bi_bdev)) >> 9;
-               else
-                       count = bio_sectors(bio);
+               unsigned int count = bio_sectors(bio);
 
                if (op_is_write(bio_op(bio))) {
                        count_vm_events(PGPGOUT, count);
@@ -985,21 +944,24 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 {
        struct request_queue *q = bdev_get_queue(bio->bi_bdev);
        blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
-       int ret;
+       int ret = 0;
 
        if (cookie == BLK_QC_T_NONE ||
            !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
                return 0;
 
-       if (current->plug)
-               blk_flush_plug(current->plug, false);
+       blk_flush_plug(current->plug, false);
 
        if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
                return 0;
-       if (WARN_ON_ONCE(!queue_is_mq(q)))
-               ret = 0;        /* not yet implemented, should not happen */
-       else
+       if (queue_is_mq(q)) {
                ret = blk_mq_poll(q, cookie, iob, flags);
+       } else {
+               struct gendisk *disk = q->disk;
+
+               if (disk && disk->fops->poll_bio)
+                       ret = disk->fops->poll_bio(bio, iob, flags);
+       }
        blk_queue_exit(q);
        return ret;
 }
@@ -1274,7 +1236,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
 }
 EXPORT_SYMBOL(blk_check_plugged);
 
-void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
+void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 {
        if (!list_empty(&plug->cb_list))
                flush_plug_callbacks(plug, from_schedule);
@@ -1303,7 +1265,7 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 void blk_finish_plug(struct blk_plug *plug)
 {
        if (plug == current->plug) {
-               blk_flush_plug(plug, false);
+               __blk_flush_plug(plug, false);
                current->plug = NULL;
        }
 }