#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
-#include <linux/blk-cgroup.h>
#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
+#include "blk-cgroup.h"
#include "blk-throttle.h"
+#include "blk-rq-qos.h"
struct dentry *blk_debugfs_root;
REQ_OP_NAME(ZONE_CLOSE),
REQ_OP_NAME(ZONE_FINISH),
REQ_OP_NAME(ZONE_APPEND),
- REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(DRV_IN),
REQ_OP_NAME(DRV_OUT),
[BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
[BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" },
[BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
+ [BLK_STS_OFFLINE] = { -ENODEV, "device offline" },
/* device mapper special case, should not leak out: */
[BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
wake_up_all(&q->mq_freeze_wq);
}
-void blk_set_queue_dying(struct request_queue *q)
-{
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- blk_queue_start_drain(q);
-}
-EXPORT_SYMBOL_GPL(blk_set_queue_dying);
-
/**
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
WARN_ON_ONCE(blk_queue_registered(q));
/* mark @q DYING, no new request or merges will be allowed afterwards */
- blk_set_queue_dying(q);
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+ blk_queue_start_drain(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
*/
blk_freeze_queue(q);
+ /* cleanup rq qos structures for queue without disk */
+ rq_qos_exit(q);
+
blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
blk_sync_queue(q);
blk_mq_sched_free_rqs(q);
mutex_unlock(&q->sysfs_lock);
- percpu_ref_exit(&q->q_usage_counter);
-
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, blk_timeout_work);
INIT_LIST_HEAD(&q->icq_list);
-#ifdef CONFIG_BLK_CGROUP
- INIT_LIST_HEAD(&q->blkg_list);
-#endif
kobject_init(&q->kobj, &blk_queue_ktype);
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
goto fail_stats;
- if (blkcg_init_queue(q))
- goto fail_ref;
-
blk_queue_dma_alignment(q, 511);
blk_set_default_limits(&q->limits);
q->nr_requests = BLKDEV_DEFAULT_RQ;
return q;
-fail_ref:
- percpu_ref_exit(&q->q_usage_counter);
fail_stats:
blk_free_queue_stats(q->stats);
fail_split:
}
EXPORT_SYMBOL(blk_get_queue);
-static void handle_bad_sector(struct bio *bio, sector_t maxsector)
-{
- char b[BDEVNAME_SIZE];
-
- pr_info_ratelimited("%s: attempt to access beyond end of device\n"
- "%s: rw=%d, want=%llu, limit=%llu\n",
- current->comm,
- bio_devname(bio, b), bio->bi_opf,
- bio_end_sector(bio), maxsector);
-}
-
#ifdef CONFIG_FAIL_MAKE_REQUEST
static DECLARE_FAULT_ATTR(fail_make_request);
static inline bool bio_check_ro(struct bio *bio)
{
if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
- char b[BDEVNAME_SIZE];
-
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return false;
-
- WARN_ONCE(1,
- "Trying to write to read-only block-device %s (partno %d)\n",
- bio_devname(bio, b), bio->bi_bdev->bd_partno);
+ pr_warn("Trying to write to read-only block-device %pg\n",
+ bio->bi_bdev);
/* Older lvm-tools actually trigger this */
return false;
}
if (nr_sectors && maxsector &&
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
- handle_bad_sector(bio, maxsector);
+ pr_info_ratelimited("%s: attempt to access beyond end of device\n"
+ "%pg: rw=%d, want=%llu, limit=%llu\n",
+ current->comm,
+ bio->bi_bdev, bio->bi_opf,
+ bio_end_sector(bio), maxsector);
return -EIO;
}
return 0;
return BLK_STS_OK;
}
-noinline_for_stack bool submit_bio_checks(struct bio *bio)
-{
- struct block_device *bdev = bio->bi_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- blk_status_t status = BLK_STS_IOERR;
- struct blk_plug *plug;
-
- might_sleep();
-
- plug = blk_mq_plug(q, bio);
- if (plug && plug->nowait)
- bio->bi_opf |= REQ_NOWAIT;
-
- /*
- * For a REQ_NOWAIT based request, return -EOPNOTSUPP
- * if queue does not support NOWAIT.
- */
- if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
- goto not_supported;
-
- if (should_fail_bio(bio))
- goto end_io;
- if (unlikely(bio_check_ro(bio)))
- goto end_io;
- if (!bio_flagged(bio, BIO_REMAPPED)) {
- if (unlikely(bio_check_eod(bio)))
- goto end_io;
- if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
- goto end_io;
- }
-
- /*
- * Filter flush bio's early so that bio based drivers without flush
- * support don't have to worry about them.
- */
- if (op_is_flush(bio->bi_opf) &&
- !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
- bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
- if (!bio_sectors(bio)) {
- status = BLK_STS_OK;
- goto end_io;
- }
- }
-
- if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
- bio_clear_polled(bio);
-
- switch (bio_op(bio)) {
- case REQ_OP_DISCARD:
- if (!blk_queue_discard(q))
- goto not_supported;
- break;
- case REQ_OP_SECURE_ERASE:
- if (!blk_queue_secure_erase(q))
- goto not_supported;
- break;
- case REQ_OP_WRITE_SAME:
- if (!q->limits.max_write_same_sectors)
- goto not_supported;
- break;
- case REQ_OP_ZONE_APPEND:
- status = blk_check_zone_append(q, bio);
- if (status != BLK_STS_OK)
- goto end_io;
- break;
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_OPEN:
- case REQ_OP_ZONE_CLOSE:
- case REQ_OP_ZONE_FINISH:
- if (!blk_queue_is_zoned(q))
- goto not_supported;
- break;
- case REQ_OP_ZONE_RESET_ALL:
- if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
- goto not_supported;
- break;
- case REQ_OP_WRITE_ZEROES:
- if (!q->limits.max_write_zeroes_sectors)
- goto not_supported;
- break;
- default:
- break;
- }
-
- if (blk_throtl_bio(bio))
- return false;
-
- blk_cgroup_bio_start(bio);
- blkcg_bio_issue_init(bio);
-
- if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
- trace_block_bio_queue(bio);
- /* Now that enqueuing has been traced, we need to trace
- * completion as well.
- */
- bio_set_flag(bio, BIO_TRACE_COMPLETION);
- }
- return true;
-
-not_supported:
- status = BLK_STS_NOTSUPP;
-end_io:
- bio->bi_status = status;
- bio_endio(bio);
- return false;
-}
-
-static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
-{
- if (blk_crypto_bio_prep(&bio)) {
- if (likely(bio_queue_enter(bio) == 0)) {
- disk->fops->submit_bio(bio);
- blk_queue_exit(disk->queue);
- }
- }
-}
-
static void __submit_bio(struct bio *bio)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
- if (unlikely(!submit_bio_checks(bio)))
+ if (unlikely(!blk_crypto_bio_prep(&bio)))
return;
- if (!disk->fops->submit_bio)
+ if (!disk->fops->submit_bio) {
blk_mq_submit_bio(bio);
- else
- __submit_bio_fops(disk, bio);
+ } else if (likely(bio_queue_enter(bio) == 0)) {
+ disk->fops->submit_bio(bio);
+ blk_queue_exit(disk->queue);
+ }
}
/*
*
* bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
* bio_list_on_stack[1] contains bios that were submitted before the current
- * ->submit_bio_bio, but that haven't been processed yet.
+ * ->submit_bio, but that haven't been processed yet.
*/
static void __submit_bio_noacct(struct bio *bio)
{
current->bio_list = NULL;
}
-/**
- * submit_bio_noacct - re-submit a bio to the block device layer for I/O
- * @bio: The bio describing the location in memory and on the device.
- *
- * This is a version of submit_bio() that shall only be used for I/O that is
- * resubmitted to lower level drivers by stacking block drivers. All file
- * systems and other upper level users of the block layer should use
- * submit_bio() instead.
- */
-void submit_bio_noacct(struct bio *bio)
+void submit_bio_noacct_nocheck(struct bio *bio)
{
/*
* We only want one ->submit_bio to be active at a time, else stack
else
__submit_bio_noacct(bio);
}
+
+/**
+ * submit_bio_noacct - re-submit a bio to the block device layer for I/O
+ * @bio: The bio describing the location in memory and on the device.
+ *
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers. All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
+ */
+void submit_bio_noacct(struct bio *bio)
+{
+ struct block_device *bdev = bio->bi_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ blk_status_t status = BLK_STS_IOERR;
+ struct blk_plug *plug;
+
+ might_sleep();
+
+ plug = blk_mq_plug(q, bio);
+ if (plug && plug->nowait)
+ bio->bi_opf |= REQ_NOWAIT;
+
+ /*
+ * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+ * if queue does not support NOWAIT.
+ */
+ if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
+ goto not_supported;
+
+ if (should_fail_bio(bio))
+ goto end_io;
+ if (unlikely(bio_check_ro(bio)))
+ goto end_io;
+ if (!bio_flagged(bio, BIO_REMAPPED)) {
+ if (unlikely(bio_check_eod(bio)))
+ goto end_io;
+ if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
+ goto end_io;
+ }
+
+ /*
+ * Filter flush bio's early so that bio based drivers without flush
+ * support don't have to worry about them.
+ */
+ if (op_is_flush(bio->bi_opf) &&
+ !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
+ bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
+ if (!bio_sectors(bio)) {
+ status = BLK_STS_OK;
+ goto end_io;
+ }
+ }
+
+ if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ bio_clear_polled(bio);
+
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ if (!blk_queue_discard(q))
+ goto not_supported;
+ break;
+ case REQ_OP_SECURE_ERASE:
+ if (!blk_queue_secure_erase(q))
+ goto not_supported;
+ break;
+ case REQ_OP_ZONE_APPEND:
+ status = blk_check_zone_append(q, bio);
+ if (status != BLK_STS_OK)
+ goto end_io;
+ break;
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
+ if (!blk_queue_is_zoned(q))
+ goto not_supported;
+ break;
+ case REQ_OP_ZONE_RESET_ALL:
+ if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
+ goto not_supported;
+ break;
+ case REQ_OP_WRITE_ZEROES:
+ if (!q->limits.max_write_zeroes_sectors)
+ goto not_supported;
+ break;
+ default:
+ break;
+ }
+
+ if (blk_throtl_bio(bio))
+ return;
+
+ blk_cgroup_bio_start(bio);
+ blkcg_bio_issue_init(bio);
+
+ if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+ trace_block_bio_queue(bio);
+ /* Now that enqueuing has been traced, we need to trace
+ * completion as well.
+ */
+ bio_set_flag(bio, BIO_TRACE_COMPLETION);
+ }
+ submit_bio_noacct_nocheck(bio);
+ return;
+
+not_supported:
+ status = BLK_STS_NOTSUPP;
+end_io:
+ bio->bi_status = status;
+ bio_endio(bio);
+}
EXPORT_SYMBOL(submit_bio_noacct);
/**
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio)) {
- unsigned int count;
-
- if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
- count = queue_logical_block_size(
- bdev_get_queue(bio->bi_bdev)) >> 9;
- else
- count = bio_sectors(bio);
+ unsigned int count = bio_sectors(bio);
if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
- int ret;
+ int ret = 0;
if (cookie == BLK_QC_T_NONE ||
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
return 0;
- if (current->plug)
- blk_flush_plug(current->plug, false);
+ blk_flush_plug(current->plug, false);
if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
return 0;
- if (WARN_ON_ONCE(!queue_is_mq(q)))
- ret = 0; /* not yet implemented, should not happen */
- else
+ if (queue_is_mq(q)) {
ret = blk_mq_poll(q, cookie, iob, flags);
+ } else {
+ struct gendisk *disk = q->disk;
+
+ if (disk && disk->fops->poll_bio)
+ ret = disk->fops->poll_bio(bio, iob, flags);
+ }
blk_queue_exit(q);
return ret;
}
}
EXPORT_SYMBOL(blk_check_plugged);
-void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
+void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
{
if (!list_empty(&plug->cb_list))
flush_plug_callbacks(plug, from_schedule);
void blk_finish_plug(struct blk_plug *plug)
{
if (plug == current->plug) {
- blk_flush_plug(plug, false);
+ __blk_flush_plug(plug, false);
current->plug = NULL;
}
}