Pull block fixes from Jens Axboe:
- stable fix for the bi_size overflow. Not a corruption issue, but a
case where we could merge but disallowed it (Andreas)
- NVMe pull request via Keith, with various fixes.
- MD pull request from Song.
- Merge window regression fix for the rq passthrough stats (Logan)
- Remove unused blkcg_drain_queue() function (Guoqing)
* tag 'for-linus-20191212' of git://git.kernel.dk/linux-block:
blk-cgroup: remove blkcg_drain_queue
block: fix NULL pointer dereference in account statistics with IDE
md: make sure desc_nr less than MD_SB_DISKS
md: raid1: check rdev before reference in raid1_sync_request func
raid5: need to set STRIPE_HANDLE for batch head
block: fix "check bi_size overflow before merge"
nvme/pci: Fix read queue count
nvme/pci Limit write queue sizes to possible cpus
nvme/pci: Fix write and poll queue types
nvme/pci: Remove last_cq_head
nvme: Namepace identification descriptor list is optional
nvme-fc: fix double-free scenarios on hw queues
nvme: else following return is not needed
nvme: add error message on mismatching controller ids
nvme_fc: add module to ops template to allow module references
nvmet-loop: Avoid preallocating big SGL for data
nvme-fc: Avoid preallocating big SGL for data
nvme-rdma: Avoid preallocating big SGL for data
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
- if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
+ if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
+ if (bio->bi_iter.bi_size > UINT_MAX - len)
+ return false;
bv->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
}
/**
- * blkcg_drain_queue - drain blkcg part of request_queue
- * @q: request_queue to drain
- *
- * Called from blk_drain_queue(). Responsible for draining blkcg part.
- */
-void blkcg_drain_queue(struct request_queue *q)
-{
- lockdep_assert_held(&q->queue_lock);
-
- /*
- * @q could be exiting and already have destroyed all blkgs as
- * indicated by NULL root_blkg. If so, don't confuse policies.
- */
- if (!q->root_blkg)
- return;
-
- blk_throtl_drain(q);
-}
-
-/**
* blkcg_exit_queue - exit and release blkcg part of request_queue
* @q: request_queue being released
*
void blk_account_io_completion(struct request *req, unsigned int bytes)
{
- if (blk_do_io_stat(req)) {
+ if (req->part && blk_do_io_stat(req)) {
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
* normal IO on queueing nor completion. Accounting the
* containing request is enough.
*/
- if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
+ if (req->part && blk_do_io_stat(req) &&
+ !(req->rq_flags & RQF_FLUSH_SEQ)) {
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
/* not spare disk, or LEVEL_MULTIPATH */
if (sb->level == LEVEL_MULTIPATH ||
(rdev->desc_nr >= 0 &&
+ rdev->desc_nr < MD_SB_DISKS &&
sb->disks[rdev->desc_nr].state &
((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
spare_disk = false;
write_targets++;
}
}
- if (bio->bi_end_io) {
+ if (rdev && bio->bi_end_io) {
atomic_inc(&rdev->nr_pending);
bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
bio_set_dev(bio, rdev->bdev);
do_flush = false;
}
- if (!sh->batch_head)
+ if (!sh->batch_head || sh == sh->batch_head)
set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
if ((!sh->batch_head || sh == sh->batch_head) &&
if (ret)
dev_warn(ctrl->device,
"Identify Descriptors failed (%d)\n", ret);
+ if (ret > 0)
+ ret = 0;
}
return ret;
}
* admin connect
*/
if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
+ dev_err(ctrl->device,
+ "Mismatching cntlid: Connect %u vs Identify "
+ "%u, rejecting\n",
+ ctrl->cntlid, le16_to_cpu(id->cntlid));
ret = -EINVAL;
goto out_free;
}
struct nvme_fcp_op_w_sgl {
struct nvme_fc_fcp_op op;
- struct scatterlist sgl[SG_CHUNK_SIZE];
+ struct scatterlist sgl[NVME_INLINE_SG_CNT];
uint8_t priv[0];
};
!template->ls_req || !template->fcp_io ||
!template->ls_abort || !template->fcp_abort ||
!template->max_hw_queues || !template->max_sgl_segments ||
- !template->max_dif_sgl_segments || !template->dma_boundary) {
+ !template->max_dif_sgl_segments || !template->dma_boundary ||
+ !template->module) {
ret = -EINVAL;
goto out_reghost_failed;
}
{
struct nvme_fc_ctrl *ctrl =
container_of(ref, struct nvme_fc_ctrl, ref);
+ struct nvme_fc_lport *lport = ctrl->lport;
unsigned long flags;
if (ctrl->ctrl.tagset) {
if (ctrl->ctrl.opts)
nvmf_free_options(ctrl->ctrl.opts);
kfree(ctrl);
+ module_put(lport->ops->module);
}
static void
freq->sg_table.sgl = freq->first_sgl;
ret = sg_alloc_table_chained(&freq->sg_table,
blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
- SG_CHUNK_SIZE);
+ NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
op->nents, rq_dma_dir(rq));
if (unlikely(freq->sg_cnt <= 0)) {
- sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
freq->sg_cnt = 0;
return -EFAULT;
}
fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
rq_dma_dir(rq));
- sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
freq->sg_cnt = 0;
}
static void
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
{
- nvme_stop_keep_alive(&ctrl->ctrl);
+ /*
+ * if state is connecting - the error occurred as part of a
+ * reconnect attempt. The create_association error paths will
+ * clean up any outstanding io.
+ *
+ * if it's a different state - ensure all pending io is
+ * terminated. Given this can delay while waiting for the
+ * aborted io to return, we recheck adapter state below
+ * before changing state.
+ */
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
+ nvme_stop_keep_alive(&ctrl->ctrl);
- /* will block will waiting for io to terminate */
- nvme_fc_delete_association(ctrl);
+ /* will block will waiting for io to terminate */
+ nvme_fc_delete_association(ctrl);
+ }
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
goto out_fail;
}
+ if (!try_module_get(lport->ops->module)) {
+ ret = -EUNATCH;
+ goto out_free_ctrl;
+ }
+
idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
- goto out_free_ctrl;
+ goto out_mod_put;
}
ctrl->ctrl.opts = opts;
out_free_ida:
put_device(ctrl->dev);
ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
+out_mod_put:
+ module_put(lport->ops->module);
out_free_ctrl:
kfree(ctrl);
out_fail:
#define NVME_DEFAULT_KATO 5
#define NVME_KATO_GRACE 10
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
+#define NVME_INLINE_SG_CNT 0
+#else
+#define NVME_INLINE_SG_CNT 2
+#endif
+
extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
-static int write_queues;
-module_param(write_queues, int, 0644);
+static unsigned int write_queues;
+module_param(write_queues, uint, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
-static int poll_queues;
-module_param(poll_queues, int, 0644);
+static unsigned int poll_queues;
+module_param(poll_queues, uint, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
u16 sq_tail;
u16 last_sq_tail;
u16 cq_head;
- u16 last_cq_head;
u16 qid;
u8 cq_phase;
u8 sqes;
* the irq handler, even if that was on another CPU.
*/
rmb();
- if (nvmeq->cq_head != nvmeq->last_cq_head)
- ret = IRQ_HANDLED;
nvme_process_cq(nvmeq, &start, &end, -1);
- nvmeq->last_cq_head = nvmeq->cq_head;
wmb();
if (start != end) {
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
return result;
- else if (result)
+ if (result)
goto release_cq;
nvmeq->cq_vector = vector;
.priv = dev,
};
unsigned int irq_queues, this_p_queues;
- unsigned int nr_cpus = num_possible_cpus();
/*
* Poll queues don't need interrupts, but we need at least one IO
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
} else {
- if (nr_cpus < nr_io_queues - this_p_queues)
- irq_queues = nr_cpus + 1;
- else
- irq_queues = nr_io_queues - this_p_queues + 1;
+ irq_queues = nr_io_queues - this_p_queues + 1;
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
+
+ write_queues = min(write_queues, num_possible_cpus());
+ poll_queues = min(poll_queues, num_possible_cpus());
return pci_register_driver(&nvme_driver);
}
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
}
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
- sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
req->sg_table.sgl = req->first_sgl;
ret = sg_alloc_table_chained(&req->sg_table,
blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
- SG_CHUNK_SIZE);
+ NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
out_unmap_sg:
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
out_free_table:
- sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
return ret;
}
#define FCLOOP_DMABOUND_4G 0xFFFFFFFF
static struct nvme_fc_port_template fctemplate = {
+ .module = THIS_MODULE,
.localport_delete = fcloop_localport_delete,
.remoteport_delete = fcloop_remoteport_delete,
.create_queue = fcloop_create_queue,
{
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
- sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
nvme_complete_rq(req);
}
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+ iod->sg_table.sgl, NVME_INLINE_SG_CNT)) {
nvme_cleanup_cmd(req);
return BLK_STS_RESOURCE;
}
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
ctrl->tag_set.numa_node = NUMA_NO_NODE;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->tag_set.driver_data = ctrl;
ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
/* Declare and initialization an instance of the FC NVME template. */
static struct nvme_fc_port_template lpfc_nvme_template = {
+ .module = THIS_MODULE,
+
/* initiator-based functions */
.localport_delete = lpfc_nvme_localport_delete,
.remoteport_delete = lpfc_nvme_remoteport_delete,
}
static struct nvme_fc_port_template qla_nvme_fc_transport = {
+ .module = THIS_MODULE,
.localport_delete = qla_nvme_localport_delete,
.remoteport_delete = qla_nvme_remoteport_delete,
.create_queue = qla_nvme_alloc_queue,
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
-void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);
/* Blkio controller policy registration */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
-static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
*
* Host/Initiator Transport Entrypoints/Parameters:
*
+ * @module: The LLDD module using the interface
+ *
* @localport_delete: The LLDD initiates deletion of a localport via
* nvme_fc_deregister_localport(). However, the teardown is
* asynchronous. This routine is called upon the completion of the
* Value is Mandatory. Allowed to be zero.
*/
struct nvme_fc_port_template {
+ struct module *module;
+
/* initiator-based functions */
void (*localport_delete)(struct nvme_fc_local_port *);
void (*remoteport_delete)(struct nvme_fc_remote_port *);