/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#define pr_fmt(fmt) PFX fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "
#define DRV_VERSION	"1.0"
#define DRV_RELDATE	"July 1, 2013"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
		   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");
static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
static struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate transport layer errors. After this time has been"
		 " exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;
133 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
135 int tmo = *(int *)kp->arg;
138 return sprintf(buffer, "%d", tmo);
140 return sprintf(buffer, "off");
143 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
147 if (strncmp(val, "off", 3) != 0) {
148 res = kstrtoint(val, 0, &tmo);
154 if (kp->arg == &srp_reconnect_delay)
155 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
157 else if (kp->arg == &srp_fast_io_fail_tmo)
158 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
160 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
164 *(int *)kp->arg = tmo;
170 static struct kernel_param_ops srp_tmo_ops = {
175 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
177 return (struct srp_target_port *) host->hostdata;
180 static const char *srp_target_info(struct Scsi_Host *host)
182 return host_to_target(host)->target_name;
185 static int srp_target_is_topspin(struct srp_target_port *target)
187 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
188 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
190 return topspin_workarounds &&
191 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
192 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
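/*
 * Illustrative example (not part of the original source): the check above
 * compares only the first three bytes of the ioc_guid, i.e. its OUI. An
 * ioc_guid such as 0x0005ad0000001234 therefore enables the workarounds
 * (when topspin_workarounds != 0) because it starts with the Topspin OUI
 * 00:05:ad, while a GUID starting with a different OUI does not.
 */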
195 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
197 enum dma_data_direction direction)
201 iu = kmalloc(sizeof *iu, gfp_mask);
205 iu->buf = kzalloc(size, gfp_mask);
209 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
211 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
215 iu->direction = direction;
227 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
232 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
238 static void srp_qp_event(struct ib_event *event, void *context)
240 pr_debug("QP event %d\n", event->event);
243 static int srp_init_qp(struct srp_target_port *target,
246 struct ib_qp_attr *attr;
249 attr = kmalloc(sizeof *attr, GFP_KERNEL);
253 ret = ib_find_pkey(target->srp_host->srp_dev->dev,
254 target->srp_host->port,
255 be16_to_cpu(target->path.pkey),
260 attr->qp_state = IB_QPS_INIT;
261 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
262 IB_ACCESS_REMOTE_WRITE);
263 attr->port_num = target->srp_host->port;
265 ret = ib_modify_qp(qp, attr,
276 static int srp_new_cm_id(struct srp_target_port *target)
278 struct ib_cm_id *new_cm_id;
280 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
281 srp_cm_handler, target);
282 if (IS_ERR(new_cm_id))
283 return PTR_ERR(new_cm_id);
286 ib_destroy_cm_id(target->cm_id);
287 target->cm_id = new_cm_id;
292 static int srp_create_target_ib(struct srp_target_port *target)
294 struct ib_qp_init_attr *init_attr;
295 struct ib_cq *recv_cq, *send_cq;
299 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
303 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
304 srp_recv_completion, NULL, target,
305 target->queue_size, target->comp_vector);
306 if (IS_ERR(recv_cq)) {
307 ret = PTR_ERR(recv_cq);
311 send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
312 srp_send_completion, NULL, target,
313 target->queue_size, target->comp_vector);
314 if (IS_ERR(send_cq)) {
315 ret = PTR_ERR(send_cq);
319 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
321 init_attr->event_handler = srp_qp_event;
322 init_attr->cap.max_send_wr = target->queue_size;
323 init_attr->cap.max_recv_wr = target->queue_size;
324 init_attr->cap.max_recv_sge = 1;
325 init_attr->cap.max_send_sge = 1;
326 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
327 init_attr->qp_type = IB_QPT_RC;
328 init_attr->send_cq = send_cq;
329 init_attr->recv_cq = recv_cq;
331 qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
337 ret = srp_init_qp(target, qp);
342 ib_destroy_qp(target->qp);
344 ib_destroy_cq(target->recv_cq);
346 ib_destroy_cq(target->send_cq);
349 target->recv_cq = recv_cq;
350 target->send_cq = send_cq;
359 ib_destroy_cq(send_cq);
362 ib_destroy_cq(recv_cq);
370 * Note: this function may be called without srp_alloc_iu_bufs() having been
371 * invoked. Hence the target->[rt]x_ring checks.
373 static void srp_free_target_ib(struct srp_target_port *target)
377 ib_destroy_qp(target->qp);
378 ib_destroy_cq(target->send_cq);
379 ib_destroy_cq(target->recv_cq);
382 target->send_cq = target->recv_cq = NULL;
384 if (target->rx_ring) {
385 for (i = 0; i < target->queue_size; ++i)
386 srp_free_iu(target->srp_host, target->rx_ring[i]);
387 kfree(target->rx_ring);
388 target->rx_ring = NULL;
390 if (target->tx_ring) {
391 for (i = 0; i < target->queue_size; ++i)
392 srp_free_iu(target->srp_host, target->tx_ring[i]);
393 kfree(target->tx_ring);
394 target->tx_ring = NULL;
398 static void srp_path_rec_completion(int status,
399 struct ib_sa_path_rec *pathrec,
402 struct srp_target_port *target = target_ptr;
404 target->status = status;
406 shost_printk(KERN_ERR, target->scsi_host,
407 PFX "Got failed path rec status %d\n", status);
409 target->path = *pathrec;
410 complete(&target->done);
413 static int srp_lookup_path(struct srp_target_port *target)
415 target->path.numb_path = 1;
417 init_completion(&target->done);
419 target->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
420 target->srp_host->srp_dev->dev,
421 target->srp_host->port,
423 IB_SA_PATH_REC_SERVICE_ID |
424 IB_SA_PATH_REC_DGID |
425 IB_SA_PATH_REC_SGID |
426 IB_SA_PATH_REC_NUMB_PATH |
428 SRP_PATH_REC_TIMEOUT_MS,
430 srp_path_rec_completion,
431 target, &target->path_query);
432 if (target->path_query_id < 0)
433 return target->path_query_id;
435 wait_for_completion(&target->done);
437 if (target->status < 0)
438 shost_printk(KERN_WARNING, target->scsi_host,
439 PFX "Path record query failed\n");
441 return target->status;
444 static int srp_send_req(struct srp_target_port *target)
447 struct ib_cm_req_param param;
448 struct srp_login_req priv;
452 req = kzalloc(sizeof *req, GFP_KERNEL);
456 req->param.primary_path = &target->path;
457 req->param.alternate_path = NULL;
458 req->param.service_id = target->service_id;
459 req->param.qp_num = target->qp->qp_num;
460 req->param.qp_type = target->qp->qp_type;
461 req->param.private_data = &req->priv;
462 req->param.private_data_len = sizeof req->priv;
463 req->param.flow_control = 1;
465 get_random_bytes(&req->param.starting_psn, 4);
466 req->param.starting_psn &= 0xffffff;
469 * Pick some arbitrary defaults here; we could make these
470 * module parameters if anyone cared about setting them.
472 req->param.responder_resources = 4;
473 req->param.remote_cm_response_timeout = 20;
474 req->param.local_cm_response_timeout = 20;
475 req->param.retry_count = target->tl_retry_count;
476 req->param.rnr_retry_count = 7;
477 req->param.max_cm_retries = 15;
479 req->priv.opcode = SRP_LOGIN_REQ;
481 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
482 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
483 SRP_BUF_FORMAT_INDIRECT);
485 * In the published SRP specification (draft rev. 16a), the
486 * port identifier format is 8 bytes of ID extension followed
487 * by 8 bytes of GUID. Older drafts put the two halves in the
488 * opposite order, so that the GUID comes first.
490 * Targets conforming to these obsolete drafts can be
491 * recognized by the I/O Class they report.
493 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
494 memcpy(req->priv.initiator_port_id,
495 &target->path.sgid.global.interface_id, 8);
496 memcpy(req->priv.initiator_port_id + 8,
497 &target->initiator_ext, 8);
498 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
499 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
501 memcpy(req->priv.initiator_port_id,
502 &target->initiator_ext, 8);
503 memcpy(req->priv.initiator_port_id + 8,
504 &target->path.sgid.global.interface_id, 8);
505 memcpy(req->priv.target_port_id, &target->id_ext, 8);
506 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
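	/*
	 * Illustrative example (values are made up): with id_ext
	 * 0x0102030405060708 and ioc_guid 0x1112131415161718, a target that
	 * follows the current spec receives target_port_id = id_ext followed
	 * by ioc_guid, whereas a SRP_REV10_IB_IO_CLASS target receives
	 * ioc_guid followed by id_ext, matching the obsolete draft layout
	 * described above.
	 */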
510 * Topspin/Cisco SRP targets will reject our login unless we
511 * zero out the first 8 bytes of our initiator port ID and set
512 * the second 8 bytes to the local node GUID.
514 if (srp_target_is_topspin(target)) {
515 shost_printk(KERN_DEBUG, target->scsi_host,
516 PFX "Topspin/Cisco initiator port ID workaround "
517 "activated for target GUID %016llx\n",
518 (unsigned long long) be64_to_cpu(target->ioc_guid));
519 memset(req->priv.initiator_port_id, 0, 8);
520 memcpy(req->priv.initiator_port_id + 8,
521 &target->srp_host->srp_dev->dev->node_guid, 8);
524 status = ib_send_cm_req(target->cm_id, &req->param);
531 static bool srp_queue_remove_work(struct srp_target_port *target)
533 bool changed = false;
535 spin_lock_irq(&target->lock);
536 if (target->state != SRP_TARGET_REMOVED) {
537 target->state = SRP_TARGET_REMOVED;
540 spin_unlock_irq(&target->lock);
543 queue_work(srp_remove_wq, &target->remove_work);
548 static bool srp_change_conn_state(struct srp_target_port *target,
551 bool changed = false;
553 spin_lock_irq(&target->lock);
554 if (target->connected != connected) {
555 target->connected = connected;
558 spin_unlock_irq(&target->lock);
563 static void srp_disconnect_target(struct srp_target_port *target)
565 if (srp_change_conn_state(target, false)) {
566 /* XXX should send SRP_I_LOGOUT request */
568 if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
569 shost_printk(KERN_DEBUG, target->scsi_host,
570 PFX "Sending CM DREQ failed\n");
575 static void srp_free_req_data(struct srp_target_port *target)
577 struct ib_device *ibdev = target->srp_host->srp_dev->dev;
578 struct srp_request *req;
581 if (!target->req_ring)
584 for (i = 0; i < target->req_ring_size; ++i) {
585 req = &target->req_ring[i];
586 kfree(req->fmr_list);
587 kfree(req->map_page);
588 if (req->indirect_dma_addr) {
589 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
590 target->indirect_size,
593 kfree(req->indirect_desc);
596 kfree(target->req_ring);
597 target->req_ring = NULL;
600 static int srp_alloc_req_data(struct srp_target_port *target)
602 struct srp_device *srp_dev = target->srp_host->srp_dev;
603 struct ib_device *ibdev = srp_dev->dev;
604 struct srp_request *req;
606 int i, ret = -ENOMEM;
608 INIT_LIST_HEAD(&target->free_reqs);
610 target->req_ring = kzalloc(target->req_ring_size *
611 sizeof(*target->req_ring), GFP_KERNEL);
612 if (!target->req_ring)
615 for (i = 0; i < target->req_ring_size; ++i) {
616 req = &target->req_ring[i];
617 req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
619 req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
621 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
622 if (!req->fmr_list || !req->map_page || !req->indirect_desc)
625 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
626 target->indirect_size,
628 if (ib_dma_mapping_error(ibdev, dma_addr))
631 req->indirect_dma_addr = dma_addr;
633 list_add_tail(&req->list, &target->free_reqs);
/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template that did not exist before
 * this function was invoked are ignored.
 */
static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
{
	struct device_attribute **attr;

	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
		device_remove_file(&shost->shost_dev, *attr);
}
656 static void srp_remove_target(struct srp_target_port *target)
658 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
660 srp_del_scsi_host_attr(target->scsi_host);
661 srp_rport_get(target->rport);
662 srp_remove_host(target->scsi_host);
663 scsi_remove_host(target->scsi_host);
664 srp_stop_rport_timers(target->rport);
665 srp_disconnect_target(target);
666 ib_destroy_cm_id(target->cm_id);
667 srp_free_target_ib(target);
668 cancel_work_sync(&target->tl_err_work);
669 srp_rport_put(target->rport);
670 srp_free_req_data(target);
672 spin_lock(&target->srp_host->target_lock);
673 list_del(&target->list);
674 spin_unlock(&target->srp_host->target_lock);
676 scsi_host_put(target->scsi_host);
679 static void srp_remove_work(struct work_struct *work)
681 struct srp_target_port *target =
682 container_of(work, struct srp_target_port, remove_work);
684 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
686 srp_remove_target(target);
689 static void srp_rport_delete(struct srp_rport *rport)
691 struct srp_target_port *target = rport->lld_data;
693 srp_queue_remove_work(target);
696 static int srp_connect_target(struct srp_target_port *target)
701 WARN_ON_ONCE(target->connected);
703 target->qp_in_error = false;
705 ret = srp_lookup_path(target);
710 init_completion(&target->done);
711 ret = srp_send_req(target);
714 wait_for_completion(&target->done);
	/*
	 * The CM event handling code will set status to
	 * SRP_PORT_REDIRECT if we get a port redirect REJ
	 * back, or SRP_DLID_REDIRECT if we get a lid/qp
	 * redirect REJ back.
	 */
722 switch (target->status) {
724 srp_change_conn_state(target, true);
727 case SRP_PORT_REDIRECT:
728 ret = srp_lookup_path(target);
733 case SRP_DLID_REDIRECT:
737 /* Our current CM id was stale, and is now in timewait.
738 * Try to reconnect with a new one.
740 if (!retries-- || srp_new_cm_id(target)) {
741 shost_printk(KERN_ERR, target->scsi_host, PFX
742 "giving up on stale connection\n");
743 target->status = -ECONNRESET;
744 return target->status;
747 shost_printk(KERN_ERR, target->scsi_host, PFX
748 "retrying stale connection\n");
752 return target->status;
757 static void srp_unmap_data(struct scsi_cmnd *scmnd,
758 struct srp_target_port *target,
759 struct srp_request *req)
761 struct ib_device *ibdev = target->srp_host->srp_dev->dev;
762 struct ib_pool_fmr **pfmr;
764 if (!scsi_sglist(scmnd) ||
765 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
766 scmnd->sc_data_direction != DMA_FROM_DEVICE))
769 pfmr = req->fmr_list;
771 ib_fmr_pool_unmap(*pfmr++);
773 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
774 scmnd->sc_data_direction);
778 * srp_claim_req - Take ownership of the scmnd associated with a request.
779 * @target: SRP target port.
781 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
782 * ownership of @req->scmnd if it equals @scmnd.
785 * Either NULL or a pointer to the SCSI command the caller became owner of.
787 static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
788 struct srp_request *req,
789 struct scsi_cmnd *scmnd)
793 spin_lock_irqsave(&target->lock, flags);
797 } else if (req->scmnd == scmnd) {
802 spin_unlock_irqrestore(&target->lock, flags);
808 * srp_free_req() - Unmap data and add request to the free request list.
810 static void srp_free_req(struct srp_target_port *target,
811 struct srp_request *req, struct scsi_cmnd *scmnd,
816 srp_unmap_data(scmnd, target, req);
818 spin_lock_irqsave(&target->lock, flags);
819 target->req_lim += req_lim_delta;
820 list_add_tail(&req->list, &target->free_reqs);
821 spin_unlock_irqrestore(&target->lock, flags);
824 static void srp_finish_req(struct srp_target_port *target,
825 struct srp_request *req, int result)
827 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
830 srp_free_req(target, req, scmnd, 0);
831 scmnd->result = result;
832 scmnd->scsi_done(scmnd);
836 static void srp_terminate_io(struct srp_rport *rport)
838 struct srp_target_port *target = rport->lld_data;
841 for (i = 0; i < target->req_ring_size; ++i) {
842 struct srp_request *req = &target->req_ring[i];
843 srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. The easiest way to guarantee this is not to call this
 * function directly but to call srp_reconnect_rport() instead, since that
 * function serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
856 static int srp_rport_reconnect(struct srp_rport *rport)
858 struct srp_target_port *target = rport->lld_data;
861 srp_disconnect_target(target);
863 * Now get a new local CM ID so that we avoid confusing the target in
864 * case things are really fouled up. Doing so also ensures that all CM
865 * callbacks will have finished before a new QP is allocated.
867 ret = srp_new_cm_id(target);
869 * Whether or not creating a new CM ID succeeded, create a new
870 * QP. This guarantees that all completion callback function
871 * invocations have finished before request resetting starts.
874 ret = srp_create_target_ib(target);
876 srp_create_target_ib(target);
878 for (i = 0; i < target->req_ring_size; ++i) {
879 struct srp_request *req = &target->req_ring[i];
880 srp_finish_req(target, req, DID_RESET << 16);
883 INIT_LIST_HEAD(&target->free_tx);
884 for (i = 0; i < target->queue_size; ++i)
885 list_add(&target->tx_ring[i]->list, &target->free_tx);
888 ret = srp_connect_target(target);
891 shost_printk(KERN_INFO, target->scsi_host,
892 PFX "reconnect succeeded\n");
897 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
898 unsigned int dma_len, u32 rkey)
900 struct srp_direct_buf *desc = state->desc;
902 desc->va = cpu_to_be64(dma_addr);
903 desc->key = cpu_to_be32(rkey);
904 desc->len = cpu_to_be32(dma_len);
906 state->total_len += dma_len;
911 static int srp_map_finish_fmr(struct srp_map_state *state,
912 struct srp_target_port *target)
914 struct srp_device *dev = target->srp_host->srp_dev;
915 struct ib_pool_fmr *fmr;
921 if (state->npages == 1) {
922 srp_map_desc(state, state->base_dma_addr, state->fmr_len,
924 state->npages = state->fmr_len = 0;
928 fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
929 state->npages, io_addr);
933 *state->next_fmr++ = fmr;
936 srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey);
937 state->npages = state->fmr_len = 0;
941 static void srp_map_update_start(struct srp_map_state *state,
942 struct scatterlist *sg, int sg_index,
945 state->unmapped_sg = sg;
946 state->unmapped_index = sg_index;
947 state->unmapped_addr = dma_addr;
950 static int srp_map_sg_entry(struct srp_map_state *state,
951 struct srp_target_port *target,
952 struct scatterlist *sg, int sg_index,
955 struct srp_device *dev = target->srp_host->srp_dev;
956 struct ib_device *ibdev = dev->dev;
957 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
958 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
965 if (use_fmr == SRP_MAP_NO_FMR) {
966 /* Once we're in direct map mode for a request, we don't
967 * go back to FMR mode, so no need to update anything
968 * other than the descriptor.
970 srp_map_desc(state, dma_addr, dma_len, target->rkey);
	/* If we start at an offset into the FMR page, don't merge into the
	 * current FMR. Finish it out, and use the kernel's MR for this sg
	 * entry. This avoids problems with some SRP targets that were never
	 * fully diagnosed but that disappeared once the initiator stopped
	 * using FMR for such page fragments.
	 */
980 if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) {
981 ret = srp_map_finish_fmr(state, target);
985 srp_map_desc(state, dma_addr, dma_len, target->rkey);
986 srp_map_update_start(state, NULL, 0, 0);
990 /* If this is the first sg to go into the FMR, save our position.
991 * We need to know the first unmapped entry, its index, and the
992 * first unmapped address within that entry to be able to restart
993 * mapping after an error.
995 if (!state->unmapped_sg)
996 srp_map_update_start(state, sg, sg_index, dma_addr);
999 if (state->npages == SRP_FMR_SIZE) {
1000 ret = srp_map_finish_fmr(state, target);
1004 srp_map_update_start(state, sg, sg_index, dma_addr);
1007 len = min_t(unsigned int, dma_len, dev->fmr_page_size);
1010 state->base_dma_addr = dma_addr;
1011 state->pages[state->npages++] = dma_addr;
1012 state->fmr_len += len;
	/* If the last entry of the FMR wasn't a full page, then we need to
	 * close it out and start a new one -- we can only merge at page
	 * boundaries.
	 */
1022 if (len != dev->fmr_page_size) {
1023 ret = srp_map_finish_fmr(state, target);
1025 srp_map_update_start(state, NULL, 0, 0);
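	/*
	 * Illustrative example (not part of the original source): with a
	 * 4 KB fmr_page_size, an sg entry that starts 0x200 bytes into a
	 * page fails the (dma_addr & ~dev->fmr_page_mask) test above and is
	 * emitted as a direct descriptor, while page-aligned entries are
	 * accumulated into state->pages[] until either SRP_FMR_SIZE pages
	 * have been collected or an entry ends on a partial page, at which
	 * point srp_map_finish_fmr() closes out the current FMR.
	 */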
1030 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
1031 struct srp_request *req)
1033 struct scatterlist *scat, *sg;
1034 struct srp_cmd *cmd = req->cmd->buf;
1035 int i, len, nents, count, use_fmr;
1036 struct srp_device *dev;
1037 struct ib_device *ibdev;
1038 struct srp_map_state state;
1039 struct srp_indirect_buf *indirect_hdr;
1043 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1044 return sizeof (struct srp_cmd);
1046 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1047 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1048 shost_printk(KERN_WARNING, target->scsi_host,
1049 PFX "Unhandled data direction %d\n",
1050 scmnd->sc_data_direction);
1054 nents = scsi_sg_count(scmnd);
1055 scat = scsi_sglist(scmnd);
1057 dev = target->srp_host->srp_dev;
1060 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1061 if (unlikely(count == 0))
1064 fmt = SRP_DATA_DESC_DIRECT;
1065 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1069 * The midlayer only generated a single gather/scatter
1070 * entry, or DMA mapping coalesced everything to a
1071 * single entry. So a direct descriptor along with
1072 * the DMA MR suffices.
1074 struct srp_direct_buf *buf = (void *) cmd->add_data;
1076 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1077 buf->key = cpu_to_be32(target->rkey);
1078 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
	/* We have more than one scatter/gather entry, so build our indirect
	 * descriptor table, trying to merge as many entries with FMR as we
	 * can.
	 */
1088 indirect_hdr = (void *) cmd->add_data;
1090 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1091 target->indirect_size, DMA_TO_DEVICE);
1093 memset(&state, 0, sizeof(state));
1094 state.desc = req->indirect_desc;
1095 state.pages = req->map_page;
1096 state.next_fmr = req->fmr_list;
1098 use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
1100 for_each_sg(scat, sg, count, i) {
1101 if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) {
1102 /* FMR mapping failed, so backtrack to the first
1103 * unmapped entry and continue on without using FMR.
1105 dma_addr_t dma_addr;
1106 unsigned int dma_len;
1109 sg = state.unmapped_sg;
1110 i = state.unmapped_index;
1112 dma_addr = ib_sg_dma_address(ibdev, sg);
1113 dma_len = ib_sg_dma_len(ibdev, sg);
1114 dma_len -= (state.unmapped_addr - dma_addr);
1115 dma_addr = state.unmapped_addr;
1116 use_fmr = SRP_MAP_NO_FMR;
1117 srp_map_desc(&state, dma_addr, dma_len, target->rkey);
1121 if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target))
1124 /* We've mapped the request, now pull as much of the indirect
1125 * descriptor table as we can into the command buffer. If this
1126 * target is not using an external indirect table, we are
1127 * guaranteed to fit into the command, as the SCSI layer won't
1128 * give us more S/G entries than we allow.
1130 req->nfmr = state.nfmr;
1131 if (state.ndesc == 1) {
1132 /* FMR mapping was able to collapse this to one entry,
1133 * so use a direct descriptor.
1135 struct srp_direct_buf *buf = (void *) cmd->add_data;
1137 *buf = req->indirect_desc[0];
1141 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1142 !target->allow_ext_sg)) {
1143 shost_printk(KERN_ERR, target->scsi_host,
1144 "Could not fit S/G list into SRP_CMD\n");
1148 count = min(state.ndesc, target->cmd_sg_cnt);
1149 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1151 fmt = SRP_DATA_DESC_INDIRECT;
1152 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1153 len += count * sizeof (struct srp_direct_buf);
1155 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1156 count * sizeof (struct srp_direct_buf));
1158 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1159 indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
1160 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1161 indirect_hdr->len = cpu_to_be32(state.total_len);
1163 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1164 cmd->data_out_desc_cnt = count;
1166 cmd->data_in_desc_cnt = count;
1168 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1172 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1173 cmd->buf_fmt = fmt << 4;
/*
 * Return an IU and possibly a credit to the free pool
 */
1183 static void srp_put_tx_iu(struct srp_target_port *target, struct srp_iu *iu,
1184 enum srp_iu_type iu_type)
1186 unsigned long flags;
1188 spin_lock_irqsave(&target->lock, flags);
1189 list_add(&iu->list, &target->free_tx);
1190 if (iu_type != SRP_IU_RSP)
1192 spin_unlock_irqrestore(&target->lock, flags);
/*
 * Must be called with target->lock held to protect req_lim and free_tx.
 * If IU is not sent, it must be returned using srp_put_tx_iu().
 *
 * Note:
 * An upper limit for the number of allocated information units for each
 * request type is:
 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
 *   more than Scsi_Host.can_queue requests.
 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
 *   one unanswered SRP request to an initiator.
 */
1208 static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
1209 enum srp_iu_type iu_type)
1211 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1214 srp_send_completion(target->send_cq, target);
1216 if (list_empty(&target->free_tx))
1219 /* Initiator responses to target requests do not consume credits */
1220 if (iu_type != SRP_IU_RSP) {
1221 if (target->req_lim <= rsv) {
1222 ++target->zero_req_lim;
1229 iu = list_first_entry(&target->free_tx, struct srp_iu, list);
1230 list_del(&iu->list);
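	/*
	 * Illustrative example (not part of the original source): with the
	 * reservation above, once req_lim has dropped to SRP_TSK_MGMT_SQ_SIZE
	 * a SRP_IU_CMD allocation is refused (and zero_req_lim incremented),
	 * while a SRP_IU_TSK_MGMT allocation still succeeds, so a task
	 * management request such as an abort can always be sent.
	 */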
1234 static int srp_post_send(struct srp_target_port *target,
1235 struct srp_iu *iu, int len)
1238 struct ib_send_wr wr, *bad_wr;
1240 list.addr = iu->dma;
1242 list.lkey = target->lkey;
1245 wr.wr_id = (uintptr_t) iu;
1248 wr.opcode = IB_WR_SEND;
1249 wr.send_flags = IB_SEND_SIGNALED;
1251 return ib_post_send(target->qp, &wr, &bad_wr);
1254 static int srp_post_recv(struct srp_target_port *target, struct srp_iu *iu)
1256 struct ib_recv_wr wr, *bad_wr;
1259 list.addr = iu->dma;
1260 list.length = iu->size;
1261 list.lkey = target->lkey;
1264 wr.wr_id = (uintptr_t) iu;
1268 return ib_post_recv(target->qp, &wr, &bad_wr);
1271 static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
1273 struct srp_request *req;
1274 struct scsi_cmnd *scmnd;
1275 unsigned long flags;
1277 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1278 spin_lock_irqsave(&target->lock, flags);
1279 target->req_lim += be32_to_cpu(rsp->req_lim_delta);
1280 spin_unlock_irqrestore(&target->lock, flags);
1282 target->tsk_mgmt_status = -1;
1283 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1284 target->tsk_mgmt_status = rsp->data[3];
1285 complete(&target->tsk_mgmt_done);
1287 req = &target->req_ring[rsp->tag];
1288 scmnd = srp_claim_req(target, req, NULL);
1290 shost_printk(KERN_ERR, target->scsi_host,
1291 "Null scmnd for RSP w/tag %016llx\n",
1292 (unsigned long long) rsp->tag);
1294 spin_lock_irqsave(&target->lock, flags);
1295 target->req_lim += be32_to_cpu(rsp->req_lim_delta);
1296 spin_unlock_irqrestore(&target->lock, flags);
1300 scmnd->result = rsp->status;
1302 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1303 memcpy(scmnd->sense_buffer, rsp->data +
1304 be32_to_cpu(rsp->resp_data_len),
1305 min_t(int, be32_to_cpu(rsp->sense_data_len),
1306 SCSI_SENSE_BUFFERSIZE));
1309 if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER))
1310 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1311 else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
1312 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1314 srp_free_req(target, req, scmnd,
1315 be32_to_cpu(rsp->req_lim_delta));
1317 scmnd->host_scribble = NULL;
1318 scmnd->scsi_done(scmnd);
1322 static int srp_response_common(struct srp_target_port *target, s32 req_delta,
1325 struct ib_device *dev = target->srp_host->srp_dev->dev;
1326 unsigned long flags;
1330 spin_lock_irqsave(&target->lock, flags);
1331 target->req_lim += req_delta;
1332 iu = __srp_get_tx_iu(target, SRP_IU_RSP);
1333 spin_unlock_irqrestore(&target->lock, flags);
1336 shost_printk(KERN_ERR, target->scsi_host, PFX
1337 "no IU available to send response\n");
1341 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1342 memcpy(iu->buf, rsp, len);
1343 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1345 err = srp_post_send(target, iu, len);
1347 shost_printk(KERN_ERR, target->scsi_host, PFX
1348 "unable to post response: %d\n", err);
1349 srp_put_tx_iu(target, iu, SRP_IU_RSP);
1355 static void srp_process_cred_req(struct srp_target_port *target,
1356 struct srp_cred_req *req)
1358 struct srp_cred_rsp rsp = {
1359 .opcode = SRP_CRED_RSP,
1362 s32 delta = be32_to_cpu(req->req_lim_delta);
1364 if (srp_response_common(target, delta, &rsp, sizeof rsp))
1365 shost_printk(KERN_ERR, target->scsi_host, PFX
1366 "problems processing SRP_CRED_REQ\n");
1369 static void srp_process_aer_req(struct srp_target_port *target,
1370 struct srp_aer_req *req)
1372 struct srp_aer_rsp rsp = {
1373 .opcode = SRP_AER_RSP,
1376 s32 delta = be32_to_cpu(req->req_lim_delta);
1378 shost_printk(KERN_ERR, target->scsi_host, PFX
1379 "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun));
1381 if (srp_response_common(target, delta, &rsp, sizeof rsp))
1382 shost_printk(KERN_ERR, target->scsi_host, PFX
1383 "problems processing SRP_AER_REQ\n");
1386 static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
1388 struct ib_device *dev = target->srp_host->srp_dev->dev;
1389 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1393 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len,
1396 opcode = *(u8 *) iu->buf;
1399 shost_printk(KERN_ERR, target->scsi_host,
1400 PFX "recv completion, opcode 0x%02x\n", opcode);
1401 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1402 iu->buf, wc->byte_len, true);
1407 srp_process_rsp(target, iu->buf);
1411 srp_process_cred_req(target, iu->buf);
1415 srp_process_aer_req(target, iu->buf);
1419 /* XXX Handle target logout */
1420 shost_printk(KERN_WARNING, target->scsi_host,
1421 PFX "Got target logout request\n");
1425 shost_printk(KERN_WARNING, target->scsi_host,
1426 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1430 ib_dma_sync_single_for_device(dev, iu->dma, target->max_ti_iu_len,
1433 res = srp_post_recv(target, iu);
1435 shost_printk(KERN_ERR, target->scsi_host,
1436 PFX "Recv failed with error code %d\n", res);
1440 * srp_tl_err_work() - handle a transport layer error
1442 * Note: This function may get invoked before the rport has been created,
1443 * hence the target->rport test.
1445 static void srp_tl_err_work(struct work_struct *work)
1447 struct srp_target_port *target;
1449 target = container_of(work, struct srp_target_port, tl_err_work);
1451 srp_start_tl_fail_timers(target->rport);
1454 static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err,
1455 struct srp_target_port *target)
1457 if (target->connected && !target->qp_in_error) {
1458 shost_printk(KERN_ERR, target->scsi_host,
1459 PFX "failed %s status %d\n",
1460 send_err ? "send" : "receive",
1462 queue_work(system_long_wq, &target->tl_err_work);
1464 target->qp_in_error = true;
1467 static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1469 struct srp_target_port *target = target_ptr;
1472 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1473 while (ib_poll_cq(cq, 1, &wc) > 0) {
1474 if (likely(wc.status == IB_WC_SUCCESS)) {
1475 srp_handle_recv(target, &wc);
1477 srp_handle_qp_err(wc.status, false, target);
1482 static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1484 struct srp_target_port *target = target_ptr;
1488 while (ib_poll_cq(cq, 1, &wc) > 0) {
1489 if (likely(wc.status == IB_WC_SUCCESS)) {
1490 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1491 list_add(&iu->list, &target->free_tx);
1493 srp_handle_qp_err(wc.status, true, target);
1498 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1500 struct srp_target_port *target = host_to_target(shost);
1501 struct srp_rport *rport = target->rport;
1502 struct srp_request *req;
1504 struct srp_cmd *cmd;
1505 struct ib_device *dev;
1506 unsigned long flags;
1508 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1511 * The SCSI EH thread is the only context from which srp_queuecommand()
1512 * can get invoked for blocked devices (SDEV_BLOCK /
1513 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1514 * locking the rport mutex if invoked from inside the SCSI EH.
1517 mutex_lock(&rport->mutex);
1519 result = srp_chkready(target->rport);
1520 if (unlikely(result)) {
1521 scmnd->result = result;
1522 scmnd->scsi_done(scmnd);
1526 spin_lock_irqsave(&target->lock, flags);
1527 iu = __srp_get_tx_iu(target, SRP_IU_CMD);
1531 req = list_first_entry(&target->free_reqs, struct srp_request, list);
1532 list_del(&req->list);
1533 spin_unlock_irqrestore(&target->lock, flags);
1535 dev = target->srp_host->srp_dev->dev;
1536 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
1540 scmnd->host_scribble = (void *) req;
1543 memset(cmd, 0, sizeof *cmd);
1545 cmd->opcode = SRP_CMD;
1546 cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48);
1547 cmd->tag = req->index;
1548 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
1553 len = srp_map_data(scmnd, target, req);
1555 shost_printk(KERN_ERR, target->scsi_host,
1556 PFX "Failed to map data\n");
1560 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
1563 if (srp_post_send(target, iu, len)) {
1564 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
1570 mutex_unlock(&rport->mutex);
1575 srp_unmap_data(scmnd, target, req);
1578 srp_put_tx_iu(target, iu, SRP_IU_CMD);
	/*
	 * Ensure that the loops that iterate over the request ring never
	 * encounter a dangling SCSI command pointer.
	 */
1586 spin_lock_irqsave(&target->lock, flags);
1587 list_add(&req->list, &target->free_reqs);
1590 spin_unlock_irqrestore(&target->lock, flags);
1593 mutex_unlock(&rport->mutex);
1595 return SCSI_MLQUEUE_HOST_BUSY;
1599 * Note: the resources allocated in this function are freed in
1600 * srp_free_target_ib().
1602 static int srp_alloc_iu_bufs(struct srp_target_port *target)
1606 target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring),
1608 if (!target->rx_ring)
1610 target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring),
1612 if (!target->tx_ring)
1615 for (i = 0; i < target->queue_size; ++i) {
1616 target->rx_ring[i] = srp_alloc_iu(target->srp_host,
1617 target->max_ti_iu_len,
1618 GFP_KERNEL, DMA_FROM_DEVICE);
1619 if (!target->rx_ring[i])
1623 for (i = 0; i < target->queue_size; ++i) {
1624 target->tx_ring[i] = srp_alloc_iu(target->srp_host,
1626 GFP_KERNEL, DMA_TO_DEVICE);
1627 if (!target->tx_ring[i])
1630 list_add(&target->tx_ring[i]->list, &target->free_tx);
1636 for (i = 0; i < target->queue_size; ++i) {
1637 srp_free_iu(target->srp_host, target->rx_ring[i]);
1638 srp_free_iu(target->srp_host, target->tx_ring[i]);
1643 kfree(target->tx_ring);
1644 target->tx_ring = NULL;
1645 kfree(target->rx_ring);
1646 target->rx_ring = NULL;
1651 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
1653 uint64_t T_tr_ns, max_compl_time_ms;
1654 uint32_t rq_tmo_jiffies;
1657 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
1658 * table 91), both the QP timeout and the retry count have to be set
1659 * for RC QP's during the RTR to RTS transition.
1661 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
1662 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
1665 * Set target->rq_tmo_jiffies to one second more than the largest time
1666 * it can take before an error completion is generated. See also
1667 * C9-140..142 in the IBTA spec for more information about how to
1668 * convert the QP Local ACK Timeout value to nanoseconds.
1670 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
1671 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
1672 do_div(max_compl_time_ms, NSEC_PER_MSEC);
1673 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
1675 return rq_tmo_jiffies;
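/*
 * Worked example (illustrative numbers, not from the original source): for
 * qp_attr->timeout == 20 the local ACK timeout T_tr is 4096 ns * 2^20, i.e.
 * about 4.3 s. With qp_attr->retry_cnt == 7 the last retry can complete up
 * to 7 * 4 * 4.3 s, roughly 120 s, after the request was posted, so the
 * computed rq_tmo_jiffies corresponds to roughly 121 s.
 */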
1678 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1679 struct srp_login_rsp *lrsp,
1680 struct srp_target_port *target)
1682 struct ib_qp_attr *qp_attr = NULL;
1687 if (lrsp->opcode == SRP_LOGIN_RSP) {
1688 target->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
1689 target->req_lim = be32_to_cpu(lrsp->req_lim_delta);
1692 * Reserve credits for task management so we don't
1693 * bounce requests back to the SCSI mid-layer.
1695 target->scsi_host->can_queue
1696 = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
1697 target->scsi_host->can_queue);
1698 target->scsi_host->cmd_per_lun
1699 = min_t(int, target->scsi_host->can_queue,
1700 target->scsi_host->cmd_per_lun);
1702 shost_printk(KERN_WARNING, target->scsi_host,
1703 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
1708 if (!target->rx_ring) {
1709 ret = srp_alloc_iu_bufs(target);
1715 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
1719 qp_attr->qp_state = IB_QPS_RTR;
1720 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
1724 ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
1728 for (i = 0; i < target->queue_size; i++) {
1729 struct srp_iu *iu = target->rx_ring[i];
1730 ret = srp_post_recv(target, iu);
1735 qp_attr->qp_state = IB_QPS_RTS;
1736 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
1740 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
1742 ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
1746 ret = ib_send_cm_rtu(cm_id, NULL, 0);
1752 target->status = ret;
1755 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
1756 struct ib_cm_event *event,
1757 struct srp_target_port *target)
1759 struct Scsi_Host *shost = target->scsi_host;
1760 struct ib_class_port_info *cpi;
1763 switch (event->param.rej_rcvd.reason) {
1764 case IB_CM_REJ_PORT_CM_REDIRECT:
1765 cpi = event->param.rej_rcvd.ari;
1766 target->path.dlid = cpi->redirect_lid;
1767 target->path.pkey = cpi->redirect_pkey;
1768 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
1769 memcpy(target->path.dgid.raw, cpi->redirect_gid, 16);
1771 target->status = target->path.dlid ?
1772 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
1775 case IB_CM_REJ_PORT_REDIRECT:
1776 if (srp_target_is_topspin(target)) {
1778 * Topspin/Cisco SRP gateways incorrectly send
1779 * reject reason code 25 when they mean 24
1782 memcpy(target->path.dgid.raw,
1783 event->param.rej_rcvd.ari, 16);
1785 shost_printk(KERN_DEBUG, shost,
1786 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
1787 (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix),
1788 (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id));
1790 target->status = SRP_PORT_REDIRECT;
1792 shost_printk(KERN_WARNING, shost,
1793 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
1794 target->status = -ECONNRESET;
1798 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
1799 shost_printk(KERN_WARNING, shost,
1800 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
1801 target->status = -ECONNRESET;
1804 case IB_CM_REJ_CONSUMER_DEFINED:
1805 opcode = *(u8 *) event->private_data;
1806 if (opcode == SRP_LOGIN_REJ) {
1807 struct srp_login_rej *rej = event->private_data;
1808 u32 reason = be32_to_cpu(rej->reason);
1810 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
1811 shost_printk(KERN_WARNING, shost,
1812 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
1814 shost_printk(KERN_WARNING, shost,
1815 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
1817 shost_printk(KERN_WARNING, shost,
1818 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
1819 " opcode 0x%02x\n", opcode);
1820 target->status = -ECONNRESET;
1823 case IB_CM_REJ_STALE_CONN:
1824 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
1825 target->status = SRP_STALE_CONN;
1829 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
1830 event->param.rej_rcvd.reason);
1831 target->status = -ECONNRESET;
1835 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1837 struct srp_target_port *target = cm_id->context;
1840 switch (event->event) {
1841 case IB_CM_REQ_ERROR:
1842 shost_printk(KERN_DEBUG, target->scsi_host,
1843 PFX "Sending CM REQ failed\n");
1845 target->status = -ECONNRESET;
1848 case IB_CM_REP_RECEIVED:
1850 srp_cm_rep_handler(cm_id, event->private_data, target);
1853 case IB_CM_REJ_RECEIVED:
1854 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
1857 srp_cm_rej_handler(cm_id, event, target);
1860 case IB_CM_DREQ_RECEIVED:
1861 shost_printk(KERN_WARNING, target->scsi_host,
1862 PFX "DREQ received - connection closed\n");
1863 srp_change_conn_state(target, false);
1864 if (ib_send_cm_drep(cm_id, NULL, 0))
1865 shost_printk(KERN_ERR, target->scsi_host,
1866 PFX "Sending CM DREP failed\n");
1867 queue_work(system_long_wq, &target->tl_err_work);
1870 case IB_CM_TIMEWAIT_EXIT:
1871 shost_printk(KERN_ERR, target->scsi_host,
1872 PFX "connection closed\n");
1877 case IB_CM_MRA_RECEIVED:
1878 case IB_CM_DREQ_ERROR:
1879 case IB_CM_DREP_RECEIVED:
1883 shost_printk(KERN_WARNING, target->scsi_host,
1884 PFX "Unhandled CM event %d\n", event->event);
1889 complete(&target->done);
1895 * srp_change_queue_type - changing device queue tag type
1896 * @sdev: scsi device struct
1897 * @tag_type: requested tag type
1899 * Returns queue tag type.
1902 srp_change_queue_type(struct scsi_device *sdev, int tag_type)
1904 if (sdev->tagged_supported) {
1905 scsi_set_tag_type(sdev, tag_type);
1907 scsi_activate_tcq(sdev, sdev->queue_depth);
1909 scsi_deactivate_tcq(sdev, sdev->queue_depth);
1917 * srp_change_queue_depth - setting device queue depth
1918 * @sdev: scsi device struct
1919 * @qdepth: requested queue depth
1920 * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP
1921 * (see include/scsi/scsi_host.h for definition)
1923 * Returns queue depth.
1926 srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
1928 struct Scsi_Host *shost = sdev->host;
1930 if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) {
1931 max_depth = shost->can_queue;
1932 if (!sdev->tagged_supported)
1934 if (qdepth > max_depth)
1936 scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
1937 } else if (reason == SCSI_QDEPTH_QFULL)
1938 scsi_track_queue_full(sdev, qdepth);
1942 return sdev->queue_depth;
1945 static int srp_send_tsk_mgmt(struct srp_target_port *target,
1946 u64 req_tag, unsigned int lun, u8 func)
1948 struct srp_rport *rport = target->rport;
1949 struct ib_device *dev = target->srp_host->srp_dev->dev;
1951 struct srp_tsk_mgmt *tsk_mgmt;
1953 if (!target->connected || target->qp_in_error)
1956 init_completion(&target->tsk_mgmt_done);
	/*
	 * Lock the rport mutex to prevent srp_create_target_ib() from being
	 * invoked while a task management function is being sent.
	 */
1962 mutex_lock(&rport->mutex);
1963 spin_lock_irq(&target->lock);
1964 iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
1965 spin_unlock_irq(&target->lock);
1968 mutex_unlock(&rport->mutex);
1973 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
1976 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
1978 tsk_mgmt->opcode = SRP_TSK_MGMT;
1979 tsk_mgmt->lun = cpu_to_be64((u64) lun << 48);
1980 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
1981 tsk_mgmt->tsk_mgmt_func = func;
1982 tsk_mgmt->task_tag = req_tag;
1984 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
1986 if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
1987 srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
1988 mutex_unlock(&rport->mutex);
1992 mutex_unlock(&rport->mutex);
1994 if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
1995 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2001 static int srp_abort(struct scsi_cmnd *scmnd)
2003 struct srp_target_port *target = host_to_target(scmnd->device->host);
2004 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2007 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2009 if (!req || !srp_claim_req(target, req, scmnd))
2011 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
2012 SRP_TSK_ABORT_TASK) == 0)
2014 else if (target->rport->state == SRP_RPORT_LOST)
2018 srp_free_req(target, req, scmnd, 0);
2019 scmnd->result = DID_ABORT << 16;
2020 scmnd->scsi_done(scmnd);
2025 static int srp_reset_device(struct scsi_cmnd *scmnd)
2027 struct srp_target_port *target = host_to_target(scmnd->device->host);
2030 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2032 if (srp_send_tsk_mgmt(target, SRP_TAG_NO_REQ, scmnd->device->lun,
2035 if (target->tsk_mgmt_status)
2038 for (i = 0; i < target->req_ring_size; ++i) {
2039 struct srp_request *req = &target->req_ring[i];
2040 if (req->scmnd && req->scmnd->device == scmnd->device)
2041 srp_finish_req(target, req, DID_RESET << 16);
2047 static int srp_reset_host(struct scsi_cmnd *scmnd)
2049 struct srp_target_port *target = host_to_target(scmnd->device->host);
2051 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2053 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2056 static int srp_slave_configure(struct scsi_device *sdev)
2058 struct Scsi_Host *shost = sdev->host;
2059 struct srp_target_port *target = host_to_target(shost);
2060 struct request_queue *q = sdev->request_queue;
2061 unsigned long timeout;
2063 if (sdev->type == TYPE_DISK) {
2064 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2065 blk_queue_rq_timeout(q, timeout);
2071 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2074 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2076 return sprintf(buf, "0x%016llx\n",
2077 (unsigned long long) be64_to_cpu(target->id_ext));
2080 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2083 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2085 return sprintf(buf, "0x%016llx\n",
2086 (unsigned long long) be64_to_cpu(target->ioc_guid));
2089 static ssize_t show_service_id(struct device *dev,
2090 struct device_attribute *attr, char *buf)
2092 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2094 return sprintf(buf, "0x%016llx\n",
2095 (unsigned long long) be64_to_cpu(target->service_id));
2098 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2101 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2103 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
2106 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2109 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2111 return sprintf(buf, "%pI6\n", target->path.sgid.raw);
2114 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2117 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2119 return sprintf(buf, "%pI6\n", target->path.dgid.raw);
2122 static ssize_t show_orig_dgid(struct device *dev,
2123 struct device_attribute *attr, char *buf)
2125 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2127 return sprintf(buf, "%pI6\n", target->orig_dgid);
2130 static ssize_t show_req_lim(struct device *dev,
2131 struct device_attribute *attr, char *buf)
2133 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2135 return sprintf(buf, "%d\n", target->req_lim);
2138 static ssize_t show_zero_req_lim(struct device *dev,
2139 struct device_attribute *attr, char *buf)
2141 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2143 return sprintf(buf, "%d\n", target->zero_req_lim);
2146 static ssize_t show_local_ib_port(struct device *dev,
2147 struct device_attribute *attr, char *buf)
2149 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2151 return sprintf(buf, "%d\n", target->srp_host->port);
2154 static ssize_t show_local_ib_device(struct device *dev,
2155 struct device_attribute *attr, char *buf)
2157 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2159 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2162 static ssize_t show_comp_vector(struct device *dev,
2163 struct device_attribute *attr, char *buf)
2165 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2167 return sprintf(buf, "%d\n", target->comp_vector);
2170 static ssize_t show_tl_retry_count(struct device *dev,
2171 struct device_attribute *attr, char *buf)
2173 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2175 return sprintf(buf, "%d\n", target->tl_retry_count);
2178 static ssize_t show_cmd_sg_entries(struct device *dev,
2179 struct device_attribute *attr, char *buf)
2181 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2183 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2186 static ssize_t show_allow_ext_sg(struct device *dev,
2187 struct device_attribute *attr, char *buf)
2189 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2191 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2194 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2195 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2196 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2197 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2198 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2199 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2200 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2201 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2202 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2203 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2204 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2205 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2206 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2207 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2208 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2210 static struct device_attribute *srp_host_attrs[] = {
2213 &dev_attr_service_id,
2217 &dev_attr_orig_dgid,
2219 &dev_attr_zero_req_lim,
2220 &dev_attr_local_ib_port,
2221 &dev_attr_local_ib_device,
2222 &dev_attr_comp_vector,
2223 &dev_attr_tl_retry_count,
2224 &dev_attr_cmd_sg_entries,
2225 &dev_attr_allow_ext_sg,
2229 static struct scsi_host_template srp_template = {
2230 .module = THIS_MODULE,
2231 .name = "InfiniBand SRP initiator",
2232 .proc_name = DRV_NAME,
2233 .slave_configure = srp_slave_configure,
2234 .info = srp_target_info,
2235 .queuecommand = srp_queuecommand,
2236 .change_queue_depth = srp_change_queue_depth,
2237 .change_queue_type = srp_change_queue_type,
2238 .eh_abort_handler = srp_abort,
2239 .eh_device_reset_handler = srp_reset_device,
2240 .eh_host_reset_handler = srp_reset_host,
2241 .skip_settle_delay = true,
2242 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2243 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2245 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2246 .use_clustering = ENABLE_CLUSTERING,
2247 .shost_attrs = srp_host_attrs
2250 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2252 struct srp_rport_identifiers ids;
2253 struct srp_rport *rport;
2255 sprintf(target->target_name, "SRP.T10:%016llX",
2256 (unsigned long long) be64_to_cpu(target->id_ext));
2258 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2261 memcpy(ids.port_id, &target->id_ext, 8);
2262 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2263 ids.roles = SRP_RPORT_ROLE_TARGET;
2264 rport = srp_rport_add(target->scsi_host, &ids);
2265 if (IS_ERR(rport)) {
2266 scsi_remove_host(target->scsi_host);
2267 return PTR_ERR(rport);
2268 }
2270 rport->lld_data = target;
2271 target->rport = rport;
2273 spin_lock(&host->target_lock);
2274 list_add_tail(&target->list, &host->target_list);
2275 spin_unlock(&host->target_lock);
2277 target->state = SRP_TARGET_LIVE;
2279 scsi_scan_target(&target->scsi_host->shost_gendev,
2280 0, target->scsi_id, SCAN_WILD_CARD, 0);
2282 return 0;
2283 }
2285 static void srp_release_dev(struct device *dev)
2286 {
2287 struct srp_host *host =
2288 container_of(dev, struct srp_host, dev);
2290 complete(&host->released);
2291 }
2293 static struct class srp_class = {
2294 .name = "infiniband_srp",
2295 .dev_release = srp_release_dev
2296 };
2298 /**
2299 * srp_conn_unique() - check whether the connection to a target is unique
2300 */
2301 static bool srp_conn_unique(struct srp_host *host,
2302 struct srp_target_port *target)
2303 {
2304 struct srp_target_port *t;
2305 bool ret = false;
2307 if (target->state == SRP_TARGET_REMOVED)
2308 goto out;
2310 ret = true;
2312 spin_lock(&host->target_lock);
2313 list_for_each_entry(t, &host->target_list, list) {
2314 if (t != target &&
2315 target->id_ext == t->id_ext &&
2316 target->ioc_guid == t->ioc_guid &&
2317 target->initiator_ext == t->initiator_ext) {
2318 ret = false;
2319 break;
2320 }
2321 }
2322 spin_unlock(&host->target_lock);
2324 out:
2325 return ret;
2326 }
2328 /*
2329 * Target ports are added by writing
2330 *
2331 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2332 * pkey=<P_Key>,service_id=<service ID>
2333 *
2334 * to the add_target sysfs attribute.
2335 */
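/*
 * Illustrative example only (every identifier below is a made-up value):
 *
 *   echo "id_ext=200100e08b000000,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=0002c90200402bd4" \
 *     > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target
 *
 * The "srp-<device>-<port>" directory name comes from the dev_set_name()
 * call in srp_add_port() below; "mlx4_0" is just an example HCA name.
 */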
2336 enum {
2337 SRP_OPT_ERR = 0,
2338 SRP_OPT_ID_EXT = 1 << 0,
2339 SRP_OPT_IOC_GUID = 1 << 1,
2340 SRP_OPT_DGID = 1 << 2,
2341 SRP_OPT_PKEY = 1 << 3,
2342 SRP_OPT_SERVICE_ID = 1 << 4,
2343 SRP_OPT_MAX_SECT = 1 << 5,
2344 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
2345 SRP_OPT_IO_CLASS = 1 << 7,
2346 SRP_OPT_INITIATOR_EXT = 1 << 8,
2347 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
2348 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
2349 SRP_OPT_SG_TABLESIZE = 1 << 11,
2350 SRP_OPT_COMP_VECTOR = 1 << 12,
2351 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
2352 SRP_OPT_QUEUE_SIZE = 1 << 14,
2353 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
2354 SRP_OPT_IOC_GUID |
2355 SRP_OPT_DGID |
2356 SRP_OPT_PKEY |
2357 SRP_OPT_SERVICE_ID),
2358 };
2360 static const match_table_t srp_opt_tokens = {
2361 { SRP_OPT_ID_EXT, "id_ext=%s" },
2362 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
2363 { SRP_OPT_DGID, "dgid=%s" },
2364 { SRP_OPT_PKEY, "pkey=%x" },
2365 { SRP_OPT_SERVICE_ID, "service_id=%s" },
2366 { SRP_OPT_MAX_SECT, "max_sect=%d" },
2367 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
2368 { SRP_OPT_IO_CLASS, "io_class=%x" },
2369 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
2370 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
2371 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2372 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2373 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2374 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2375 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2376 { SRP_OPT_ERR, NULL }
2377 };
2379 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2380 {
2381 char *options, *sep_opt;
2382 char *p;
2383 char dgid[3];
2384 substring_t args[MAX_OPT_ARGS];
2385 int opt_mask = 0;
2386 int token;
2387 int ret = -EINVAL;
2388 int i;
2390 options = kstrdup(buf, GFP_KERNEL);
2391 if (!options)
2392 return -ENOMEM;
2394 sep_opt = options;
2395 while ((p = strsep(&sep_opt, ",")) != NULL) {
2396 if (!*p)
2397 continue;
2399 token = match_token(p, srp_opt_tokens, args);
2400 opt_mask |= token;
2402 switch (token) {
2403 case SRP_OPT_ID_EXT:
2404 p = match_strdup(args);
2409 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2413 case SRP_OPT_IOC_GUID:
2414 p = match_strdup(args);
2419 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
2423 case SRP_OPT_DGID:
2424 p = match_strdup(args);
2425 if (!p) {
2426 ret = -ENOMEM;
2427 goto out;
2428 }
2429 if (strlen(p) != 32) {
2430 pr_warn("bad dest GID parameter '%s'\n", p);
2431 kfree(p);
2432 goto out;
2433 }
2435 for (i = 0; i < 16; ++i) {
2436 strlcpy(dgid, p + i * 2, 3);
2437 target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16);
2438 }
2439 kfree(p);
2440 memcpy(target->orig_dgid, target->path.dgid.raw, 16);
2441 break;
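/*
 * Example of the conversion above (made-up GID): for
 * dgid=fe800000000000000002c90300a8b1c2 the loop consumes two hex digits
 * at a time, so path.dgid.raw[] becomes { 0xfe, 0x80, 0x00, ..., 0xc2 }.
 */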
2443 case SRP_OPT_PKEY:
2444 if (match_hex(args, &token)) {
2445 pr_warn("bad P_Key parameter '%s'\n", p);
2446 goto out;
2447 }
2448 target->path.pkey = cpu_to_be16(token);
2449 break;
2451 case SRP_OPT_SERVICE_ID:
2452 p = match_strdup(args);
2457 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
2458 target->path.service_id = target->service_id;
2462 case SRP_OPT_MAX_SECT:
2463 if (match_int(args, &token)) {
2464 pr_warn("bad max sect parameter '%s'\n", p);
2467 target->scsi_host->max_sectors = token;
2470 case SRP_OPT_QUEUE_SIZE:
2471 if (match_int(args, &token) || token < 1) {
2472 pr_warn("bad queue_size parameter '%s'\n", p);
2475 target->scsi_host->can_queue = token;
2476 target->queue_size = token + SRP_RSP_SQ_SIZE +
2477 SRP_TSK_MGMT_SQ_SIZE;
2478 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
2479 target->scsi_host->cmd_per_lun = token;
2482 case SRP_OPT_MAX_CMD_PER_LUN:
2483 if (match_int(args, &token) || token < 1) {
2484 pr_warn("bad max cmd_per_lun parameter '%s'\n",
2488 target->scsi_host->cmd_per_lun = token;
2491 case SRP_OPT_IO_CLASS:
2492 if (match_hex(args, &token)) {
2493 pr_warn("bad IO class parameter '%s'\n", p);
2496 if (token != SRP_REV10_IB_IO_CLASS &&
2497 token != SRP_REV16A_IB_IO_CLASS) {
2498 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
2499 token, SRP_REV10_IB_IO_CLASS,
2500 SRP_REV16A_IB_IO_CLASS);
2503 target->io_class = token;
2506 case SRP_OPT_INITIATOR_EXT:
2507 p = match_strdup(args);
2512 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2516 case SRP_OPT_CMD_SG_ENTRIES:
2517 if (match_int(args, &token) || token < 1 || token > 255) {
2518 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
2522 target->cmd_sg_cnt = token;
2525 case SRP_OPT_ALLOW_EXT_SG:
2526 if (match_int(args, &token)) {
2527 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
2530 target->allow_ext_sg = !!token;
2533 case SRP_OPT_SG_TABLESIZE:
2534 if (match_int(args, &token) || token < 1 ||
2535 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
2536 pr_warn("bad max sg_tablesize parameter '%s'\n",
2540 target->sg_tablesize = token;
2543 case SRP_OPT_COMP_VECTOR:
2544 if (match_int(args, &token) || token < 0) {
2545 pr_warn("bad comp_vector parameter '%s'\n", p);
2548 target->comp_vector = token;
2551 case SRP_OPT_TL_RETRY_COUNT:
2552 if (match_int(args, &token) || token < 2 || token > 7) {
2553 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
2557 target->tl_retry_count = token;
2560 default:
2561 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
2562 p);
2563 goto out;
2564 }
2565 }
2567 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
2568 ret = 0;
2569 else
2570 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
2571 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
2572 !(srp_opt_tokens[i].token & opt_mask))
2573 pr_warn("target creation request is missing parameter '%s'\n",
2574 srp_opt_tokens[i].pattern);
2576 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
2577 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
2578 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
2579 target->scsi_host->cmd_per_lun,
2580 target->scsi_host->can_queue);
2582 out:
2583 kfree(options);
2584 return ret;
2585 }
2587 static ssize_t srp_create_target(struct device *dev,
2588 struct device_attribute *attr,
2589 const char *buf, size_t count)
2590 {
2591 struct srp_host *host =
2592 container_of(dev, struct srp_host, dev);
2593 struct Scsi_Host *target_host;
2594 struct srp_target_port *target;
2595 struct ib_device *ibdev = host->srp_dev->dev;
2596 int ret;
2598 target_host = scsi_host_alloc(&srp_template,
2599 sizeof (struct srp_target_port));
2600 if (!target_host)
2601 return -ENOMEM;
2603 target_host->transportt = ib_srp_transport_template;
2604 target_host->max_channel = 0;
2605 target_host->max_id = 1;
2606 target_host->max_lun = SRP_MAX_LUN;
2607 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
2609 target = host_to_target(target_host);
2611 target->io_class = SRP_REV16A_IB_IO_CLASS;
2612 target->scsi_host = target_host;
2613 target->srp_host = host;
2614 target->lkey = host->srp_dev->mr->lkey;
2615 target->rkey = host->srp_dev->mr->rkey;
2616 target->cmd_sg_cnt = cmd_sg_entries;
2617 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
2618 target->allow_ext_sg = allow_ext_sg;
2619 target->tl_retry_count = 7;
2620 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
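/*
 * The defaults initialized above may be overridden by the comma-separated
 * option string parsed next (see srp_opt_tokens and srp_parse_options()).
 */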
2622 ret = srp_parse_options(buf, target);
2623 if (ret)
2624 goto err;
2626 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
2628 if (!srp_conn_unique(target->srp_host, target)) {
2629 shost_printk(KERN_INFO, target->scsi_host,
2630 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
2631 be64_to_cpu(target->id_ext),
2632 be64_to_cpu(target->ioc_guid),
2633 be64_to_cpu(target->initiator_ext));
2634 ret = -EEXIST;
2635 goto err;
2636 }
2638 if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
2639 target->cmd_sg_cnt < target->sg_tablesize) {
2640 pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
2641 target->sg_tablesize = target->cmd_sg_cnt;
2642 }
2644 target_host->sg_tablesize = target->sg_tablesize;
2645 target->indirect_size = target->sg_tablesize *
2646 sizeof (struct srp_direct_buf);
2647 target->max_iu_len = sizeof (struct srp_cmd) +
2648 sizeof (struct srp_indirect_buf) +
2649 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
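/*
 * Rough size example, assuming the usual SRP wire-format sizes (48-byte
 * struct srp_cmd, 20-byte struct srp_indirect_buf, 16-byte struct
 * srp_direct_buf) and the default cmd_sg_cnt of 12:
 * max_iu_len = 48 + 20 + 12 * 16 = 260 bytes.
 */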
2651 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
2652 INIT_WORK(&target->remove_work, srp_remove_work);
2653 spin_lock_init(&target->lock);
2654 INIT_LIST_HEAD(&target->free_tx);
2655 ret = srp_alloc_req_data(target);
2656 if (ret)
2657 goto err_free_mem;
2659 ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
2661 shost_printk(KERN_DEBUG, target->scsi_host, PFX
2662 "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
2663 "service_id %016llx dgid %pI6\n",
2664 (unsigned long long) be64_to_cpu(target->id_ext),
2665 (unsigned long long) be64_to_cpu(target->ioc_guid),
2666 be16_to_cpu(target->path.pkey),
2667 (unsigned long long) be64_to_cpu(target->service_id),
2668 target->path.dgid.raw);
2670 ret = srp_create_target_ib(target);
2671 if (ret)
2672 goto err_free_mem;
2674 ret = srp_new_cm_id(target);
2675 if (ret)
2676 goto err_free_ib;
2678 ret = srp_connect_target(target);
2679 if (ret) {
2680 shost_printk(KERN_ERR, target->scsi_host,
2681 PFX "Connection failed\n");
2682 goto err_cm_id;
2683 }
2685 ret = srp_add_target(host, target);
2686 if (ret)
2687 goto err_disconnect;
2689 return count;
2691 err_disconnect:
2692 srp_disconnect_target(target);
2694 err_cm_id:
2695 ib_destroy_cm_id(target->cm_id);
2697 err_free_ib:
2698 srp_free_target_ib(target);
2700 err_free_mem:
2701 srp_free_req_data(target);
2703 err:
2704 scsi_host_put(target_host);
2706 return ret;
2707 }
2709 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
2711 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
2712 char *buf)
2713 {
2714 struct srp_host *host = container_of(dev, struct srp_host, dev);
2716 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
2717 }
2719 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
2721 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
2722 char *buf)
2723 {
2724 struct srp_host *host = container_of(dev, struct srp_host, dev);
2726 return sprintf(buf, "%d\n", host->port);
2727 }
2729 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
2731 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
2732 {
2733 struct srp_host *host;
2735 host = kzalloc(sizeof *host, GFP_KERNEL);
2736 if (!host)
2737 return NULL;
2739 INIT_LIST_HEAD(&host->target_list);
2740 spin_lock_init(&host->target_lock);
2741 init_completion(&host->released);
2742 host->srp_dev = device;
2743 host->port = port;
2745 host->dev.class = &srp_class;
2746 host->dev.parent = device->dev->dma_device;
2747 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
2749 if (device_register(&host->dev))
2750 goto free_host;
2751 if (device_create_file(&host->dev, &dev_attr_add_target))
2752 goto err_class;
2753 if (device_create_file(&host->dev, &dev_attr_ibdev))
2754 goto err_class;
2755 if (device_create_file(&host->dev, &dev_attr_port))
2756 goto err_class;
2758 return host;
2760 err_class:
2761 device_unregister(&host->dev);
2763 free_host:
2764 kfree(host);
2766 return NULL;
2767 }
2769 static void srp_add_one(struct ib_device *device)
2770 {
2771 struct srp_device *srp_dev;
2772 struct ib_device_attr *dev_attr;
2773 struct ib_fmr_pool_param fmr_param;
2774 struct srp_host *host;
2775 int max_pages_per_fmr, fmr_page_shift, s, e, p;
2777 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
2778 if (!dev_attr)
2779 return;
2781 if (ib_query_device(device, dev_attr)) {
2782 pr_warn("Query device failed for %s\n", device->name);
2783 goto free_attr;
2784 }
2786 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
2787 if (!srp_dev)
2788 goto free_attr;
2790 /*
2791 * Use the smallest page size supported by the HCA, down to a
2792 * minimum of 4096 bytes. We're unlikely to build large sglists
2793 * out of smaller entries.
2794 */
2795 fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
2796 srp_dev->fmr_page_size = 1 << fmr_page_shift;
2797 srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1);
2798 srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE;
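/*
 * Worked example: if the HCA reports 4 KiB as its smallest supported page
 * size, ffs(dev_attr->page_size_cap) - 1 == 12, so fmr_page_shift = 12,
 * fmr_page_size = 1 << 12 = 4096, fmr_page_mask = ~0xfffULL and
 * fmr_max_size = 4096 * SRP_FMR_SIZE.
 */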
2800 INIT_LIST_HEAD(&srp_dev->dev_list);
2802 srp_dev->dev = device;
2803 srp_dev->pd = ib_alloc_pd(device);
2804 if (IS_ERR(srp_dev->pd))
2805 goto free_dev;
2807 srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
2808 IB_ACCESS_LOCAL_WRITE |
2809 IB_ACCESS_REMOTE_READ |
2810 IB_ACCESS_REMOTE_WRITE);
2811 if (IS_ERR(srp_dev->mr))
2812 goto err_pd;
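/*
 * Try to create an FMR pool, halving max_pages_per_fmr (and hence
 * fmr_max_size) on each failure until either pool creation succeeds or the
 * SRP_FMR_MIN_SIZE floor is reached; if every attempt fails the driver
 * continues without an FMR pool (fmr_pool is set to NULL below).
 */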
2814 for (max_pages_per_fmr = SRP_FMR_SIZE;
2815 max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
2816 max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
2817 memset(&fmr_param, 0, sizeof fmr_param);
2818 fmr_param.pool_size = SRP_FMR_POOL_SIZE;
2819 fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
2820 fmr_param.cache = 1;
2821 fmr_param.max_pages_per_fmr = max_pages_per_fmr;
2822 fmr_param.page_shift = fmr_page_shift;
2823 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
2824 IB_ACCESS_REMOTE_WRITE |
2825 IB_ACCESS_REMOTE_READ);
2827 srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
2828 if (!IS_ERR(srp_dev->fmr_pool))
2829 break;
2830 }
2832 if (IS_ERR(srp_dev->fmr_pool))
2833 srp_dev->fmr_pool = NULL;
2835 if (device->node_type == RDMA_NODE_IB_SWITCH) {
2836 s = 0;
2837 e = 0;
2838 } else {
2839 s = 1;
2840 e = device->phys_port_cnt;
2841 }
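/* An IB switch exposes a single management port numbered 0; HCAs and routers number their physical ports 1..phys_port_cnt. */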
2843 for (p = s; p <= e; ++p) {
2844 host = srp_add_port(srp_dev, p);
2845 if (host)
2846 list_add_tail(&host->list, &srp_dev->dev_list);
2847 }
2849 ib_set_client_data(device, &srp_client, srp_dev);
2851 goto free_attr;
2853 err_pd:
2854 ib_dealloc_pd(srp_dev->pd);
2856 free_dev:
2857 kfree(srp_dev);
2859 free_attr:
2860 kfree(dev_attr);
2861 }
2863 static void srp_remove_one(struct ib_device *device)
2864 {
2865 struct srp_device *srp_dev;
2866 struct srp_host *host, *tmp_host;
2867 struct srp_target_port *target;
2869 srp_dev = ib_get_client_data(device, &srp_client);
2873 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
2874 device_unregister(&host->dev);
2875 /*
2876 * Wait for the sysfs entry to go away, so that no new
2877 * target ports can be created.
2878 */
2879 wait_for_completion(&host->released);
2881 /*
2882 * Remove all target ports.
2883 */
2884 spin_lock(&host->target_lock);
2885 list_for_each_entry(target, &host->target_list, list)
2886 srp_queue_remove_work(target);
2887 spin_unlock(&host->target_lock);
2889 /*
2890 * Wait for tl_err and target port removal tasks.
2891 */
2892 flush_workqueue(system_long_wq);
2893 flush_workqueue(srp_remove_wq);
2895 kfree(host);
2896 }
2898 if (srp_dev->fmr_pool)
2899 ib_destroy_fmr_pool(srp_dev->fmr_pool);
2900 ib_dereg_mr(srp_dev->mr);
2901 ib_dealloc_pd(srp_dev->pd);
2903 kfree(srp_dev);
2904 }
2906 static struct srp_function_template ib_srp_transport_functions = {
2907 .has_rport_state = true,
2908 .reset_timer_if_blocked = true,
2909 .reconnect_delay = &srp_reconnect_delay,
2910 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
2911 .dev_loss_tmo = &srp_dev_loss_tmo,
2912 .reconnect = srp_rport_reconnect,
2913 .rport_delete = srp_rport_delete,
2914 .terminate_rport_io = srp_terminate_io,
2915 };
2917 static int __init srp_init_module(void)
2918 {
2919 int ret;
2921 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
2923 if (srp_sg_tablesize) {
2924 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
2925 if (!cmd_sg_entries)
2926 cmd_sg_entries = srp_sg_tablesize;
2927 }
2929 if (!cmd_sg_entries)
2930 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
2932 if (cmd_sg_entries > 255) {
2933 pr_warn("Clamping cmd_sg_entries to 255\n");
2934 cmd_sg_entries = 255;
2935 }
2937 if (!indirect_sg_entries)
2938 indirect_sg_entries = cmd_sg_entries;
2939 else if (indirect_sg_entries < cmd_sg_entries) {
2940 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
2941 cmd_sg_entries);
2942 indirect_sg_entries = cmd_sg_entries;
2943 }
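/*
 * At this point cmd_sg_entries lies in [1, 255] and indirect_sg_entries is
 * at least cmd_sg_entries, so the defaults picked up later by
 * srp_create_target() are internally consistent.
 */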
2945 srp_remove_wq = create_workqueue("srp_remove");
2946 if (!srp_remove_wq) {
2947 ret = -ENOMEM;
2948 goto out;
2949 }
2951 ret = -ENOMEM;
2952 ib_srp_transport_template =
2953 srp_attach_transport(&ib_srp_transport_functions);
2954 if (!ib_srp_transport_template)
2955 goto destroy_wq;
2957 ret = class_register(&srp_class);
2958 if (ret) {
2959 pr_err("couldn't register class infiniband_srp\n");
2960 goto release_tr;
2961 }
2963 ib_sa_register_client(&srp_sa_client);
2965 ret = ib_register_client(&srp_client);
2967 pr_err("couldn't register IB client\n");
2968 goto unreg_sa;
2969 }
2971 out:
2972 return ret;
2974 unreg_sa:
2975 ib_sa_unregister_client(&srp_sa_client);
2976 class_unregister(&srp_class);
2978 release_tr:
2979 srp_release_transport(ib_srp_transport_template);
2981 destroy_wq:
2982 destroy_workqueue(srp_remove_wq);
2983 goto out;
2984 }
2986 static void __exit srp_cleanup_module(void)
2987 {
2988 ib_unregister_client(&srp_client);
2989 ib_sa_unregister_client(&srp_sa_client);
2990 class_unregister(&srp_class);
2991 srp_release_transport(ib_srp_transport_template);
2992 destroy_workqueue(srp_remove_wq);
2993 }
2995 module_init(srp_init_module);
2996 module_exit(srp_cleanup_module);