2 * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/module.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/errno.h>
38 #include <rdma/ib_user_verbs.h>
39 #include <rdma/ib_addr.h>
40 #include <rdma/uverbs_ioctl.h>
42 #include "usnic_abi.h"
44 #include "usnic_common_util.h"
45 #include "usnic_ib_qp_grp.h"
46 #include "usnic_ib_verbs.h"
47 #include "usnic_fwd.h"
48 #include "usnic_log.h"
49 #include "usnic_uiom.h"
50 #include "usnic_transport.h"
/* Transport named as the default: the custom RoCE transport. */
52 #define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
/*
 * Minimum vNIC resource specification for each transport, indexed by
 * transport type (the array has USNIC_TRANSPORT_MAX slots).
 *
 * Every entry is a list of {type, cnt} records terminated by a
 * USNIC_VNIC_RES_TYPE_EOL record.  The UNKNOWN transport lists nothing;
 * ROCE_CUSTOM and IPV4_UDP each list one WQ, one RQ and one CQ.
 */
54 const struct usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = {
55 { /*USNIC_TRANSPORT_UNKNOWN*/
57 {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
60 { /*USNIC_TRANSPORT_ROCE_CUSTOM*/
62 {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
63 {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
64 {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
65 {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
68 { /*USNIC_TRANSPORT_IPV4_UDP*/
70 {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
71 {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
72 {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
73 {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
/*
 * Load the first sizeof(u64) bytes of a firmware version string into *fw_ver.
 *
 * @fw_ver_str: buffer with at least sizeof(u64) readable bytes.
 * @fw_ver:     receives those bytes as a raw, host-byte-order value.
 *
 * The bytes are copied with memcpy() instead of being read through a cast
 * u64 pointer: a char buffer carries no 8-byte alignment guarantee, so the
 * cast load can fault (or be fixed up slowly) on strict-alignment CPUs and
 * is a type-punned access.  The resulting value is unchanged.
 */
static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
{
	memcpy(fw_ver, fw_ver_str, sizeof(*fw_ver));
}
/*
 * Build the create-QP response for @qp_grp and copy it to user space.
 *
 * The response is zeroed, then filled with the VF index, the bus address and
 * length of BAR 0, the count and vnic indices of the group's RQ, WQ and CQ
 * resources, and the transport type of the first flow on the group's flow
 * list.  It is finally handed to ib_copy_to_udata().
 *
 * A failed chunk lookup is logged and reported as PTR_ERR(chunk).
 */
83 static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
84 struct ib_udata *udata)
86 struct usnic_ib_dev *us_ibdev;
87 struct usnic_ib_create_qp_resp resp;
89 struct vnic_dev_bar *bar;
90 struct usnic_vnic_res_chunk *chunk;
91 struct usnic_ib_qp_grp_flow *default_flow;
/* Zero the whole struct so no stale stack bytes reach user space. */
94 memset(&resp, 0, sizeof(resp));
96 us_ibdev = qp_grp->vf->pf;
97 pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
99 usnic_err("Failed to get pdev of qp_grp %d\n",
104 bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0);
106 usnic_err("Failed to get bar0 of qp_grp %d vf %s",
107 qp_grp->grp_id, pci_name(pdev));
111 resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic);
112 resp.bar_bus_addr = bar->bus_addr;
113 resp.bar_len = bar->len;
/* Receive queues. */
115 chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
117 usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
118 usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
121 return PTR_ERR(chunk);
124 WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
125 resp.rq_cnt = chunk->cnt;
/*
 * NOTE(review): no bound on chunk->cnt against the size of resp.rq_idx is
 * visible here (same for wq_idx/cq_idx below) -- confirm it is enforced
 * where the chunk is built.
 */
126 for (i = 0; i < chunk->cnt; i++)
127 resp.rq_idx[i] = chunk->res[i]->vnic_idx;
/* Work (send) queues. */
129 chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
131 usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
132 usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
135 return PTR_ERR(chunk);
138 WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
139 resp.wq_cnt = chunk->cnt;
140 for (i = 0; i < chunk->cnt; i++)
141 resp.wq_idx[i] = chunk->res[i]->vnic_idx;
/* Completion queues. */
143 chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
145 usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
146 usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
149 return PTR_ERR(chunk);
152 WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
153 resp.cq_cnt = chunk->cnt;
154 for (i = 0; i < chunk->cnt; i++)
155 resp.cq_idx[i] = chunk->res[i]->vnic_idx;
/* The first flow on the list supplies the reported transport type. */
157 default_flow = list_first_entry(&qp_grp->flows_lst,
158 struct usnic_ib_qp_grp_flow, link);
159 resp.transport = default_flow->trans_type;
161 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
163 usnic_err("Failed to copy udata for %s",
164 dev_name(&us_ibdev->ib_dev.dev));
/*
 * Pick a VF that has room for @res_spec and create the QP group on it.
 *
 * The caller must hold us_ibdev->usdev_lock; this is enforced with BUG_ON().
 * When usnic_ib_share_vf is set, the devices already attached to the PD
 * (from usnic_uiom_get_dev_list()) are tried first.  After that the device's
 * VF list is scanned for a VF with qp_grp_ref_cnt == 0.  In both passes a VF
 * qualifies when usnic_vnic_check_room() returns 0, and each VF's own lock is
 * held while it is examined.
 *
 * A failure to obtain the PD's device list is returned as PTR_ERR(dev_list).
 */
172 find_free_vf_and_create_qp_grp(struct ib_qp *qp,
173 struct usnic_transport_spec *trans_spec,
174 struct usnic_vnic_res_spec *res_spec)
176 struct usnic_ib_dev *us_ibdev = to_usdev(qp->device);
177 struct usnic_ib_pd *pd = to_upd(qp->pd);
178 struct usnic_ib_vf *vf;
179 struct usnic_vnic *vnic;
180 struct usnic_ib_qp_grp *qp_grp = to_uqp_grp(qp);
181 struct device *dev, **dev_list;
184 BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
186 if (list_empty(&us_ibdev->vf_dev_list)) {
187 usnic_info("No vfs to allocate\n");
191 if (usnic_ib_share_vf) {
192 /* Try to find resources on a used vf which is in pd */
193 dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
194 if (IS_ERR(dev_list))
195 return PTR_ERR(dev_list);
/* dev_list is walked until its NULL terminator. */
196 for (i = 0; dev_list[i]; i++) {
198 vf = dev_get_drvdata(dev);
199 mutex_lock(&vf->lock);
201 if (!usnic_vnic_check_room(vnic, res_spec)) {
202 usnic_dbg("Found used vnic %s from %s\n",
203 dev_name(&us_ibdev->ib_dev.dev),
204 pci_name(usnic_vnic_get_pdev(
206 ret = usnic_ib_qp_grp_create(qp_grp,
211 mutex_unlock(&vf->lock);
214 mutex_unlock(&vf->lock);
217 usnic_uiom_free_dev_list(dev_list);
221 /* Try to find resources on an unused vf */
222 list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
223 mutex_lock(&vf->lock);
225 if (vf->qp_grp_ref_cnt == 0 &&
226 usnic_vnic_check_room(vnic, res_spec) == 0) {
227 ret = usnic_ib_qp_grp_create(qp_grp, us_ibdev->ufdev,
231 mutex_unlock(&vf->lock);
234 mutex_unlock(&vf->lock);
237 usnic_info("No free qp grp found on %s\n",
238 dev_name(&us_ibdev->ib_dev.dev));
243 usnic_err("Failed to allocate qp_grp\n");
/* The PD device list only exists when VF sharing is enabled. */
244 if (usnic_ib_share_vf)
245 usnic_uiom_free_dev_list(dev_list);
/*
 * Destroy @qp_grp while holding the lock of the VF it belongs to.
 *
 * Warns if the group is not in IB_QPS_RESET.  The vf pointer is read from
 * the group before usnic_ib_qp_grp_destroy() is called, so the unlock does
 * not touch the group afterwards.
 */
250 static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
252 struct usnic_ib_vf *vf = qp_grp->vf;
254 WARN_ON(qp_grp->state != IB_QPS_RESET);
256 mutex_lock(&vf->lock);
257 usnic_ib_qp_grp_destroy(qp_grp);
258 mutex_unlock(&vf->lock);
/*
 * Validate the create-QP command received from user space.
 *
 * @cmd is passed by value.  The visible check matches a transport type
 * outside the open range (USNIC_TRANSPORT_UNKNOWN, USNIC_TRANSPORT_MAX),
 * i.e. one that is <= UNKNOWN or >= MAX.
 */
261 static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
263 if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN ||
264 cmd.spec.trans_type >= USNIC_TRANSPORT_MAX)
270 /* Start of ib callback functions */
/* Report the port's link layer; usNIC always answers IB_LINK_LAYER_ETHERNET. */
272 enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
275 return IB_LINK_LAYER_ETHERNET;
/*
 * Fill @props with the device attributes of @ibdev.
 *
 * @uhw is tested for non-zero inlen/outlen before anything else is done.
 * Under usdev_lock, @props is zeroed and then populated: the system image
 * GUID comes from the interface id of a GID derived from the device MAC and
 * IP address, fw_ver from the first bytes of the ethtool firmware version
 * string, and max_qp/max_cq from the per-VF resource counts multiplied by
 * the current vf_cnt reference count.  Atomic, SRQ, fast-reg and multicast
 * limits are all reported as zero/none.
 */
278 int usnic_ib_query_device(struct ib_device *ibdev,
279 struct ib_device_attr *props,
280 struct ib_udata *uhw)
282 struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
284 struct ethtool_drvinfo info;
288 if (uhw->inlen || uhw->outlen)
291 mutex_lock(&us_ibdev->usdev_lock);
/*
 * NOTE(review): info is not visibly zeroed before this call -- confirm that
 * get_drvinfo initialises at least the first 8 bytes of fw_version, which
 * are copied into props->fw_ver below.
 */
292 us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
293 memset(props, 0, sizeof(*props));
294 usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
296 memcpy(&props->sys_image_guid, &gid.global.interface_id,
297 sizeof(gid.global.interface_id));
298 usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver);
299 props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE;
300 props->page_size_cap = USNIC_UIOM_PAGE_SIZE;
301 props->vendor_id = PCI_VENDOR_ID_CISCO;
302 props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC;
303 props->hw_ver = us_ibdev->pdev->subsystem_device;
/* QPs per VF: the larger of the WQ and RQ counts. */
304 qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
305 us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
306 props->max_qp = qp_per_vf *
307 kref_read(&us_ibdev->vf_cnt);
308 props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
309 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
310 props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
311 kref_read(&us_ibdev->vf_cnt);
312 props->max_pd = USNIC_UIOM_MAX_PD_CNT;
313 props->max_mr = USNIC_UIOM_MAX_MR_CNT;
314 props->local_ca_ack_delay = 0;
315 props->max_pkeys = 0;
316 props->atomic_cap = IB_ATOMIC_NONE;
317 props->masked_atomic_cap = props->atomic_cap;
318 props->max_qp_rd_atom = 0;
319 props->max_qp_init_rd_atom = 0;
320 props->max_res_rd_atom = 0;
322 props->max_srq_wr = 0;
323 props->max_srq_sge = 0;
324 props->max_fast_reg_page_list_len = 0;
325 props->max_mcast_grp = 0;
326 props->max_mcast_qp_attach = 0;
327 props->max_total_mcast_qp_attach = 0;
328 /* Owned by Userspace
329 * max_qp_wr, max_sge, max_sge_rd, max_cqe */
330 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Fill @props with the attributes of port @port of @ibdev.
 *
 * Speed and width are obtained from ib_get_eth_speed() before usdev_lock is
 * taken.  Under the lock the port state is derived from the forwarding
 * device: link down -> DOWN/DISABLED, link up without an IP address ->
 * INIT/PORT_CONFIGURATION_TRAINING, otherwise ACTIVE/LINK_UP.  The GID table
 * length is 1, max_mtu is IB_MTU_4096, and active_mtu/max_msg_sz come from
 * the forwarding device's MTU.
 */
335 int usnic_ib_query_port(struct ib_device *ibdev, u32 port,
336 struct ib_port_attr *props)
338 struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
342 if (ib_get_eth_speed(ibdev, port, &props->active_speed,
343 &props->active_width))
347 * usdev_lock is acquired after (and not before) ib_get_eth_speed call
348 * because acquiring rtnl_lock in ib_get_eth_speed, while holding
349 * usdev_lock could lead to a deadlock.
351 mutex_lock(&us_ibdev->usdev_lock);
352 /* props being zeroed by the caller, avoid zeroing it here */
359 if (!us_ibdev->ufdev->link_up) {
360 props->state = IB_PORT_DOWN;
361 props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
362 } else if (!us_ibdev->ufdev->inaddr) {
363 props->state = IB_PORT_INIT;
365 IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
367 props->state = IB_PORT_ACTIVE;
368 props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
371 props->port_cap_flags = 0;
372 props->gid_tbl_len = 1;
373 props->bad_pkey_cntr = 0;
374 props->qkey_viol_cntr = 0;
375 props->max_mtu = IB_MTU_4096;
376 props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu);
377 /* Userspace will adjust for hdrs */
378 props->max_msg_sz = us_ibdev->ufdev->mtu;
379 props->max_vl_num = 1;
380 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Report the current attributes of @qp.
 *
 * Both output structures are zeroed first.  Under the owning PF's usdev_lock
 * the QP group's state is reported as both qp_state and cur_qp_state, and
 * the QP type is dispatched on; an unexpected type is logged.  The lock is
 * released on both visible exit paths.
 */
385 int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
387 struct ib_qp_init_attr *qp_init_attr)
389 struct usnic_ib_qp_grp *qp_grp;
390 struct usnic_ib_vf *vf;
395 memset(qp_attr, 0, sizeof(*qp_attr));
396 memset(qp_init_attr, 0, sizeof(*qp_init_attr));
398 qp_grp = to_uqp_grp(qp);
400 mutex_lock(&vf->pf->usdev_lock);
402 qp_attr->qp_state = qp_grp->state;
403 qp_attr->cur_qp_state = qp_grp->state;
405 switch (qp_grp->ibqp.qp_type) {
410 usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type);
415 mutex_unlock(&vf->pf->usdev_lock);
419 mutex_unlock(&vf->pf->usdev_lock);
/*
 * Return the GID of @ibdev.
 *
 * Under usdev_lock the raw GID bytes are cleared and then derived from the
 * forwarding device's MAC and IP address via usnic_mac_ip_to_gid().
 */
423 int usnic_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
427 struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
433 mutex_lock(&us_ibdev->usdev_lock);
434 memset(&(gid->raw[0]), 0, sizeof(gid->raw));
435 usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
437 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Allocate the usnic_uiom protection domain backing @ibpd and store it in
 * pd->umem_pd.
 *
 * On failure the error encoded in the returned pointer is passed back, or
 * -ENOMEM if the allocator returned NULL.
 */
442 int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
444 struct usnic_ib_pd *pd = to_upd(ibpd);
447 umem_pd = pd->umem_pd = usnic_uiom_alloc_pd();
448 if (IS_ERR_OR_NULL(umem_pd)) {
449 return umem_pd ? PTR_ERR(umem_pd) : -ENOMEM;
/* Release the usnic_uiom protection domain attached to @pd. */
455 int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
457 usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
/*
 * Create a QP (backed by a usNIC QP group) for a user context.
 *
 * Steps visible here: test create_flags; copy the create command from
 * @udata; validate it; require qp_type == IB_QPT_UD.  Then, under
 * usdev_lock: take the minimum resource spec for the requested transport,
 * set its CQ count to 1 when send and receive CQ are the same object and 2
 * otherwise, create the QP group on a suitable VF, copy the response to
 * user space and link the group onto the context's qp_grp_list.
 *
 * If building the response fails the group is destroyed again; the lock is
 * released on every visible exit path taken after it was acquired.
 */
461 int usnic_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
462 struct ib_udata *udata)
465 struct usnic_ib_dev *us_ibdev;
466 struct usnic_ib_qp_grp *qp_grp = to_uqp_grp(ibqp);
467 struct usnic_ib_ucontext *ucontext = rdma_udata_to_drv_context(
468 udata, struct usnic_ib_ucontext, ibucontext);
470 struct usnic_vnic_res_spec res_spec;
471 struct usnic_ib_create_qp_cmd cmd;
472 struct usnic_transport_spec trans_spec;
476 us_ibdev = to_usdev(ibqp->device);
478 if (init_attr->create_flags)
481 err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
483 usnic_err("%s: cannot copy udata for create_qp\n",
484 dev_name(&us_ibdev->ib_dev.dev));
488 err = create_qp_validate_user_data(cmd);
490 usnic_err("%s: Failed to validate user data\n",
491 dev_name(&us_ibdev->ib_dev.dev));
495 if (init_attr->qp_type != IB_QPT_UD) {
496 usnic_err("%s asked to make a non-UD QP: %d\n",
497 dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type);
501 trans_spec = cmd.spec;
502 mutex_lock(&us_ibdev->usdev_lock);
/* A shared send/recv CQ needs one CQ resource, distinct ones need two. */
503 cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
/* Work on a local copy so the shared minimum-spec table stays untouched. */
504 res_spec = min_transport_spec[trans_spec.trans_type];
505 usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
506 err = find_free_vf_and_create_qp_grp(ibqp, &trans_spec, &res_spec);
508 goto out_release_mutex;
510 err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
513 goto out_release_qp_grp;
516 qp_grp->ctx = ucontext;
517 list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
518 usnic_ib_log_vf(qp_grp->vf);
519 mutex_unlock(&us_ibdev->usdev_lock);
523 qp_grp_destroy(qp_grp);
525 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Destroy @qp.
 *
 * Under the owning PF's usdev_lock the QP group is moved to IB_QPS_RESET (a
 * failure is logged), unlinked from the list it is on, and destroyed.
 */
529 int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
531 struct usnic_ib_qp_grp *qp_grp;
532 struct usnic_ib_vf *vf;
536 qp_grp = to_uqp_grp(qp);
538 mutex_lock(&vf->pf->usdev_lock);
539 if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) {
540 usnic_err("Failed to move qp grp %u to reset\n",
544 list_del(&qp_grp->link);
545 qp_grp_destroy(qp_grp);
546 mutex_unlock(&vf->pf->usdev_lock);
/*
 * Modify the attributes of @ibqp.
 *
 * Attribute masks with bits outside IB_QP_ATTR_STANDARD_BITS are tested for
 * up front.  Under the PF's usdev_lock: a port change to anything other than
 * port 1 is treated specially (single-port device), a state change is
 * forwarded to usnic_ib_qp_grp_modify(), and any other request is logged as
 * unhandled.
 */
551 int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
552 int attr_mask, struct ib_udata *udata)
554 struct usnic_ib_qp_grp *qp_grp;
558 if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
561 qp_grp = to_uqp_grp(ibqp);
563 mutex_lock(&qp_grp->vf->pf->usdev_lock);
564 if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
565 /* usnic devices only have one port */
569 if (attr_mask & IB_QP_STATE) {
570 status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
572 usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
577 mutex_unlock(&qp_grp->vf->pf->usdev_lock);
/*
 * Completion-queue creation callback.  Takes the CQ object, its init
 * attributes and the user-data channel.
 */
581 int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
582 struct ib_udata *udata)
/* Completion-queue destruction callback, the counterpart of CQ creation. */
590 int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
/*
 * Register a user memory region on @pd.
 *
 * A zeroed usnic_ib_mr is allocated with GFP_KERNEL, and the user range is
 * registered against the PD's umem_pd through usnic_uiom_reg_get().  Both
 * lkey and rkey of the resulting MR are set to 0.
 *
 * Returns ERR_PTR(-ENOMEM) if the allocation fails.  A failed registration
 * yields the error encoded in the returned pointer, or -EFAULT when the
 * helper returned NULL.
 */
595 struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
596 u64 virt_addr, int access_flags,
597 struct ib_udata *udata)
599 struct usnic_ib_mr *mr;
602 usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
605 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
607 return ERR_PTR(-ENOMEM);
609 mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
611 if (IS_ERR_OR_NULL(mr->umem)) {
612 err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT;
616 mr->ibmr.lkey = mr->ibmr.rkey = 0;
/*
 * Deregister the memory region @ibmr: log its virtual address and length,
 * then release the registration held in mr->umem.
 */
624 int usnic_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
626 struct usnic_ib_mr *mr = to_umr(ibmr);
628 usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
630 usnic_uiom_reg_release(mr->umem);
/*
 * Initialise a new user context: start with an empty QP-group list and add
 * the context to the device's ctx_list under usdev_lock.
 */
635 int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
637 struct ib_device *ibdev = uctx->device;
638 struct usnic_ib_ucontext *context = to_ucontext(uctx);
639 struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
642 INIT_LIST_HEAD(&context->qp_grp_list);
643 mutex_lock(&us_ibdev->usdev_lock);
644 list_add_tail(&context->link, &us_ibdev->ctx_list);
645 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Tear down a user context: under usdev_lock, warn (once) if it still owns
 * QP groups and unlink it from the list it was added to.
 */
650 void usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
652 struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
653 struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
656 mutex_lock(&us_ibdev->usdev_lock);
657 WARN_ON_ONCE(!list_empty(&context->qp_grp_list));
658 list_del(&context->link);
659 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Map BAR 0 of one of the caller's VFs into the user mapping @vma.
 *
 * The page offset of @vma is interpreted as the VF index.  The mapping is
 * marked VM_IO and made non-cached.  Under usdev_lock the QP groups owned by
 * this context are searched for one whose VF has that index; the requested
 * length must equal the BAR length exactly, in which case the BAR's bus
 * address is remapped with remap_pfn_range().  If no QP group of the context
 * sits on the requested VF, "No VF found" is logged.
 */
662 int usnic_ib_mmap(struct ib_ucontext *context,
663 struct vm_area_struct *vma)
665 struct usnic_ib_ucontext *uctx = to_ucontext(context);
666 struct usnic_ib_dev *us_ibdev;
667 struct usnic_ib_qp_grp *qp_grp;
668 struct usnic_ib_vf *vf;
669 struct vnic_dev_bar *bar;
676 us_ibdev = to_usdev(context->device);
677 vma->vm_flags |= VM_IO;
678 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
/* User space encodes the VF index in the mmap page offset. */
679 vfid = vma->vm_pgoff;
680 usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n",
681 vma->vm_pgoff, PAGE_SHIFT, vfid);
683 mutex_lock(&us_ibdev->usdev_lock);
/* Only VFs reachable through this context's own QP groups are eligible. */
684 list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) {
686 if (usnic_vnic_get_index(vf->vnic) == vfid) {
687 bar = usnic_vnic_get_bar(vf->vnic, 0);
688 if ((vma->vm_end - vma->vm_start) != bar->len) {
689 usnic_err("Bar0 Len %lu - Request map %lu\n",
691 vma->vm_end - vma->vm_start);
692 mutex_unlock(&us_ibdev->usdev_lock);
695 bus_addr = bar->bus_addr;
697 usnic_dbg("bus: %pa vaddr: %p size: %ld\n",
698 &bus_addr, bar->vaddr, bar->len);
/* The lock is dropped before the remap call. */
699 mutex_unlock(&us_ibdev->usdev_lock);
701 return remap_pfn_range(vma,
703 bus_addr >> PAGE_SHIFT,
704 len, vma->vm_page_prot);
708 mutex_unlock(&us_ibdev->usdev_lock);
709 usnic_err("No VF %u found\n", vfid);