From e0290cce6ac02f8e5ec501f25f6f6900f384550c Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Mon, 10 Oct 2016 13:15:35 +0300 Subject: [PATCH] qedr: Add support for memory registeration verbs Add support for user, dma and memory regions registration. Signed-off-by: Rajesh Borundia Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 10 +- drivers/infiniband/hw/qedr/qedr.h | 40 ++++ drivers/infiniband/hw/qedr/verbs.c | 365 +++++++++++++++++++++++++++++ drivers/infiniband/hw/qedr/verbs.h | 11 + 4 files changed, 425 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 13ba47b7b99f..bfc287c7d72e 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -100,7 +100,9 @@ static int qedr_register_device(struct qedr_dev *dev) QEDR_UVERBS(CREATE_QP) | QEDR_UVERBS(MODIFY_QP) | QEDR_UVERBS(QUERY_QP) | - QEDR_UVERBS(DESTROY_QP); + QEDR_UVERBS(DESTROY_QP) | + QEDR_UVERBS(REG_MR) | + QEDR_UVERBS(DEREG_MR); dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->num_cnq; @@ -133,6 +135,12 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.query_pkey = qedr_query_pkey; + dev->ibdev.get_dma_mr = qedr_get_dma_mr; + dev->ibdev.dereg_mr = qedr_dereg_mr; + dev->ibdev.reg_user_mr = qedr_reg_user_mr; + dev->ibdev.alloc_mr = qedr_alloc_mr; + dev->ibdev.map_mr_sg = qedr_map_mr_sg; + dev->ibdev.dma_device = &dev->pdev->dev; dev->ibdev.get_link_layer = qedr_link_layer; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index e9fe941c48ad..655955697995 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -375,6 +375,41 @@ struct qedr_qp { struct qedr_userq urq; }; +struct qedr_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; +}; + +enum qedr_mr_type { + QEDR_MR_USER, + QEDR_MR_KERNEL, + QEDR_MR_DMA, + QEDR_MR_FRMR, +}; + +struct mr_info { + struct qedr_pbl *pbl_table; + struct qedr_pbl_info pbl_info; + struct list_head free_pbl_list; + struct list_head inuse_pbl_list; + u32 completed; + u32 completed_handled; +}; + +struct qedr_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + + struct qed_rdma_register_tid_in_params hw_mr; + enum qedr_mr_type type; + + struct qedr_dev *dev; + struct mr_info info; + + u64 *pages; + u32 npages; +}; + static inline int qedr_get_dmac(struct qedr_dev *dev, struct ib_ah_attr *ah_attr, u8 *mac_addr) { @@ -418,4 +453,9 @@ static inline struct qedr_qp *get_qedr_qp(struct ib_qp *ibqp) { return container_of(ibqp, struct qedr_qp, ibqp); } + +static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct qedr_mr, ibmr); +} #endif diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a0d1c5fffb63..e70e808e2b9b 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2082,3 +2082,368 @@ int qedr_destroy_qp(struct ib_qp *ibqp) return rc; } + +static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) +{ + struct qedr_pbl *pbl, *tmp; + + if (info->pbl_table) + list_add_tail(&info->pbl_table->list_entry, + &info->free_pbl_list); + + if (!list_empty(&info->inuse_pbl_list)) + list_splice(&info->inuse_pbl_list, &info->free_pbl_list); + + list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) { + list_del(&pbl->list_entry); + qedr_free_pbl(dev, &info->pbl_info, pbl); + } +} + +static int init_mr_info(struct qedr_dev *dev, struct mr_info *info, + size_t page_list_len, bool two_layered) +{ + struct qedr_pbl *tmp; + int rc; + + INIT_LIST_HEAD(&info->free_pbl_list); + INIT_LIST_HEAD(&info->inuse_pbl_list); + + rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info, + page_list_len, two_layered); + if (rc) + goto done; + + info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); + if (!info->pbl_table) { + rc = -ENOMEM; + goto done; + } + + DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n", + &info->pbl_table->pa); + + /* in usual case we use 2 PBLs, so we add one to free + * list and allocating another one + */ + tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); + if (!tmp) { + DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n"); + goto done; + } + + list_add_tail(&tmp->list_entry, &info->free_pbl_list); + + DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa); + +done: + if (rc) + free_mr_info(dev, info); + + return rc; +} + +struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 usr_addr, int acc, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_mr *mr; + struct qedr_pd *pd; + int rc = -ENOMEM; + + pd = get_qedr_pd(ibpd); + DP_DEBUG(dev, QEDR_MSG_MR, + "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n", + pd->pd_id, start, len, usr_addr, acc); + + if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(rc); + + mr->type = QEDR_MR_USER; + + mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); + if (IS_ERR(mr->umem)) { + rc = -EFAULT; + goto err0; + } + + rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); + if (rc) + goto err1; + + qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table, + &mr->info.pbl_info); + + rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); + goto err1; + } + + /* Index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.mw_bind = false; + mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.page_size_log = ilog2(mr->umem->page_size); + mr->hw_mr.fbo = ib_umem_offset(mr->umem); + mr->hw_mr.length = len; + mr->hw_mr.vaddr = usr_addr; + mr->hw_mr.zbva = false; + mr->hw_mr.phy_mr = false; + mr->hw_mr.dma_mr = false; + + rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + DP_ERR(dev, "roce register tid returned an error %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n", + mr->ibmr.lkey); + return &mr->ibmr; + +err2: + dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); +err0: + kfree(mr); + return ERR_PTR(rc); +} + +int qedr_dereg_mr(struct ib_mr *ib_mr) +{ + struct qedr_mr *mr = get_qedr_mr(ib_mr); + struct qedr_dev *dev = get_qedr_dev(ib_mr->device); + int rc = 0; + + rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid); + if (rc) + return rc; + + dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); + + if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) + qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + + /* it could be user registered memory. */ + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + + return rc; +} + +struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len) +{ + struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_mr *mr; + int rc = -ENOMEM; + + DP_DEBUG(dev, QEDR_MSG_MR, + "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id, + max_page_list_len); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(rc); + + mr->dev = dev; + mr->type = QEDR_MR_FRMR; + + rc = init_mr_info(dev, &mr->info, max_page_list_len, 1); + if (rc) + goto err0; + + rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); + goto err0; + } + + /* Index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = QED_RDMA_TID_FMR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = 0; + mr->hw_mr.remote_read = 0; + mr->hw_mr.remote_write = 0; + mr->hw_mr.remote_atomic = 0; + mr->hw_mr.mw_bind = false; + mr->hw_mr.pbl_ptr = 0; + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.fbo = 0; + mr->hw_mr.length = 0; + mr->hw_mr.vaddr = 0; + mr->hw_mr.zbva = false; + mr->hw_mr.phy_mr = true; + mr->hw_mr.dma_mr = false; + + rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + DP_ERR(dev, "roce register tid returned an error %d\n", rc); + goto err1; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + mr->ibmr.rkey = mr->ibmr.lkey; + + DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey); + return mr; + +err1: + dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err0: + kfree(mr); + return ERR_PTR(rc); +} + +struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, + enum ib_mr_type mr_type, u32 max_num_sg) +{ + struct qedr_dev *dev; + struct qedr_mr *mr; + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = __qedr_alloc_mr(ibpd, max_num_sg); + + if (IS_ERR(mr)) + return ERR_PTR(-EINVAL); + + dev = mr->dev; + + return &mr->ibmr; +} + +static int qedr_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct qedr_mr *mr = get_qedr_mr(ibmr); + struct qedr_pbl *pbl_table; + struct regpair *pbe; + u32 pbes_in_page; + + if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { + DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages); + return -ENOMEM; + } + + DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n", + mr->npages, addr); + + pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64); + pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page); + pbe = (struct regpair *)pbl_table->va; + pbe += mr->npages % pbes_in_page; + pbe->lo = cpu_to_le32((u32)addr); + pbe->hi = cpu_to_le32((u32)upper_32_bits(addr)); + + mr->npages++; + + return 0; +} + +static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info) +{ + int work = info->completed - info->completed_handled - 1; + + DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work); + while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) { + struct qedr_pbl *pbl; + + /* Free all the page list that are possible to be freed + * (all the ones that were invalidated), under the assumption + * that if an FMR was completed successfully that means that + * if there was an invalidate operation before it also ended + */ + pbl = list_first_entry(&info->inuse_pbl_list, + struct qedr_pbl, list_entry); + list_del(&pbl->list_entry); + list_add_tail(&pbl->list_entry, &info->free_pbl_list); + info->completed_handled++; + } +} + +int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) +{ + struct qedr_mr *mr = get_qedr_mr(ibmr); + + mr->npages = 0; + + handle_completed_mrs(mr->dev, &mr->info); + return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page); +} + +struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc) +{ + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_mr *mr; + int rc; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->type = QEDR_MR_DMA; + + rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); + goto err1; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.dma_mr = true; + + rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + DP_ERR(dev, "roce register tid returned an error %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey); + return &mr->ibmr; + +err2: + dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + kfree(mr); + return ERR_PTR(rc); +} diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 056d6cb31fa2..4853f4af9983 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,4 +70,15 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp); +int qedr_dereg_mr(struct ib_mr *); +struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); + +struct ib_mr *qedr_reg_user_mr(struct ib_pd *, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *); + +int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); + +struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); #endif -- 2.34.1