RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter
author Long Li <longli@microsoft.com>
Thu, 3 Nov 2022 19:16:30 +0000 (12:16 -0700)
committer Leon Romanovsky <leon@kernel.org>
Fri, 11 Nov 2022 09:40:09 +0000 (11:40 +0200)
Add an RDMA VF driver for Microsoft Azure Network Adapter (MANA).

Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Long Li <longli@microsoft.com>
Link: https://lore.kernel.org/r/1667502990-2559-13-git-send-email-longli@linuxonhyperv.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
15 files changed:
MAINTAINERS
drivers/infiniband/Kconfig
drivers/infiniband/hw/Makefile
drivers/infiniband/hw/mana/Kconfig [new file with mode: 0644]
drivers/infiniband/hw/mana/Makefile [new file with mode: 0644]
drivers/infiniband/hw/mana/cq.c [new file with mode: 0644]
drivers/infiniband/hw/mana/device.c [new file with mode: 0644]
drivers/infiniband/hw/mana/main.c [new file with mode: 0644]
drivers/infiniband/hw/mana/mana_ib.h [new file with mode: 0644]
drivers/infiniband/hw/mana/mr.c [new file with mode: 0644]
drivers/infiniband/hw/mana/qp.c [new file with mode: 0644]
drivers/infiniband/hw/mana/wq.c [new file with mode: 0644]
include/net/mana/mana.h
include/uapi/rdma/ib_user_ioctl_verbs.h
include/uapi/rdma/mana-abi.h [new file with mode: 0644]

index 441a65d..4db8e4e 100644 (file)
@@ -13669,6 +13669,15 @@ F:     drivers/scsi/smartpqi/smartpqi*.[ch]
 F:     include/linux/cciss*.h
 F:     include/uapi/linux/cciss*.h
 
+MICROSOFT MANA RDMA DRIVER
+M:     Long Li <longli@microsoft.com>
+M:     Ajay Sharma <sharmaajay@microsoft.com>
+L:     linux-rdma@vger.kernel.org
+S:     Supported
+F:     drivers/infiniband/hw/mana/
+F:     include/net/mana
+F:     include/uapi/rdma/mana-abi.h
+
 MICROSOFT SURFACE AGGREGATOR TABLET-MODE SWITCH
 M:     Maximilian Luz <luzmaximilian@gmail.com>
 L:     platform-driver-x86@vger.kernel.org
index aa36ac6..ccc8744 100644 (file)
@@ -85,6 +85,7 @@ source "drivers/infiniband/hw/erdma/Kconfig"
 source "drivers/infiniband/hw/hfi1/Kconfig"
 source "drivers/infiniband/hw/hns/Kconfig"
 source "drivers/infiniband/hw/irdma/Kconfig"
+source "drivers/infiniband/hw/mana/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
 source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/mthca/Kconfig"
index 6b3a880..1211f43 100644 (file)
@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_QIB)            += qib/
 obj-$(CONFIG_INFINIBAND_CXGB4)         += cxgb4/
 obj-$(CONFIG_INFINIBAND_EFA)           += efa/
 obj-$(CONFIG_INFINIBAND_IRDMA)         += irdma/
+obj-$(CONFIG_MANA_INFINIBAND)          += mana/
 obj-$(CONFIG_MLX4_INFINIBAND)          += mlx4/
 obj-$(CONFIG_MLX5_INFINIBAND)          += mlx5/
 obj-$(CONFIG_INFINIBAND_OCRDMA)                += ocrdma/
diff --git a/drivers/infiniband/hw/mana/Kconfig b/drivers/infiniband/hw/mana/Kconfig
new file mode 100644 (file)
index 0000000..5466406
--- /dev/null
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MANA_INFINIBAND
+       tristate "Microsoft Azure Network Adapter support"
+       depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA
+       help
+         This driver provides low-level RDMA support for Microsoft Azure
+         Network Adapter (MANA). MANA supports RDMA features that can be used
+         for workloads (e.g. DPDK, MPI etc.) that use RDMA verbs to directly
+         access hardware from user-mode processes in Microsoft Azure cloud
+         environment.
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
new file mode 100644 (file)
index 0000000..88655fe
--- /dev/null
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
+
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
new file mode 100644 (file)
index 0000000..d141cab
--- /dev/null
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Create a completion queue backed by a user-space buffer.
+ *
+ * Copies the create command from @udata, rejects CQ depths above
+ * MAX_SEND_BUFFERS_PER_QUEUE, obtains a umem for the user buffer and
+ * creates a DMA region for it. Returns 0 on success or a negative errno.
+ */
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+                     struct ib_udata *udata)
+{
+       struct mana_ib_cq *mcq = container_of(ibcq, struct mana_ib_cq, ibcq);
+       struct ib_device *ibdev = ibcq->device;
+       struct mana_ib_create_cq cmd = {};
+       struct mana_ib_dev *mib_dev;
+       int ret;
+
+       mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+       /* User space must pass at least a complete create command */
+       if (udata->inlen < sizeof(cmd))
+               return -EINVAL;
+
+       ret = ib_copy_from_udata(&cmd, udata, min(sizeof(cmd), udata->inlen));
+       if (ret) {
+               ibdev_dbg(ibdev,
+                         "Failed to copy from udata for create cq, %d\n", ret);
+               return ret;
+       }
+
+       if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) {
+               ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+               return -EINVAL;
+       }
+
+       mcq->cqe = attr->cqe;
+       mcq->umem = ib_umem_get(ibdev, cmd.buf_addr,
+                               mcq->cqe * COMP_ENTRY_SIZE,
+                               IB_ACCESS_LOCAL_WRITE);
+       if (IS_ERR(mcq->umem)) {
+               ret = PTR_ERR(mcq->umem);
+               ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n",
+                         ret);
+               return ret;
+       }
+
+       ret = mana_ib_gd_create_dma_region(mib_dev, mcq->umem,
+                                          &mcq->gdma_region);
+       if (ret) {
+               ibdev_dbg(ibdev,
+                         "Failed to create dma region for create cq, %d\n",
+                         ret);
+               /* Undo ib_umem_get() before reporting the failure */
+               ib_umem_release(mcq->umem);
+               return ret;
+       }
+
+       ibdev_dbg(ibdev,
+                 "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+                 ret, mcq->gdma_region);
+
+       /*
+        * The CQ ID is not known at this point; it is generated later, at
+        * create_qp time.
+        */
+       return 0;
+}
+
+/*
+ * Destroy a CQ: tear down its DMA region, then release the umem.
+ * The result of destroying the DMA region is ignored; always returns 0.
+ */
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+       struct mana_ib_dev *mib_dev =
+               container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+       struct mana_ib_cq *mcq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+       mana_ib_gd_destroy_dma_region(mib_dev, mcq->gdma_region);
+       ib_umem_release(mcq->umem);
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
new file mode 100644 (file)
index 0000000..d4541b8
--- /dev/null
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+#include <net/mana/mana_auxiliary.h>
+
+MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(NET_MANA);
+
+static const struct ib_device_ops mana_ib_dev_ops = {
+       .owner = THIS_MODULE,
+       .driver_id = RDMA_DRIVER_MANA,
+       .uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION,
+       /* Verbs entry points implemented by this driver, sorted by name */
+       .alloc_pd = mana_ib_alloc_pd,
+       .alloc_ucontext = mana_ib_alloc_ucontext,
+       .create_cq = mana_ib_create_cq,
+       .create_qp = mana_ib_create_qp,
+       .create_rwq_ind_table = mana_ib_create_rwq_ind_table,
+       .create_wq = mana_ib_create_wq,
+       .dealloc_pd = mana_ib_dealloc_pd,
+       .dealloc_ucontext = mana_ib_dealloc_ucontext,
+       .dereg_mr = mana_ib_dereg_mr,
+       .destroy_cq = mana_ib_destroy_cq,
+       .destroy_qp = mana_ib_destroy_qp,
+       .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
+       .destroy_wq = mana_ib_destroy_wq,
+       .disassociate_ucontext = mana_ib_disassociate_ucontext,
+       .get_port_immutable = mana_ib_get_port_immutable,
+       .mmap = mana_ib_mmap,
+       .modify_qp = mana_ib_modify_qp,
+       .modify_wq = mana_ib_modify_wq,
+       .query_device = mana_ib_query_device,
+       .query_gid = mana_ib_query_gid,
+       .query_port = mana_ib_query_port,
+       .reg_user_mr = mana_ib_reg_user_mr,
+       /* (core object, driver struct embedding it, embedded member name) */
+       INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
+       INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
+       INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
+       INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext),
+       INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table,
+                          ib_ind_table),
+};
+
+/*
+ * Auxiliary-bus probe: allocate an IB device for the MANA adapter behind
+ * @adev, fill in its basic attributes and register it with the RDMA core.
+ * Returns 0 on success or a negative errno.
+ */
+static int mana_ib_probe(struct auxiliary_device *adev,
+                        const struct auxiliary_device_id *id)
+{
+       struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+       struct gdma_dev *gd = madev->mdev;
+       struct mana_context *mc = gd->driver_data;
+       struct mana_ib_dev *mib_dev;
+       struct ib_device *ibdev;
+       int ret;
+
+       mib_dev = ib_alloc_device(mana_ib_dev, ib_dev);
+       if (!mib_dev)
+               return -ENOMEM;
+
+       ibdev = &mib_dev->ib_dev;
+       ib_set_device_ops(ibdev, &mana_ib_dev_ops);
+
+       /* One IB port per port of the underlying MANA context */
+       ibdev->phys_port_cnt = mc->num_ports;
+
+       ibdev_dbg(ibdev, "mdev=%p id=%d num_ports=%d\n", gd,
+                 gd->dev_id.as_uint32, ibdev->phys_port_cnt);
+
+       mib_dev->gdma_dev = gd;
+       ibdev->node_type = RDMA_NODE_IB_CA;
+
+       /*
+        * num_comp_vectors needs to be set to the max MSIX index
+        * when interrupts and event queues are implemented
+        */
+       ibdev->num_comp_vectors = 1;
+       ibdev->dev.parent = gd->gdma_context->dev;
+
+       ret = ib_register_device(ibdev, "mana_%d", gd->gdma_context->dev);
+       if (ret)
+               goto err_dealloc;
+
+       /* Stash the device so mana_ib_remove() can find it again */
+       dev_set_drvdata(&adev->dev, mib_dev);
+
+       return 0;
+
+err_dealloc:
+       ib_dealloc_device(ibdev);
+       return ret;
+}
+
+/*
+ * Auxiliary-bus remove: unregister the IB device stored in the drvdata of
+ * @adev, then free it.
+ */
+static void mana_ib_remove(struct auxiliary_device *adev)
+{
+       struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
+       struct ib_device *ibdev = &mib_dev->ib_dev;
+
+       ib_unregister_device(ibdev);
+       ib_dealloc_device(ibdev);
+}
+
+static const struct auxiliary_device_id mana_id_table[] = {
+       {
+               .name = "mana.rdma", /* the only device name listed here */
+       },
+       {}, /* empty terminating entry */
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
+
+static struct auxiliary_driver mana_driver = {
+       .name = "rdma",
+       .probe = mana_ib_probe, /* allocates and registers the IB device */
+       .remove = mana_ib_remove, /* unregisters and frees the IB device */
+       .id_table = mana_id_table, /* lists "mana.rdma" */
+};
+
+module_auxiliary_driver(mana_driver);
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
new file mode 100644 (file)
index 0000000..8b3bc30
--- /dev/null
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Drop one reference on the vport configuration held through @pd. When the
+ * count reaches zero the vport of the port context behind @port is
+ * unconfigured. @port is used directly as an index into mc->ports[].
+ */
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+                        u32 port)
+{
+       struct mana_context *mc = dev->gdma_dev->driver_data;
+       struct mana_port_context *port_ctx = netdev_priv(mc->ports[port]);
+
+       mutex_lock(&pd->vport_mutex);
+
+       pd->vport_use_count--;
+       /* Going negative means more uncfg than cfg calls were made */
+       WARN_ON(pd->vport_use_count < 0);
+
+       if (pd->vport_use_count == 0)
+               mana_uncfg_vport(port_ctx);
+
+       mutex_unlock(&pd->vport_mutex);
+}
+
+/*
+ * Take a reference on the vport configuration of @pd.
+ *
+ * The first user (count 0 -> 1) configures the vport of the port context
+ * behind @port with the PD number and @doorbell_id and records the port's
+ * TX parameters in @pd; later users only bump the count. @port is used
+ * directly as an index into mc->ports[].
+ *
+ * Returns 0 on success or the error from mana_cfg_vport(), in which case
+ * the count is restored.
+ */
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
+                     u32 doorbell_id)
+{
+       struct gdma_dev *mdev = dev->gdma_dev;
+       struct mana_port_context *mpc;
+       struct mana_context *mc;
+       struct net_device *ndev;
+       int err;
+
+       mc = mdev->driver_data;
+       ndev = mc->ports[port];
+       mpc = netdev_priv(ndev);
+
+       mutex_lock(&pd->vport_mutex);
+
+       pd->vport_use_count++;
+       if (pd->vport_use_count > 1) {
+               ibdev_dbg(&dev->ib_dev,
+                         "Skip as this PD is already configured vport\n");
+               mutex_unlock(&pd->vport_mutex);
+               return 0;
+       }
+
+       err = mana_cfg_vport(mpc, pd->pdn, doorbell_id);
+       if (err) {
+               pd->vport_use_count--;
+               mutex_unlock(&pd->vport_mutex);
+
+               ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err);
+               return err;
+       }
+
+       /*
+        * Record the TX parameters before dropping the mutex, so that a
+        * concurrent caller taking the "already configured" path above
+        * cannot return before these fields are written.
+        */
+       pd->tx_shortform_allowed = mpc->tx_shortform_allowed;
+       pd->tx_vp_offset = mpc->tx_vp_offset;
+
+       mutex_unlock(&pd->vport_mutex);
+
+       ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n",
+                 mpc->port_handle, pd->pdn, doorbell_id);
+
+       return 0;
+}
+
+/*
+ * Allocate a protection domain by sending GDMA_CREATE_PD to the device and
+ * recording the returned handle and id in the driver PD.
+ *
+ * Returns 0 on success, the send error, or -EPROTO when the request was
+ * sent without error but the response carries a non-zero status.
+ */
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+       struct mana_ib_pd *mpd = container_of(ibpd, struct mana_ib_pd, ibpd);
+       struct mana_ib_dev *mib_dev =
+               container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+       struct gdma_dev *gd = mib_dev->gdma_dev;
+       struct gdma_create_pd_resp resp = {};
+       struct gdma_create_pd_req req = {};
+       enum gdma_pd_flags pd_flags = 0;
+       int ret;
+
+       mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
+                            sizeof(resp));
+       req.flags = pd_flags;
+
+       ret = mana_gd_send_request(gd->gdma_context, sizeof(req), &req,
+                                  sizeof(resp), &resp);
+       if (ret || resp.hdr.status) {
+               ibdev_dbg(&mib_dev->ib_dev,
+                         "Failed to get pd_id err %d status %u\n", ret,
+                         resp.hdr.status);
+               /* A clean send with a bad status is a protocol error */
+               return ret ? ret : -EPROTO;
+       }
+
+       mpd->pd_handle = resp.pd_handle;
+       mpd->pdn = resp.pd_id;
+       ibdev_dbg(&mib_dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
+                 mpd->pd_handle, mpd->pdn);
+
+       /* No vport is configured through this PD yet */
+       mpd->vport_use_count = 0;
+       mutex_init(&mpd->vport_mutex);
+       return 0;
+}
+
+/*
+ * Release a protection domain by sending GDMA_DESTROY_PD for its handle.
+ *
+ * Returns 0 on success, the send error, or -EPROTO when the request was
+ * sent without error but the response carries a non-zero status.
+ */
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+       struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+       struct ib_device *ibdev = ibpd->device;
+       struct gdma_destory_pd_resp resp = {};
+       struct gdma_destroy_pd_req req = {};
+       struct mana_ib_dev *dev;
+       struct gdma_dev *mdev;
+       int err;
+
+       dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+       mdev = dev->gdma_dev;
+
+       mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
+                            sizeof(resp));
+
+       req.pd_handle = pd->pd_handle;
+       err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+                                  sizeof(resp), &resp);
+
+       if (err || resp.hdr.status) {
+               /* Terminate the message with a newline like the other logs */
+               ibdev_dbg(&dev->ib_dev,
+                         "Failed to destroy pd_handle 0x%llx err %d status %u\n",
+                         pd->pd_handle, err, resp.hdr.status);
+               if (!err)
+                       err = -EPROTO;
+       }
+
+       return err;
+}
+
+/*
+ * Return doorbell page index @doorbell_page to the device with a
+ * GDMA_DESTROY_RESOURCE_RANGE request covering one resource.
+ *
+ * Returns 0 on success, the send error, or -EPROTO on a non-zero response
+ * status.
+ */
+static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
+                                        int doorbell_page)
+{
+       struct gdma_destroy_resource_range_req req = {};
+       struct gdma_resp_hdr resp = {};
+       int ret;
+
+       mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE,
+                            sizeof(req), sizeof(resp));
+
+       req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+       req.num_resources = 1;
+       req.allocated_resources = doorbell_page;
+
+       ret = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+       if (!ret && !resp.status)
+               return 0;
+
+       dev_err(gc->dev,
+               "Failed to destroy doorbell page: ret %d, 0x%x\n",
+               ret, resp.status);
+
+       return ret ? ret : -EPROTO;
+}
+
+/*
+ * Ask the device for one doorbell page with a GDMA_ALLOCATE_RESOURCE_RANGE
+ * request and store the allocated index in *@doorbell_page.
+ *
+ * Returns 0 on success, the send error, or -EPROTO on a non-zero response
+ * status; *@doorbell_page is only written on success.
+ */
+static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
+                                         int *doorbell_page)
+{
+       struct gdma_allocate_resource_range_resp resp = {};
+       struct gdma_allocate_resource_range_req req = {};
+       int ret;
+
+       mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE,
+                            sizeof(req), sizeof(resp));
+
+       req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+       req.num_resources = 1;
+       req.alignment = 1;
+
+       /* Have GDMA start searching from 0 */
+       req.allocated_resources = 0;
+
+       ret = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+       if (!ret && !resp.hdr.status) {
+               *doorbell_page = resp.allocated_resources;
+               return 0;
+       }
+
+       dev_err(gc->dev,
+               "Failed to allocate doorbell page: ret %d, 0x%x\n",
+               ret, resp.hdr.status);
+
+       return ret ? ret : -EPROTO;
+}
+
+/*
+ * Set up a user context: allocate one doorbell page index from the device
+ * and remember it in the driver ucontext.
+ * Returns 0 on success or the error from the doorbell allocation.
+ */
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+                          struct ib_udata *udata)
+{
+       struct mana_ib_ucontext *uctx =
+               container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+       struct ib_device *ibdev = ibcontext->device;
+       struct mana_ib_dev *mib_dev =
+               container_of(ibdev, struct mana_ib_dev, ib_dev);
+       struct gdma_context *gc = mib_dev->gdma_dev->gdma_context;
+       int db_page;
+       int ret;
+
+       /* Allocate a doorbell page index */
+       ret = mana_gd_allocate_doorbell_page(gc, &db_page);
+       if (ret) {
+               ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret);
+               return ret;
+       }
+
+       ibdev_dbg(ibdev, "Doorbell page allocated %d\n", db_page);
+
+       uctx->doorbell = db_page;
+
+       return 0;
+}
+
+/*
+ * Tear down a user context by returning its doorbell page to the device.
+ * A failure is only logged; there is no way to report it to the caller.
+ */
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+       struct mana_ib_ucontext *uctx =
+               container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+       struct ib_device *ibdev = ibcontext->device;
+       struct mana_ib_dev *mib_dev =
+               container_of(ibdev, struct mana_ib_dev, ib_dev);
+       struct gdma_context *gc = mib_dev->gdma_dev->gdma_context;
+       int ret;
+
+       ret = mana_gd_destroy_doorbell_page(gc, uctx->doorbell);
+       if (ret)
+               ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
+}
+
+/*
+ * Send the initial create-DMA-region message carrying the first @num_pages
+ * page addresses already stored in @create_req, and store the returned
+ * region handle in *@gdma_region.
+ *
+ * Returns 0 on success, the send error, or -EPROTO on a non-zero response
+ * status.
+ *
+ * NOTE(review): unlike mana_ib_gd_add_dma_region(), any non-zero status is
+ * treated as a failure here. Confirm the device never answers the first
+ * message of a multi-message create with GDMA_STATUS_MORE_ENTRIES.
+ */
+static int
+mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
+                           struct gdma_context *gc,
+                           struct gdma_create_dma_region_req *create_req,
+                           size_t num_pages, mana_handle_t *gdma_region)
+{
+       struct gdma_create_dma_region_resp resp = {};
+       unsigned int msg_size;
+       int ret;
+
+       /* Only the part of the request that holds valid addresses is sent */
+       msg_size = struct_size(create_req, page_addr_list, num_pages);
+       create_req->page_addr_list_len = num_pages;
+
+       ret = mana_gd_send_request(gc, msg_size, create_req,
+                                  sizeof(resp), &resp);
+       if (ret || resp.hdr.status) {
+               ibdev_dbg(&dev->ib_dev,
+                         "Failed to create DMA region: %d, 0x%x\n",
+                         ret, resp.hdr.status);
+               return ret ? ret : -EPROTO;
+       }
+
+       *gdma_region = resp.dma_region_handle;
+       ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
+                 *gdma_region);
+
+       return 0;
+}
+
+/*
+ * Send one GDMA_DMA_REGION_ADD_PAGES message carrying the @num_pages page
+ * addresses already stored in @add_req.
+ *
+ * The response status must equal @expected_status, which the caller picks
+ * depending on whether further messages will follow. Returns 0 on success,
+ * the send error, or -EPROTO on an unexpected status.
+ */
+static int
+mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
+                         struct gdma_dma_region_add_pages_req *add_req,
+                         unsigned int num_pages, u32 expected_status)
+{
+       struct gdma_general_resp resp = {};
+       unsigned int msg_size;
+       int ret;
+
+       msg_size = struct_size(add_req, page_addr_list, num_pages);
+
+       mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES,
+                            msg_size, sizeof(resp));
+       add_req->page_addr_list_len = num_pages;
+
+       ret = mana_gd_send_request(gc, msg_size, add_req,
+                                  sizeof(resp), &resp);
+       if (!ret && resp.hdr.status == expected_status)
+               return 0;
+
+       ibdev_dbg(&dev->ib_dev,
+                 "Failed to create DMA region: %d, 0x%x\n",
+                 ret, resp.hdr.status);
+
+       return ret ? ret : -EPROTO;
+}
+
+/*
+ * Register the DMA blocks of @umem with the device as one DMA region.
+ *
+ * The block address list is sent in batches limited by the hardware
+ * channel's maximum request size: a GDMA_CREATE_DMA_REGION message carries
+ * the first batch and GDMA_DMA_REGION_ADD_PAGES messages carry the rest,
+ * all built in the same request buffer.
+ *
+ * On success the region handle is stored in *@gdma_region and 0 is
+ * returned. Otherwise a negative errno is returned; if a later batch fails
+ * after the region was created, the region is destroyed again.
+ */
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+                                mana_handle_t *gdma_region)
+{
+       struct gdma_dma_region_add_pages_req *add_req = NULL;
+       size_t num_pages_processed = 0, num_pages_to_handle;
+       struct gdma_create_dma_region_req *create_req;
+       unsigned int create_req_msg_size;
+       struct hw_channel_context *hwc;
+       struct ib_block_iter biter;
+       size_t max_pgs_add_cmd = 0;
+       size_t max_pgs_create_cmd;
+       struct gdma_context *gc;
+       size_t num_pages_total;
+       struct gdma_dev *mdev;
+       unsigned long page_sz;
+       unsigned int tail = 0;
+       u64 *page_addr_list;
+       void *request_buf;
+       int err = 0;
+
+       mdev = dev->gdma_dev;
+       gc = mdev->gdma_context;
+       hwc = gc->hwc.driver_data;
+
+       /* Hardware requires dma region to align to chosen page size */
+       page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
+       if (!page_sz) {
+               ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
+               return -ENOMEM;
+       }
+       num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
+
+       /*
+        * With no blocks the loop below would never send a request, so no
+        * region would be created; reject that instead of reporting success
+        * with *gdma_region left unset.
+        */
+       if (!num_pages_total) {
+               ibdev_dbg(&dev->ib_dev, "umem has no DMA blocks\n");
+               return -EINVAL;
+       }
+
+       max_pgs_create_cmd =
+               (hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64);
+       num_pages_to_handle =
+               min_t(size_t, num_pages_total, max_pgs_create_cmd);
+       create_req_msg_size =
+               struct_size(create_req, page_addr_list, num_pages_to_handle);
+
+       /* One buffer of maximum message size is reused for every request */
+       request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL);
+       if (!request_buf)
+               return -ENOMEM;
+
+       create_req = request_buf;
+       mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION,
+                            create_req_msg_size,
+                            sizeof(struct gdma_create_dma_region_resp));
+
+       create_req->length = umem->length;
+       create_req->offset_in_page = umem->address & (page_sz - 1);
+       create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
+       create_req->page_count = num_pages_total;
+
+       ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %zu\n",
+                 umem->length, num_pages_total);
+
+       ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
+                 page_sz, create_req->offset_in_page);
+
+       ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %zu, gdma_page_type %u\n",
+                 num_pages_to_handle, create_req->gdma_page_type);
+
+       page_addr_list = create_req->page_addr_list;
+       rdma_umem_for_each_dma_block(umem, &biter, page_sz) {
+               /* Collect addresses until the current batch is full */
+               page_addr_list[tail++] = rdma_block_iter_dma_address(&biter);
+               if (tail < num_pages_to_handle)
+                       continue;
+
+               if (!num_pages_processed) {
+                       /* First create message */
+                       err = mana_ib_gd_first_dma_region(dev, gc, create_req,
+                                                         tail, gdma_region);
+                       if (err)
+                               goto out;
+
+                       max_pgs_add_cmd = (hwc->max_req_msg_size -
+                               sizeof(*add_req)) / sizeof(u64);
+
+                       /* From now on the buffer holds add-pages requests */
+                       add_req = request_buf;
+                       add_req->dma_region_handle = *gdma_region;
+                       add_req->reserved3 = 0;
+                       page_addr_list = add_req->page_addr_list;
+               } else {
+                       /* Subsequent create messages */
+                       u32 expected_s = 0;
+
+                       /* More batches follow after this one */
+                       if (num_pages_processed + num_pages_to_handle <
+                           num_pages_total)
+                               expected_s = GDMA_STATUS_MORE_ENTRIES;
+
+                       err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
+                                                       expected_s);
+                       if (err)
+                               break;
+               }
+
+               num_pages_processed += tail;
+               tail = 0;
+
+               /* The remaining pages to create */
+               num_pages_to_handle =
+                       min_t(size_t,
+                             num_pages_total - num_pages_processed,
+                             max_pgs_add_cmd);
+       }
+
+       /* Only reached with an error once the region already exists */
+       if (err)
+               mana_ib_gd_destroy_dma_region(dev, *gdma_region);
+
+out:
+       kfree(request_buf);
+       return err;
+}
+
+/*
+ * Destroy the DMA region identified by @gdma_region on the GDMA context of
+ * @dev. Returns the result of mana_gd_destroy_dma_region().
+ *
+ * The handle parameter is spelled mana_handle_t to match the prototype in
+ * mana_ib.h.
+ */
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
+                                 mana_handle_t gdma_region)
+{
+       struct gdma_dev *mdev = dev->gdma_dev;
+       struct gdma_context *gc;
+
+       gc = mdev->gdma_context;
+       ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
+
+       return mana_gd_destroy_dma_region(gc, gdma_region);
+}
+
+/*
+ * Map the doorbell page of this user context into @vma.
+ *
+ * Only offset 0 is accepted. The page frame is computed from the doorbell
+ * page base of the GDMA context plus the index stored in the ucontext, and
+ * is mapped write-combined through rdma_user_mmap_io().
+ * Returns 0 on success, -EINVAL for a non-zero offset, or the mapping error.
+ */
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+       struct mana_ib_ucontext *uctx =
+               container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+       struct ib_device *ibdev = ibcontext->device;
+       struct mana_ib_dev *mib_dev =
+               container_of(ibdev, struct mana_ib_dev, ib_dev);
+       struct gdma_context *gc = mib_dev->gdma_dev->gdma_context;
+       phys_addr_t pfn;
+       int ret;
+
+       if (vma->vm_pgoff != 0) {
+               ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
+               return -EINVAL;
+       }
+
+       /* Map to the page indexed by ucontext->doorbell */
+       pfn = (gc->phys_db_page_base +
+              gc->db_page_size * uctx->doorbell) >>
+             PAGE_SHIFT;
+
+       ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size,
+                               pgprot_writecombine(vma->vm_page_prot), NULL);
+       if (ret) {
+               ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
+               return ret;
+       }
+
+       ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n",
+                 pfn, gc->db_page_size, ret);
+
+       return ret;
+}
+
+/*
+ * Report the immutable port capabilities. Only core_cap_flags is written;
+ * @ibdev and @port_num are not used. Always returns 0.
+ */
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+                              struct ib_port_immutable *immutable)
+{
+       /*
+        * This version only supports RAW_PACKET;
+        * other fields need to be filled in for other types
+        */
+       immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+       return 0;
+}
+
+/*
+ * Fill in the device limits advertised by this driver. All values are
+ * compile-time constants; @ibdev and @uhw are not used. Always returns 0.
+ */
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+                        struct ib_udata *uhw)
+{
+       /* Queue pair limits */
+       props->max_qp = MANA_MAX_NUM_QUEUES;
+       props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE;
+       props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES;
+       props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES;
+
+       /*
+        * max_cqe could potentially be much bigger.
+        * As this version of the driver only supports RAW QP, set it to the
+        * same value as max_qp_wr
+        */
+       props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE;
+
+       /* Memory region limits */
+       props->max_mr_size = MANA_IB_MAX_MR_SIZE;
+       props->max_mr = MANA_IB_MAX_MR;
+
+       return 0;
+}
+
+/* Port query stub: leaves @props untouched and reports success. */
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+                      struct ib_port_attr *props)
+{
+       /* This version doesn't return port properties */
+       return 0;
+}
+
+/* GID query stub: leaves @gid untouched and reports success. */
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+                     union ib_gid *gid)
+{
+       /* This version doesn't return GID properties */
+       return 0;
+}
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+       /* Intentionally empty: the driver does no work of its own here */
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
new file mode 100644 (file)
index 0000000..502cc86
--- /dev/null
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_IB_H_
+#define _MANA_IB_H_
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_umem.h>
+#include <rdma/mana-abi.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include <net/mana/mana.h>
+
+#define PAGE_SZ_BM                                                             \
+       (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K |        \
+        SZ_512K | SZ_1M | SZ_2M)
+
+/* MANA doesn't have any limit for MR size */
+#define MANA_IB_MAX_MR_SIZE    U64_MAX
+
+/*
+ * The hardware limit on the number of MRs is greater than the maximum number
+ * of MRs that can be represented in 24 bits
+ */
+#define MANA_IB_MAX_MR         0xFFFFFFu
+
+struct mana_ib_dev {
+       struct ib_device ib_dev; /* embedded core IB device */
+       struct gdma_dev *gdma_dev; /* set in mana_ib_probe() */
+};
+
+struct mana_ib_wq {
+       struct ib_wq ibwq; /* embedded core WQ object */
+       struct ib_umem *umem;
+       int wqe;
+       u32 wq_buf_size;
+       u64 gdma_region; /* presumably the DMA region for umem - confirm */
+       u64 id;
+       mana_handle_t rx_object;
+};
+
+struct mana_ib_pd {
+       struct ib_pd ibpd; /* embedded core PD object */
+       u32 pdn; /* pd_id from the GDMA_CREATE_PD response */
+       mana_handle_t pd_handle; /* handle sent with GDMA_DESTROY_PD */
+
+       /* Mutex for sharing access to vport_use_count */
+       struct mutex vport_mutex;
+       int vport_use_count; /* vport is configured while this is > 0 */
+
+       bool tx_shortform_allowed; /* copied from the port context */
+       u32 tx_vp_offset; /* copied from the port context */
+};
+
+struct mana_ib_mr {
+       struct ib_mr ibmr; /* embedded core MR object */
+       struct ib_umem *umem;
+       mana_handle_t mr_handle; /* presumably from GDMA_CREATE_MR - confirm */
+};
+
+struct mana_ib_cq {
+       struct ib_cq ibcq; /* embedded core CQ object */
+       struct ib_umem *umem; /* user CQ buffer from ib_umem_get() */
+       int cqe; /* number of CQEs requested at create time */
+       u64 gdma_region; /* DMA region handle created for umem */
+       u64 id; /* not set by create_cq; generated at create_qp */
+};
+
+struct mana_ib_qp {
+       struct ib_qp ibqp; /* embedded core QP object */
+
+       /* Work queue info */
+       struct ib_umem *sq_umem; /* presumably the user SQ buffer - confirm */
+       int sqe;
+       u64 sq_gdma_region;
+       u64 sq_id;
+       mana_handle_t tx_object;
+
+       /* The port on the IB device, starting with 1 */
+       u32 port;
+};
+
+struct mana_ib_ucontext {
+       struct ib_ucontext ibucontext; /* embedded core ucontext object */
+       u32 doorbell; /* doorbell page index set in alloc_ucontext */
+};
+
+struct mana_ib_rwq_ind_table {
+       struct ib_rwq_ind_table ib_ind_table; /* only member: core object */
+};
+
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+                                mana_handle_t *gdma_region);
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
+                                 mana_handle_t gdma_region);
+
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+                               struct ib_wq_init_attr *init_attr,
+                               struct ib_udata *udata);
+
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+                     u32 wq_attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata);
+
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+                                struct ib_rwq_ind_table_init_attr *init_attr,
+                                struct ib_udata *udata);
+
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl);
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags);
+
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+                                 u64 iova, int access_flags,
+                                 struct ib_udata *udata);
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+
+int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+                     struct ib_udata *udata);
+
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
+                     struct mana_ib_pd *pd, u32 doorbell_id);
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+                        u32 port);
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+                     struct ib_udata *udata);
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+                          struct ib_udata *udata);
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma);
+
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+                              struct ib_port_immutable *immutable);
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+                        struct ib_udata *uhw);
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+                      struct ib_port_attr *props);
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+                     union ib_gid *gid);
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
+
+#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
new file mode 100644 (file)
index 0000000..a56236c
--- /dev/null
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/* Verbs access flags this file knows how to map to GDMA access flags */
+#define VALID_MR_FLAGS                                                         \
+       (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
+
+/*
+ * Convert IB verbs access flags to the GDMA access-flag encoding.
+ *
+ * GDMA_ACCESS_FLAG_LOCAL_READ is always set; local write, remote write and
+ * remote read are added when the matching IB_ACCESS_* bit is present.
+ * Any other bit in @access_flags is ignored here.
+ */
+static enum gdma_mr_access_flags
+mana_ib_verbs_to_gdma_access_flags(int access_flags)
+{
+       enum gdma_mr_access_flags gdma_flags = GDMA_ACCESS_FLAG_LOCAL_READ;
+
+       gdma_flags |= (access_flags & IB_ACCESS_LOCAL_WRITE) ?
+                             GDMA_ACCESS_FLAG_LOCAL_WRITE : 0;
+       gdma_flags |= (access_flags & IB_ACCESS_REMOTE_WRITE) ?
+                             GDMA_ACCESS_FLAG_REMOTE_WRITE : 0;
+       gdma_flags |= (access_flags & IB_ACCESS_REMOTE_READ) ?
+                             GDMA_ACCESS_FLAG_REMOTE_READ : 0;
+
+       return gdma_flags;
+}
+
+/*
+ * Send a GDMA_CREATE_MR request built from @mr_params.
+ *
+ * Only GDMA_MR_TYPE_GVA is handled; any other type returns -EINVAL without
+ * sending a request. On success the lkey, rkey and MR handle from the
+ * response are stored in @mr and 0 is returned. An error from
+ * mana_gd_send_request() is returned unchanged; a non-zero response status
+ * with no send error is reported as -EPROTO.
+ */
+static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
+                               struct gdma_create_mr_params *mr_params)
+{
+       struct gdma_create_mr_response resp = {};
+       struct gdma_create_mr_request req = {};
+       struct gdma_dev *mdev = dev->gdma_dev;
+       struct gdma_context *gc;
+       int err;
+
+       gc = mdev->gdma_context;
+
+       mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
+                            sizeof(resp));
+       req.pd_handle = mr_params->pd_handle;
+       req.mr_type = mr_params->mr_type;
+
+       switch (mr_params->mr_type) {
+       case GDMA_MR_TYPE_GVA:
+               req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
+               req.gva.virtual_address = mr_params->gva.virtual_address;
+               req.gva.access_flags = mr_params->gva.access_flags;
+               break;
+
+       default:
+               ibdev_dbg(&dev->ib_dev,
+                         "invalid param (GDMA_MR_TYPE) passed, type %d\n",
+                         req.mr_type);
+               return -EINVAL;
+       }
+
+       err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+       if (err || resp.hdr.status) {
+               /* Newline-terminated like every other message in this file */
+               ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u\n", err,
+                         resp.hdr.status);
+               if (!err)
+                       err = -EPROTO;
+
+               return err;
+       }
+
+       mr->ibmr.lkey = resp.lkey;
+       mr->ibmr.rkey = resp.rkey;
+       mr->mr_handle = resp.mr_handle;
+
+       return 0;
+}
+
+/*
+ * Send a GDMA_DESTROY_MR request for @mr_handle.
+ *
+ * Returns 0 on success, the mana_gd_send_request() error if sending failed,
+ * or -EPROTO when the request went through but the response status is
+ * non-zero.
+ */
+static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev,
+                                gdma_obj_handle_t mr_handle)
+{
+       struct gdma_context *gc = dev->gdma_dev->gdma_context;
+       struct gdma_destroy_mr_response reply = {};
+       struct gdma_destroy_mr_request request = {};
+       int rc;
+
+       mana_gd_init_req_hdr(&request.hdr, GDMA_DESTROY_MR, sizeof(request),
+                            sizeof(reply));
+       request.mr_handle = mr_handle;
+
+       rc = mana_gd_send_request(gc, sizeof(request), &request, sizeof(reply),
+                                 &reply);
+       if (!rc && !reply.hdr.status)
+               return 0;
+
+       dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", rc,
+               reply.hdr.status);
+
+       return rc ? rc : -EPROTO;
+}
+
+/*
+ * Register a user memory region.
+ *
+ * Gets a umem for @start/@length, creates a GDMA DMA region for it and then
+ * creates a GVA-type MR on @ibpd's protection domain with @iova as the MR's
+ * virtual address.
+ *
+ * Bits in the IB_ACCESS_OPTIONAL range are dropped before validation, since
+ * optional flags may be ignored by drivers that do not implement them. Any
+ * remaining bit outside VALID_MR_FLAGS fails with -EINVAL.
+ *
+ * Returns the new ib_mr, or an ERR_PTR() with every intermediate resource
+ * released.
+ */
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+                                 u64 iova, int access_flags,
+                                 struct ib_udata *udata)
+{
+       struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+       struct gdma_create_mr_params mr_params = {};
+       struct ib_device *ibdev = ibpd->device;
+       gdma_obj_handle_t dma_region_handle;
+       struct mana_ib_dev *dev;
+       struct mana_ib_mr *mr;
+       int err;
+
+       dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+       ibdev_dbg(ibdev,
+                 "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x\n",
+                 start, iova, length, access_flags);
+
+       /* Optional flags are hints; ignore them rather than failing */
+       access_flags &= ~IB_ACCESS_OPTIONAL;
+       if (access_flags & ~VALID_MR_FLAGS)
+               return ERR_PTR(-EINVAL);
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       mr->umem = ib_umem_get(ibdev, start, length, access_flags);
+       if (IS_ERR(mr->umem)) {
+               err = PTR_ERR(mr->umem);
+               ibdev_dbg(ibdev,
+                         "Failed to get umem for register user-mr, %d\n", err);
+               goto err_free;
+       }
+
+       err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle);
+       if (err) {
+               ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+                         err);
+               goto err_umem;
+       }
+
+       ibdev_dbg(ibdev,
+                 "mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err,
+                 dma_region_handle);
+
+       mr_params.pd_handle = pd->pd_handle;
+       mr_params.mr_type = GDMA_MR_TYPE_GVA;
+       mr_params.gva.dma_region_handle = dma_region_handle;
+       mr_params.gva.virtual_address = iova;
+       mr_params.gva.access_flags =
+               mana_ib_verbs_to_gdma_access_flags(access_flags);
+
+       err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+       if (err)
+               goto err_dma_region;
+
+       /*
+        * There is no need to keep track of dma_region_handle after MR is
+        * successfully created. The dma_region_handle is tracked in the PF
+        * as part of the lifecycle of this MR.
+        */
+
+       return &mr->ibmr;
+
+err_dma_region:
+       mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
+                                  dma_region_handle);
+
+err_umem:
+       ib_umem_release(mr->umem);
+
+err_free:
+       kfree(mr);
+       return ERR_PTR(err);
+}
+
+/*
+ * Deregister a memory region: destroy the hardware MR first, then release
+ * the umem (if any) and free the driver's MR structure. If the hardware
+ * destroy fails, its error is returned and nothing is freed.
+ */
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+       struct mana_ib_dev *mib_dev =
+               container_of(ibmr->device, struct mana_ib_dev, ib_dev);
+       struct mana_ib_mr *mana_mr =
+               container_of(ibmr, struct mana_ib_mr, ibmr);
+       int rc = mana_ib_gd_destroy_mr(mib_dev, mana_mr->mr_handle);
+
+       if (rc)
+               return rc;
+
+       if (mana_mr->umem)
+               ib_umem_release(mana_mr->umem);
+       kfree(mana_mr);
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
new file mode 100644 (file)
index 0000000..ea15ec7
--- /dev/null
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Program RX steering for the vPort behind @ndev.
+ *
+ * Builds a MANA_CONFIG_VPORT_RX request that enables RX, sets
+ * @default_rxobj as the default RX object, installs an indirection table of
+ * MANA_INDIRECT_TABLE_SIZE entries (the 2^@log_ind_tbl_size entries of
+ * @ind_table repeated as needed) and sets the hash key. RSS is enabled when
+ * @log_ind_tbl_size is non-zero.
+ *
+ * When @rx_hash_key_len is non-zero that many bytes of @rx_hash_key are
+ * used as the key; otherwise the key comes from netdev_rss_key_fill().
+ * A key longer than MANA_HASH_KEY_SIZE is rejected with -EINVAL.
+ *
+ * Returns 0 on success, -ENOMEM, -EINVAL, the send error, or -EPROTO when
+ * the response carries a non-zero status.
+ */
+static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
+                                     struct net_device *ndev,
+                                     mana_handle_t default_rxobj,
+                                     mana_handle_t ind_table[],
+                                     u32 log_ind_tbl_size, u32 rx_hash_key_len,
+                                     u8 *rx_hash_key)
+{
+       struct mana_port_context *mpc = netdev_priv(ndev);
+       struct mana_cfg_rx_steer_req *req = NULL;
+       struct mana_cfg_rx_steer_resp resp = {};
+       mana_handle_t *req_indir_tab;
+       struct gdma_context *gc;
+       struct gdma_dev *mdev;
+       u32 req_buf_size;
+       u32 i;
+       int err;
+
+       /*
+        * The length originates from user space. The key buffer in the
+        * request is filled as MANA_HASH_KEY_SIZE bytes below, so never copy
+        * more than that into it.
+        */
+       if (rx_hash_key_len > MANA_HASH_KEY_SIZE)
+               return -EINVAL;
+
+       mdev = dev->gdma_dev;
+       gc = mdev->gdma_context;
+
+       req_buf_size =
+               sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE;
+       req = kzalloc(req_buf_size, GFP_KERNEL);
+       if (!req)
+               return -ENOMEM;
+
+       mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
+                            sizeof(resp));
+
+       req->vport = mpc->port_handle;
+       req->rx_enable = 1;
+       req->update_default_rxobj = 1;
+       req->default_rxobj = default_rxobj;
+       req->hdr.dev_id = mdev->dev_id;
+
+       /* If there are more than 1 entries in indirection table, enable RSS */
+       if (log_ind_tbl_size)
+               req->rss_enable = true;
+
+       req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
+       req->indir_tab_offset = sizeof(*req);
+       req->update_indir_tab = true;
+
+       /* The indirection table lives directly after the fixed request */
+       req_indir_tab = (mana_handle_t *)(req + 1);
+       /* The ind table passed to the hardware must have
+        * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
+        * ind_table to MANA_INDIRECT_TABLE_SIZE if required
+        */
+       ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
+       for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+               req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
+               ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
+                         req_indir_tab[i]);
+       }
+
+       req->update_hashkey = true;
+       if (rx_hash_key_len)
+               memcpy(req->hashkey, rx_hash_key, rx_hash_key_len);
+       else
+               netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
+
+       ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
+                 req->vport, default_rxobj);
+
+       err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp);
+       if (err) {
+               netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+               goto out;
+       }
+
+       if (resp.hdr.status) {
+               netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
+                          resp.hdr.status);
+               err = -EPROTO;
+               goto out;
+       }
+
+       netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n",
+                   mpc->port_handle, log_ind_tbl_size);
+
+out:
+       kfree(req);
+       return err;
+}
+
+/*
+ * Create an RSS QP from the receive WQ indirection table in @attr.
+ *
+ * For every WQ in the table a hardware RQ object is created on the port
+ * named by the user command, using the WQ's and its CQ's DMA regions. The
+ * resulting RX objects are then programmed as the vPort indirection table
+ * (the last one also becomes the default RX object) and the per-entry
+ * WQ/CQ ids are copied back to user space.
+ *
+ * The user command is validated first: receive limits, table size, hash
+ * function, hash key length and port number.
+ *
+ * Returns 0 on success or a negative errno; on failure every RQ object
+ * created so far is destroyed.
+ */
+static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
+                                struct ib_qp_init_attr *attr,
+                                struct ib_udata *udata)
+{
+       struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+       struct mana_ib_dev *mdev =
+               container_of(pd->device, struct mana_ib_dev, ib_dev);
+       struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
+       struct mana_ib_create_qp_rss_resp resp = {};
+       struct mana_ib_create_qp_rss ucmd = {};
+       struct gdma_dev *gd = mdev->gdma_dev;
+       mana_handle_t *mana_ind_table;
+       struct mana_port_context *mpc;
+       struct mana_context *mc;
+       struct net_device *ndev;
+       struct mana_ib_cq *cq;
+       struct mana_ib_wq *wq;
+       unsigned int ind_tbl_size;
+       struct ib_cq *ibcq;
+       struct ib_wq *ibwq;
+       int i = 0;
+       u32 port;
+       int ret;
+
+       mc = gd->driver_data;
+
+       if (!udata || udata->inlen < sizeof(ucmd))
+               return -EINVAL;
+
+       ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+       if (ret) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Failed copy from udata for create rss-qp, err %d\n",
+                         ret);
+               return ret;
+       }
+
+       if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Requested max_recv_wr %d exceeding limit\n",
+                         attr->cap.max_recv_wr);
+               return -EINVAL;
+       }
+
+       if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Requested max_recv_sge %d exceeding limit\n",
+                         attr->cap.max_recv_sge);
+               return -EINVAL;
+       }
+
+       ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
+       if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Indirect table size %d exceeding limit\n",
+                         ind_tbl_size);
+               return -EINVAL;
+       }
+
+       if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "RX Hash function is not supported, %d\n",
+                         ucmd.rx_hash_function);
+               return -EINVAL;
+       }
+
+       /*
+        * rx_hash_key_len is user controlled and is later used as a memcpy()
+        * length from ucmd.rx_hash_key into the steering request. Reject
+        * anything that would read past the key array or exceed the
+        * hardware key size.
+        */
+       if (ucmd.rx_hash_key_len > sizeof(ucmd.rx_hash_key) ||
+           ucmd.rx_hash_key_len > MANA_HASH_KEY_SIZE) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "RX hash key length %u exceeding limit\n",
+                         ucmd.rx_hash_key_len);
+               return -EINVAL;
+       }
+
+       /* IB ports start with 1, MANA start with 0 */
+       port = ucmd.port;
+       if (port < 1 || port > mc->num_ports) {
+               ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+                         port);
+               return -EINVAL;
+       }
+       ndev = mc->ports[port - 1];
+       mpc = netdev_priv(ndev);
+
+       ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
+                 ucmd.rx_hash_function, port);
+
+       mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t),
+                                GFP_KERNEL);
+       if (!mana_ind_table) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       qp->port = port;
+
+       for (i = 0; i < ind_tbl_size; i++) {
+               struct mana_obj_spec wq_spec = {};
+               struct mana_obj_spec cq_spec = {};
+
+               ibwq = ind_tbl->ind_tbl[i];
+               wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+
+               ibcq = ibwq->cq;
+               cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+               wq_spec.gdma_region = wq->gdma_region;
+               wq_spec.queue_size = wq->wq_buf_size;
+
+               cq_spec.gdma_region = cq->gdma_region;
+               cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
+               cq_spec.modr_ctx_id = 0;
+               cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+
+               ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
+                                        &wq_spec, &cq_spec, &wq->rx_object);
+               if (ret)
+                       goto fail;
+
+               /* The GDMA regions are now owned by the WQ object */
+               wq->gdma_region = GDMA_INVALID_DMA_REGION;
+               cq->gdma_region = GDMA_INVALID_DMA_REGION;
+
+               wq->id = wq_spec.queue_index;
+               cq->id = cq_spec.queue_index;
+
+               ibdev_dbg(&mdev->ib_dev,
+                         "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
+                         ret, wq->rx_object, wq->id, cq->id);
+
+               resp.entries[i].cqid = cq->id;
+               resp.entries[i].wqid = wq->id;
+
+               mana_ind_table[i] = wq->rx_object;
+       }
+       resp.num_entries = i;
+
+       /* wq still points at the last table entry: use it as the default */
+       ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
+                                        mana_ind_table,
+                                        ind_tbl->log_ind_tbl_size,
+                                        ucmd.rx_hash_key_len,
+                                        ucmd.rx_hash_key);
+       if (ret)
+               goto fail;
+
+       ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+       if (ret) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Failed to copy to udata create rss-qp, %d\n",
+                         ret);
+               goto fail;
+       }
+
+       kfree(mana_ind_table);
+
+       return 0;
+
+fail:
+       /* i counts the RQ objects created so far; undo them in reverse */
+       while (i-- > 0) {
+               ibwq = ind_tbl->ind_tbl[i];
+               wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+               mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+       }
+
+       kfree(mana_ind_table);
+
+       return ret;
+}
+
+/*
+ * Create a raw packet send QP.
+ *
+ * Configures the vPort for @ibpd, gets a umem and a GDMA DMA region for the
+ * user-provided send queue buffer, creates a hardware SQ object together
+ * with the send CQ, and returns the SQ id, CQ id and the PD's tx_vp_offset
+ * to user space.
+ *
+ * The port in the user command uses 1-based IB numbering and must lie in
+ * [1, mc->num_ports].
+ *
+ * Returns 0 on success or a negative errno with the resources acquired so
+ * far released.
+ */
+static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
+                                struct ib_qp_init_attr *attr,
+                                struct ib_udata *udata)
+{
+       struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+       struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+       struct mana_ib_dev *mdev =
+               container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+       struct mana_ib_cq *send_cq =
+               container_of(attr->send_cq, struct mana_ib_cq, ibcq);
+       struct mana_ib_ucontext *mana_ucontext =
+               rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+                                         ibucontext);
+       struct mana_ib_create_qp_resp resp = {};
+       struct gdma_dev *gd = mdev->gdma_dev;
+       struct mana_ib_create_qp ucmd = {};
+       struct mana_obj_spec wq_spec = {};
+       struct mana_obj_spec cq_spec = {};
+       struct mana_port_context *mpc;
+       struct mana_context *mc;
+       struct net_device *ndev;
+       struct ib_umem *umem;
+       int err;
+       u32 port;
+
+       mc = gd->driver_data;
+
+       if (!mana_ucontext || udata->inlen < sizeof(ucmd))
+               return -EINVAL;
+
+       err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+       if (err) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Failed to copy from udata create qp-raw, %d\n", err);
+               return err;
+       }
+
+       /*
+        * IB ports start with 1, MANA Ethernet ports start with 0. Port 0
+        * must be rejected too: it would index mc->ports[] with port - 1
+        * wrapped around.
+        */
+       port = ucmd.port;
+       if (port < 1 || port > mc->num_ports)
+               return -EINVAL;
+
+       if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Requested max_send_wr %d exceeding limit\n",
+                         attr->cap.max_send_wr);
+               return -EINVAL;
+       }
+
+       if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Requested max_send_sge %d exceeding limit\n",
+                         attr->cap.max_send_sge);
+               return -EINVAL;
+       }
+
+       ndev = mc->ports[port - 1];
+       mpc = netdev_priv(ndev);
+       ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
+
+       err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell);
+       if (err)
+               return -ENODEV;
+
+       qp->port = port;
+
+       ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
+                 ucmd.sq_buf_addr, ucmd.port);
+
+       umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size,
+                          IB_ACCESS_LOCAL_WRITE);
+       if (IS_ERR(umem)) {
+               err = PTR_ERR(umem);
+               ibdev_dbg(&mdev->ib_dev,
+                         "Failed to get umem for create qp-raw, err %d\n",
+                         err);
+               goto err_free_vport;
+       }
+       qp->sq_umem = umem;
+
+       err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
+                                          &qp->sq_gdma_region);
+       if (err) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Failed to create dma region for create qp-raw, %d\n",
+                         err);
+               goto err_release_umem;
+       }
+
+       ibdev_dbg(&mdev->ib_dev,
+                 "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+                 err, qp->sq_gdma_region);
+
+       /* Create a WQ on the same port handle used by the Ethernet */
+       wq_spec.gdma_region = qp->sq_gdma_region;
+       wq_spec.queue_size = ucmd.sq_buf_size;
+
+       cq_spec.gdma_region = send_cq->gdma_region;
+       cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
+       cq_spec.modr_ctx_id = 0;
+       cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+
+       err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
+                                &cq_spec, &qp->tx_object);
+       if (err) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Failed to create wq for create raw-qp, err %d\n",
+                         err);
+               goto err_destroy_dma_region;
+       }
+
+       /* The GDMA regions are now owned by the WQ object */
+       qp->sq_gdma_region = GDMA_INVALID_DMA_REGION;
+       send_cq->gdma_region = GDMA_INVALID_DMA_REGION;
+
+       qp->sq_id = wq_spec.queue_index;
+       send_cq->id = cq_spec.queue_index;
+
+       ibdev_dbg(&mdev->ib_dev,
+                 "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
+                 qp->tx_object, qp->sq_id, send_cq->id);
+
+       resp.sqid = qp->sq_id;
+       resp.cqid = send_cq->id;
+       resp.tx_vp_offset = pd->tx_vp_offset;
+
+       err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+       if (err) {
+               ibdev_dbg(&mdev->ib_dev,
+                         "Failed copy udata for create qp-raw, %d\n",
+                         err);
+               goto err_destroy_wq_obj;
+       }
+
+       return 0;
+
+err_destroy_wq_obj:
+       mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
+
+err_destroy_dma_region:
+       mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+
+err_release_umem:
+       ib_umem_release(umem);
+
+err_free_vport:
+       mana_ib_uncfg_vport(mdev, pd, port - 1);
+
+       return err;
+}
+
+/*
+ * Verbs entry point for QP creation. Only IB_QPT_RAW_PACKET is accepted:
+ * a request carrying a receive WQ indirection table takes the RSS path,
+ * any other raw packet request takes the raw QP path. All other QP types
+ * fail with -EINVAL.
+ */
+int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+                     struct ib_udata *udata)
+{
+       if (attr->qp_type != IB_QPT_RAW_PACKET) {
+               /* Creating QP other than IB_QPT_RAW_PACKET is not supported */
+               ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
+                         attr->qp_type);
+               return -EINVAL;
+       }
+
+       /* When rwq_ind_tbl is used, it's for creating WQs for RSS */
+       if (attr->rwq_ind_tbl)
+               return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr, udata);
+
+       return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
+}
+
+/*
+ * Stub verb: this version of the driver cannot modify a QP, so every call
+ * fails with -EOPNOTSUPP regardless of its arguments.
+ */
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata)
+{
+       return -EOPNOTSUPP;
+}
+
+/*
+ * Tear down an RSS QP by destroying the hardware RQ object of every WQ in
+ * @ind_tbl on the port the QP was created on. Always returns 0.
+ */
+static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
+                                 struct ib_rwq_ind_table *ind_tbl,
+                                 struct ib_udata *udata)
+{
+       struct mana_ib_dev *mdev =
+               container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+       struct mana_context *mc = mdev->gdma_dev->driver_data;
+       /* qp->port uses 1-based IB numbering; mc->ports[] is 0-based */
+       struct mana_port_context *mpc = netdev_priv(mc->ports[qp->port - 1]);
+       int idx = 0;
+
+       while (idx < (1 << ind_tbl->log_ind_tbl_size)) {
+               struct mana_ib_wq *wq = container_of(ind_tbl->ind_tbl[idx],
+                                                    struct mana_ib_wq, ibwq);
+
+               ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n",
+                         wq->rx_object);
+               mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+               idx++;
+       }
+
+       return 0;
+}
+
+/*
+ * Tear down a raw send QP: destroy the hardware SQ object, release the
+ * send queue DMA region and umem when a umem is attached, and unconfigure
+ * the vPort. Always returns 0.
+ */
+static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+       struct mana_ib_pd *mana_pd =
+               container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+       struct mana_ib_dev *mib_dev =
+               container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+       struct mana_context *mana_ctx = mib_dev->gdma_dev->driver_data;
+       /* qp->port uses 1-based IB numbering; ports[] is 0-based */
+       struct mana_port_context *port_ctx =
+               netdev_priv(mana_ctx->ports[qp->port - 1]);
+
+       mana_destroy_wq_obj(port_ctx, GDMA_SQ, qp->tx_object);
+
+       if (qp->sq_umem) {
+               mana_ib_gd_destroy_dma_region(mib_dev, qp->sq_gdma_region);
+               ib_umem_release(qp->sq_umem);
+       }
+
+       mana_ib_uncfg_vport(mib_dev, mana_pd, qp->port - 1);
+
+       return 0;
+}
+
+/*
+ * Verbs entry point for QP destruction. A raw packet QP with a receive WQ
+ * indirection table is torn down through the RSS path, any other raw
+ * packet QP through the raw path. Other QP types are unexpected and yield
+ * -ENOENT.
+ */
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+       struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+       if (ibqp->qp_type != IB_QPT_RAW_PACKET) {
+               ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
+                         ibqp->qp_type);
+               return -ENOENT;
+       }
+
+       if (ibqp->rwq_ind_tbl)
+               return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl, udata);
+
+       return mana_ib_destroy_qp_raw(qp, udata);
+}
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
new file mode 100644 (file)
index 0000000..372d361
--- /dev/null
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Create a work queue backed by a user buffer.
+ *
+ * Copies the create command from @udata, gets a umem for the buffer it
+ * describes and creates a GDMA DMA region for that umem. The hardware RX
+ * object is not created here; rx_object starts as INVALID_MANA_HANDLE.
+ *
+ * Returns the new ib_wq or an ERR_PTR() on failure.
+ */
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+                               struct ib_wq_init_attr *init_attr,
+                               struct ib_udata *udata)
+{
+       struct mana_ib_dev *mib_dev =
+               container_of(pd->device, struct mana_ib_dev, ib_dev);
+       struct mana_ib_create_wq cmd = {};
+       struct mana_ib_wq *new_wq;
+       int rc;
+
+       if (udata->inlen < sizeof(cmd))
+               return ERR_PTR(-EINVAL);
+
+       rc = ib_copy_from_udata(&cmd, udata, min(sizeof(cmd), udata->inlen));
+       if (rc) {
+               ibdev_dbg(&mib_dev->ib_dev,
+                         "Failed to copy from udata for create wq, %d\n", rc);
+               return ERR_PTR(rc);
+       }
+
+       new_wq = kzalloc(sizeof(*new_wq), GFP_KERNEL);
+       if (!new_wq)
+               return ERR_PTR(-ENOMEM);
+
+       ibdev_dbg(&mib_dev->ib_dev, "ucmd wq_buf_addr 0x%llx\n",
+                 cmd.wq_buf_addr);
+
+       new_wq->umem = ib_umem_get(pd->device, cmd.wq_buf_addr,
+                                  cmd.wq_buf_size, IB_ACCESS_LOCAL_WRITE);
+       if (IS_ERR(new_wq->umem)) {
+               rc = PTR_ERR(new_wq->umem);
+               ibdev_dbg(&mib_dev->ib_dev,
+                         "Failed to get umem for create wq, err %d\n", rc);
+               goto free_wq;
+       }
+
+       new_wq->wqe = init_attr->max_wr;
+       new_wq->wq_buf_size = cmd.wq_buf_size;
+       new_wq->rx_object = INVALID_MANA_HANDLE;
+
+       rc = mana_ib_gd_create_dma_region(mib_dev, new_wq->umem,
+                                         &new_wq->gdma_region);
+       if (rc) {
+               ibdev_dbg(&mib_dev->ib_dev,
+                         "Failed to create dma region for create wq, %d\n",
+                         rc);
+               goto release_umem;
+       }
+
+       ibdev_dbg(&mib_dev->ib_dev,
+                 "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+                 rc, new_wq->gdma_region);
+
+       /* WQ ID is returned at wq_create time, doesn't know the value yet */
+
+       return &new_wq->ibwq;
+
+release_umem:
+       ib_umem_release(new_wq->umem);
+
+free_wq:
+       kfree(new_wq);
+
+       return ERR_PTR(rc);
+}
+
+/*
+ * Stub verb: this version of the driver cannot modify a WQ, so every call
+ * fails with -EOPNOTSUPP regardless of its arguments.
+ */
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+                     u32 wq_attr_mask, struct ib_udata *udata)
+{
+       return -EOPNOTSUPP;
+}
+
+/*
+ * Destroy a work queue: destroy its GDMA DMA region, release its umem and
+ * free the driver's WQ structure. Always returns 0.
+ */
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+{
+       struct mana_ib_dev *mib_dev =
+               container_of(ibwq->device, struct mana_ib_dev, ib_dev);
+       struct mana_ib_wq *mana_wq =
+               container_of(ibwq, struct mana_ib_wq, ibwq);
+
+       mana_ib_gd_destroy_dma_region(mib_dev, mana_wq->gdma_region);
+       ib_umem_release(mana_wq->umem);
+       kfree(mana_wq);
+
+       return 0;
+}
+
+/*
+ * Indirection table creation hook. The driver keeps no extra state for an
+ * indirection table, so this does nothing and reports success.
+ */
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+                                struct ib_rwq_ind_table_init_attr *init_attr,
+                                struct ib_udata *udata)
+{
+       return 0;
+}
+
+/*
+ * Indirection table destruction hook. The driver keeps no extra state for
+ * an indirection table, so this does nothing and reports success.
+ */
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+       return 0;
+}
index 713a8f8..20212ff 100644 (file)
@@ -412,6 +412,9 @@ int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
 
 extern const struct ethtool_ops mana_ethtool_ops;
 
+/* A CQ can be created not associated with any EQ */
+#define GDMA_CQ_NO_EQ  0xffff
+
 struct mana_obj_spec {
        u32 queue_index;
        u64 gdma_region;
index 7dd5621..e0c2553 100644 (file)
@@ -251,6 +251,7 @@ enum rdma_driver_id {
        RDMA_DRIVER_EFA,
        RDMA_DRIVER_SIW,
        RDMA_DRIVER_ERDMA,
+       RDMA_DRIVER_MANA,
 };
 
 enum ib_uverbs_gid_type {
diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h
new file mode 100644 (file)
index 0000000..5fcb31b
--- /dev/null
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef MANA_ABI_USER_H
+#define MANA_ABI_USER_H
+
+#include <linux/types.h>
+#include <rdma/ib_user_ioctl_verbs.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+
+#define MANA_IB_UVERBS_ABI_VERSION 1
+
+/*
+ * User command for CQ creation.
+ * NOTE(review): the consumer (cq.c) is not shown here; buf_addr is
+ * presumably the user address of the CQ buffer - confirm.
+ */
+struct mana_ib_create_cq {
+       __aligned_u64 buf_addr;
+};
+
+/* User command for creating a raw packet (send) QP */
+struct mana_ib_create_qp {
+       /* User address and size of the send queue buffer */
+       __aligned_u64 sq_buf_addr;
+       __u32 sq_buf_size;
+       /* Port number, 1-based (IB numbering) */
+       __u32 port;
+};
+
+/* Response returned to user space after creating a raw packet QP */
+struct mana_ib_create_qp_resp {
+       /* Queue index of the created send queue */
+       __u32 sqid;
+       /* Queue index of the send completion queue */
+       __u32 cqid;
+       /* Copied from the protection domain's tx_vp_offset */
+       __u32 tx_vp_offset;
+       __u32 reserved;
+};
+
+/* User command for creating a work queue */
+struct mana_ib_create_wq {
+       /* User address and size of the work queue buffer */
+       __aligned_u64 wq_buf_addr;
+       __u32 wq_buf_size;
+       __u32 reserved;
+};
+
+/*
+ * RX Hash function flags.
+ * The RSS QP creation path accepts only MANA_IB_RX_HASH_FUNC_TOEPLITZ.
+ */
+enum mana_ib_rx_hash_function_flags {
+       MANA_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
+};
+
+/* User command for creating an RSS QP */
+struct mana_ib_create_qp_rss {
+       /* NOTE(review): not read by the RSS QP creation code shown in qp.c */
+       __aligned_u64 rx_hash_fields_mask;
+       /* One of enum mana_ib_rx_hash_function_flags */
+       __u8 rx_hash_function;
+       __u8 reserved[7];
+       /*
+        * Number of bytes of rx_hash_key to use. With 0 the driver fills
+        * the key itself through netdev_rss_key_fill().
+        */
+       __u32 rx_hash_key_len;
+       __u8 rx_hash_key[40];
+       /* Port number, 1-based (IB numbering) */
+       __u32 port;
+};
+
+/* Queue ids for one indirection table slot of an RSS QP */
+struct rss_resp_entry {
+       /* Queue index of the slot's completion queue */
+       __u32 cqid;
+       /* Queue index of the slot's receive work queue */
+       __u32 wqid;
+};
+
+/* Response returned to user space after creating an RSS QP */
+struct mana_ib_create_qp_rss_resp {
+       /* Number of entries[] slots filled in, one per indirection slot */
+       __aligned_u64 num_entries;
+       struct rss_resp_entry entries[64];
+};
+
+#endif