IB/mlx4: Separate tunnel and wire bufs parameters
authorHåkon Bugge <haakon.bugge@oracle.com>
Mon, 3 Aug 2020 06:19:38 +0000 (08:19 +0200)
committerJason Gunthorpe <jgg@nvidia.com>
Mon, 24 Aug 2020 14:31:21 +0000 (11:31 -0300)
Using CX-3 in virtualized mode, MAD packets are proxied through the PF
driver. The feed is N tunnel QPs, and what is received from the VFs is
multiplexed out on the wire QP. Since this is a many-to-one scenario, it
is better to have separate initialization parameters for the two usages.

The numbers of wire and tunnel bufs are increased to 2K and 512,
respectively. With this set of parameters, a system consisting of eight
physical servers, each hosting eight VMs, plus 14 bare-metal I/O
servers, can run switch fail-over without seeing:

mlx4_ib_demux_mad: failed sending GSI to slave 3 via tunnel qp (-11)

or

mlx4_ib_multiplex_mad: failed sending GSI to wire on behalf of slave 2 (-11)

Fixes: 3cf69cc8dbeb ("IB/mlx4: Add CM paravirtualization")
Link: https://lore.kernel.org/r/20200803061941.1139994-4-haakon.bugge@oracle.com
Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/mlx4_ib.h

index 932786b0689e6cb1215a44157e7fdf4a31052123..e1310820352ea0506737c943eb499fcf01009482 100644 (file)
@@ -1391,10 +1391,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 
        spin_lock(&sqp->tx_lock);
        if (sqp->tx_ix_head - sqp->tx_ix_tail >=
-           (MLX4_NUM_TUNNEL_BUFS - 1))
+           (MLX4_NUM_WIRE_BUFS - 1))
                ret = -EAGAIN;
        else
-               wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+               wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
        spin_unlock(&sqp->tx_lock);
        if (ret)
                goto out;
@@ -1590,19 +1590,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
        int i;
        struct mlx4_ib_demux_pv_qp *tun_qp;
        int rx_buf_size, tx_buf_size;
+       const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
 
        if (qp_type > IB_QPT_GSI)
                return -EINVAL;
 
        tun_qp = &ctx->qp[qp_type];
 
-       tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+       tun_qp->ring = kcalloc(nmbr_bufs,
                               sizeof(struct mlx4_ib_buf),
                               GFP_KERNEL);
        if (!tun_qp->ring)
                return -ENOMEM;
 
-       tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+       tun_qp->tx_ring = kcalloc(nmbr_bufs,
                                  sizeof (struct mlx4_ib_tun_tx_buf),
                                  GFP_KERNEL);
        if (!tun_qp->tx_ring) {
@@ -1619,7 +1620,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
        }
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
                if (!tun_qp->ring[i].addr)
                        goto err;
@@ -1633,7 +1634,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                }
        }
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                tun_qp->tx_ring[i].buf.addr =
                        kmalloc(tx_buf_size, GFP_KERNEL);
                if (!tun_qp->tx_ring[i].buf.addr)
@@ -1664,7 +1665,7 @@ tx_err:
                                    tx_buf_size, DMA_TO_DEVICE);
                kfree(tun_qp->tx_ring[i].buf.addr);
        }
-       i = MLX4_NUM_TUNNEL_BUFS;
+       i = nmbr_bufs;
 err:
        while (i > 0) {
                --i;
@@ -1685,6 +1686,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
        int i;
        struct mlx4_ib_demux_pv_qp *tun_qp;
        int rx_buf_size, tx_buf_size;
+       const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
 
        if (qp_type > IB_QPT_GSI)
                return;
@@ -1699,13 +1701,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
        }
 
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
                                    rx_buf_size, DMA_FROM_DEVICE);
                kfree(tun_qp->ring[i].addr);
        }
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
                                    tx_buf_size, DMA_TO_DEVICE);
                kfree(tun_qp->tx_ring[i].buf.addr);
@@ -1785,6 +1787,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
        struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
        struct ib_qp_attr attr;
        int qp_attr_mask_INIT;
+       const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
 
        if (qp_type > IB_QPT_GSI)
                return -EINVAL;
@@ -1795,8 +1798,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
        qp_init_attr.init_attr.send_cq = ctx->cq;
        qp_init_attr.init_attr.recv_cq = ctx->cq;
        qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-       qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
-       qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+       qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
+       qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
        qp_init_attr.init_attr.cap.max_send_sge = 1;
        qp_init_attr.init_attr.cap.max_recv_sge = 1;
        if (create_tun) {
@@ -1858,7 +1861,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
                goto err_qp;
        }
 
-       for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+       for (i = 0; i < nmbr_bufs; i++) {
                ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
                if (ret) {
                        pr_err(" mlx4_ib_post_pv_buf error"
@@ -1894,8 +1897,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
                        switch (wc.opcode) {
                        case IB_WC_SEND:
                                kfree(sqp->tx_ring[wc.wr_id &
-                                     (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
-                               sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+                                     (MLX4_NUM_WIRE_BUFS - 1)].ah);
+                               sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
                                        = NULL;
                                spin_lock(&sqp->tx_lock);
                                sqp->tx_ix_tail++;
@@ -1904,13 +1907,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
                        case IB_WC_RECV:
                                mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
                                                (sqp->ring[wc.wr_id &
-                                               (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+                                               (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
                                grh = &(((struct mlx4_mad_rcv_buf *)
                                                (sqp->ring[wc.wr_id &
-                                               (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+                                               (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
                                mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
                                if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
-                                                          (MLX4_NUM_TUNNEL_BUFS - 1)))
+                                                          (MLX4_NUM_WIRE_BUFS - 1)))
                                        pr_err("Failed reposting SQP "
                                               "buf:%lld\n", wc.wr_id);
                                break;
@@ -1923,8 +1926,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
                                 ctx->slave, wc.status, wc.wr_id);
                        if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
                                kfree(sqp->tx_ring[wc.wr_id &
-                                     (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
-                               sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+                                     (MLX4_NUM_WIRE_BUFS - 1)].ah);
+                               sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
                                        = NULL;
                                spin_lock(&sqp->tx_lock);
                                sqp->tx_ix_tail++;
@@ -1964,6 +1967,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
 {
        int ret, cq_size;
        struct ib_cq_init_attr cq_attr = {};
+       const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
 
        if (ctx->state != DEMUX_PV_STATE_DOWN)
                return -EEXIST;
@@ -1988,7 +1992,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
                goto err_out_qp0;
        }
 
-       cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+       cq_size = 2 * nmbr_bufs;
        if (ctx->has_smi)
                cq_size *= 2;
 
index 38e87a700a2a203df2cd7ff2b1246252c82b8f2a..db3cc61de0db2b68476adad001163151733ede1f 100644 (file)
@@ -233,7 +233,8 @@ enum mlx4_ib_mad_ifc_flags {
 };
 
 enum {
-       MLX4_NUM_TUNNEL_BUFS            = 256,
+       MLX4_NUM_TUNNEL_BUFS            = 512,
+       MLX4_NUM_WIRE_BUFS              = 2048,
 };
 
 struct mlx4_ib_tunnel_header {