IB/mlx5: Implement UD QP offloads for IPoIB in the TX flow
author	Erez Shitrit <erezsh@mellanox.com>
Sun, 21 Feb 2016 14:27:17 +0000 (16:27 +0200)
committer	Doug Ledford <dledford@redhat.com>
Tue, 1 Mar 2016 15:57:15 +0000 (10:57 -0500)
In order to support LSO and CSUM in the TX flow, the driver does the
following (a caller-side usage sketch follows the list):
* Adds an LSO bit to enum mlx5_ib_qp_flags, indicating a QP that
  supports LSO offloads.
* Enables the offload when the QP is created, and accepts the dedicated
  work request opcode (IB_WR_LSO) on such a QP.
* Calculates the size of the WQE according to the new WQE format that
  supports these offloads.
* Handles the new WQE format in the post-send path: sets the relevant
  fields and copies the needed data.
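
For context, a minimal caller-side sketch (not part of this patch) of how
an IPoIB-style consumer would request and use these offloads through the
verbs API; pd, cq, ah, sge_list, nsge, dqpn, qkey, header_buf, header_len
and mss are placeholder variables:

    struct ib_qp_init_attr init_attr = {
            .qp_type      = IB_QPT_UD,
            .send_cq      = cq,
            .recv_cq      = cq,
            .sq_sig_type  = IB_SIGNAL_ALL_WR,
            .cap          = { .max_send_wr = 64, .max_recv_wr = 64,
                              .max_send_sge = 1, .max_recv_sge = 1 },
            /* request the UD LSO offload added by this patch; creation
             * fails with -EOPNOTSUPP if the HCA lacks ipoib_basic_offloads */
            .create_flags = IB_QP_CREATE_IPOIB_UD_LSO,
    };
    struct ib_qp *qp = ib_create_qp(pd, &init_attr);

    /* posting a TSO send: header/hlen/mss live in struct ib_ud_wr */
    struct ib_ud_wr ud_wr = {
            .wr = {
                    .opcode     = IB_WR_LSO,
                    .send_flags = IB_SEND_IP_CSUM, /* L3/L4 csum offload */
                    .sg_list    = sge_list,        /* payload SGEs */
                    .num_sge    = nsge,
            },
            .ah          = ah,
            .remote_qpn  = dqpn,
            .remote_qkey = qkey,
            .header      = header_buf,  /* copied inline into the WQE */
            .hlen        = header_len,
            .mss         = mss,
    };
    struct ib_send_wr *bad_wr;
    int err = ib_post_send(qp, &ud_wr.wr, &bad_wr);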

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/qp.c

index 03c418c..76b0939 100644
@@ -504,6 +504,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
            (MLX5_CAP_ETH(dev->mdev, csum_cap)))
                        props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
 
+       if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
+               props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+               props->device_cap_flags |= IB_DEVICE_UD_TSO;
+       }
+
        props->vendor_part_id      = mdev->pdev->device;
        props->hw_ver              = mdev->pdev->revision;
 
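
A consumer is expected to gate its use of the offloads on these new
capability bits; a minimal sketch, assuming dev_attr is a struct
ib_device_attr filled in by a device query (illustrative, not from this
patch):

    bool tx_tso  = !!(dev_attr.device_cap_flags & IB_DEVICE_UD_TSO);
    bool tx_csum = !!(dev_attr.device_cap_flags & IB_DEVICE_UD_IP_CSUM);

    /* only then may the caller post IB_WR_LSO work requests or set
     * IB_SEND_IP_CSUM on UD sends */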
index d2b9737..14396b0 100644
@@ -325,11 +325,12 @@ struct mlx5_ib_cq_buf {
 };
 
 enum mlx5_ib_qp_flags {
-       MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = 1 << 0,
-       MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
-       MLX5_IB_QP_CROSS_CHANNEL                = 1 << 2,
-       MLX5_IB_QP_MANAGED_SEND                 = 1 << 3,
-       MLX5_IB_QP_MANAGED_RECV                 = 1 << 4,
+       MLX5_IB_QP_LSO                          = IB_QP_CREATE_IPOIB_UD_LSO,
+       MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+       MLX5_IB_QP_CROSS_CHANNEL            = IB_QP_CREATE_CROSS_CHANNEL,
+       MLX5_IB_QP_MANAGED_SEND             = IB_QP_CREATE_MANAGED_SEND,
+       MLX5_IB_QP_MANAGED_RECV             = IB_QP_CREATE_MANAGED_RECV,
+       MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 5,
 };
 
 struct mlx5_umr_wr {
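
Note that the reordered enum deliberately aliases the mlx5-internal flags
to the matching IB_QP_CREATE_* values (MLX5_IB_QP_SIGNATURE_HANDLING, which
has no verbs counterpart, moves to a free bit). That equivalence is what
lets the TX-flow hunk below test qp->flags against IB_QP_CREATE_IPOIB_UD_LSO
directly; an illustrative compile-time check (inside any function, not part
of the patch):

    BUILD_BUG_ON(MLX5_IB_QP_LSO != IB_QP_CREATE_IPOIB_UD_LSO);

    /* hence (qp->flags & MLX5_IB_QP_LSO) and
     * (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) test the same bit */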
index 34cb8e8..baa8808 100644
@@ -58,6 +58,7 @@ enum {
 
 static const u32 mlx5_ib_opcode[] = {
        [IB_WR_SEND]                            = MLX5_OPCODE_SEND,
+       [IB_WR_LSO]                             = MLX5_OPCODE_LSO,
        [IB_WR_SEND_WITH_IMM]                   = MLX5_OPCODE_SEND_IMM,
        [IB_WR_RDMA_WRITE]                      = MLX5_OPCODE_RDMA_WRITE,
        [IB_WR_RDMA_WRITE_WITH_IMM]             = MLX5_OPCODE_RDMA_WRITE_IMM,
@@ -72,6 +73,9 @@ static const u32 mlx5_ib_opcode[] = {
        [MLX5_IB_WR_UMR]                        = MLX5_OPCODE_UMR,
 };
 
+struct mlx5_wqe_eth_pad {
+       u8 rsvd0[16];
+};
 
 static int is_qp0(enum ib_qp_type qp_type)
 {
@@ -260,11 +264,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
        return 0;
 }
 
-static int sq_overhead(enum ib_qp_type qp_type)
+static int sq_overhead(struct ib_qp_init_attr *attr)
 {
        int size = 0;
 
-       switch (qp_type) {
+       switch (attr->qp_type) {
        case IB_QPT_XRC_INI:
                size += sizeof(struct mlx5_wqe_xrc_seg);
                /* fall through */
@@ -287,6 +291,10 @@ static int sq_overhead(enum ib_qp_type qp_type)
                break;
 
        case IB_QPT_UD:
+               if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
+                       size += sizeof(struct mlx5_wqe_eth_pad) +
+                               sizeof(struct mlx5_wqe_eth_seg);
+               /* fall through */
        case IB_QPT_SMI:
        case IB_QPT_GSI:
                size += sizeof(struct mlx5_wqe_ctrl_seg) +
@@ -311,7 +319,7 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
        int inl_size = 0;
        int size;
 
-       size = sq_overhead(attr->qp_type);
+       size = sq_overhead(attr);
        if (size < 0)
                return size;
 
@@ -348,8 +356,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
                return -EINVAL;
        }
 
-       qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
-               sizeof(struct mlx5_wqe_inline_seg);
+       qp->max_inline_data = wqe_size - sq_overhead(attr) -
+                             sizeof(struct mlx5_wqe_inline_seg);
        attr->cap.max_inline_data = qp->max_inline_data;
 
        if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
@@ -783,7 +791,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
        int err;
 
        uuari = &dev->mdev->priv.uuari;
-       if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+       if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
+                                       IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+                                       IB_QP_CREATE_IPOIB_UD_LSO))
                return -EINVAL;
 
        if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
@@ -1228,6 +1238,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
                        qp->flags |= MLX5_IB_QP_MANAGED_RECV;
        }
+
+       if (init_attr->qp_type == IB_QPT_UD &&
+           (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO))
+               if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
+                       mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n");
+                       return -EOPNOTSUPP;
+               }
+
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 
@@ -1385,6 +1403,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                /* 0xffffff means we ask to work with cqe version 0 */
                MLX5_SET(qpc, qpc, user_index, uidx);
        }
+       /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicate an ipoib qp */
+       if (init_attr->qp_type == IB_QPT_UD &&
+           (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
+               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+               MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
+               qp->flags |= MLX5_IB_QP_LSO;
+       }
 
        if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
                qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
@@ -2442,6 +2467,59 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
        rseg->reserved = 0;
 }
 
+static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
+                        struct ib_send_wr *wr, void *qend,
+                        struct mlx5_ib_qp *qp, int *size)
+{
+       void *seg = eseg;
+
+       memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
+
+       if (wr->send_flags & IB_SEND_IP_CSUM)
+               eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
+                                MLX5_ETH_WQE_L4_CSUM;
+
+       seg += sizeof(struct mlx5_wqe_eth_seg);
+       *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
+
+       if (wr->opcode == IB_WR_LSO) {
+               struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
+               int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
+               u64 left, leftlen, copysz;
+               void *pdata = ud_wr->header;
+
+               left = ud_wr->hlen;
+               eseg->mss = cpu_to_be16(ud_wr->mss);
+               eseg->inline_hdr_sz = cpu_to_be16(left);
+
+               /*
+                * check if there is space till the end of queue, if yes,
+                * copy all in one shot, otherwise copy till the end of queue,
+                * wrap back to the start of the queue and then copy the rest
+                */
+               leftlen = qend - (void *)eseg->inline_hdr_start;
+               copysz = min_t(u64, leftlen, left);
+
+               memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
+
+               if (likely(copysz > size_of_inl_hdr_start)) {
+                       seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
+                       *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
+               }
+
+               if (unlikely(copysz < left)) { /* the last wqe in the queue */
+                       seg = mlx5_get_send_wqe(qp, 0);
+                       left -= copysz;
+                       pdata += copysz;
+                       memcpy(seg, pdata, left);
+                       seg += ALIGN(left, 16);
+                       *size += ALIGN(left, 16) / 16;
+               }
+       }
+
+       return seg;
+}
+
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
                             struct ib_send_wr *wr)
 {
@@ -3373,7 +3451,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        }
                        break;
 
-               case IB_QPT_UD:
                case IB_QPT_SMI:
                case IB_QPT_GSI:
                        set_datagram_seg(seg, wr);
@@ -3382,7 +3459,29 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        if (unlikely((seg == qend)))
                                seg = mlx5_get_send_wqe(qp, 0);
                        break;
+               case IB_QPT_UD:
+                       set_datagram_seg(seg, wr);
+                       seg += sizeof(struct mlx5_wqe_datagram_seg);
+                       size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
 
+                       if (unlikely((seg == qend)))
+                               seg = mlx5_get_send_wqe(qp, 0);
+
+                       /* handle qp that supports ud offload */
+                       if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
+                               struct mlx5_wqe_eth_pad *pad;
+
+                               pad = seg;
+                               memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
+                               seg += sizeof(struct mlx5_wqe_eth_pad);
+                               size += sizeof(struct mlx5_wqe_eth_pad) / 16;
+
+                               seg = set_eth_seg(seg, wr, qend, qp, &size);
+
+                               if (unlikely((seg == qend)))
+                                       seg = mlx5_get_send_wqe(qp, 0);
+                       }
+                       break;
                case MLX5_IB_QPT_REG_UMR:
                        if (wr->opcode != MLX5_IB_WR_UMR) {
                                err = -EINVAL;
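
A closing note on the inline-header copy in set_eth_seg() above: the 2-byte
inline_hdr_start field sits at the tail of the eth segment, so the copy
destination begins sizeof(eseg->inline_hdr_start) bytes before the
already-advanced seg pointer. A worked example with illustrative sizes
(reusing the hunk's variables, not additional driver code):

    /* header of hlen = 14 bytes, plenty of room before qend:
     *   leftlen = qend - inline_hdr_start  -> large
     *   copysz  = min(leftlen, 14)         -> 14
     * 2 bytes land in inline_hdr_start, 12 spill past the eth segment,
     * so the WQE grows by ALIGN(14 - 2, 16) = 16 bytes:
     */
    seg   += ALIGN(copysz - sizeof(eseg->inline_hdr_start), 16);      /* +16 */
    *size += ALIGN(copysz - sizeof(eseg->inline_hdr_start), 16) / 16; /* +1  */
    /* only when copysz < hlen (the WQE hit the end of the SQ buffer) does
     * the copy wrap to mlx5_get_send_wqe(qp, 0) for the remaining bytes */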