Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 14 Nov 2019 16:37:48 +0000 (08:37 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 14 Nov 2019 16:37:48 +0000 (08:37 -0800)
Pull RDMA fixes from Jason Gunthorpe:
 "Bug fixes for old bugs in the hns and hfi1 drivers:

   - Calculate various values in hns properly to avoid over/underflows
     in some cases

   - Fix an oops, PCI negotiation on Gen4 systems, and bugs related to
     retries"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/hns: Correct the value of srq_desc_size
  RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN
  IB/hfi1: TID RDMA WRITE should not return IB_WC_RNR_RETRY_EXC_ERR
  IB/hfi1: Calculate flow weight based on QP MTU for TID RDMA
  IB/hfi1: Ensure r_tid_ack is valid before building TID RDMA ACK packet
  IB/hfi1: Ensure full Gen3 speed in a Gen4 system

drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h
drivers/infiniband/hw/hns/hns_roce_hem.h
drivers/infiniband/hw/hns/hns_roce_srq.c

diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 71cb952..26b792b 100644
@@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void)
                goto bail_dev;
        }
 
-       hfi1_compute_tid_rdma_flow_wt();
        /*
         * These must be called before the driver is registered with
         * the PCI subsystem.
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 61aa550..61362bd 100644
@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
        /*
         * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
         */
-       if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+       if (parent &&
+           (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+            dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
                dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
                dd->link_gen3_capable = 0;
        }
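
The old test treated any parent bus speed other than 8.0 GT/s as "no Gen3
support", which misfires on Gen4 systems: a Gen4 bridge reports
PCIE_SPEED_16_0GT, so the device was wrongly capped below Gen3. The fix only
downgrades when the bridge genuinely tops out at Gen1 (2.5 GT/s) or Gen2
(5.0 GT/s). A minimal userspace sketch of the corrected predicate, using
stand-in values for the kernel's enum pci_bus_speed:

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-ins for the kernel's enum pci_bus_speed. */
    enum bus_speed { SPEED_2_5GT, SPEED_5_0GT, SPEED_8_0GT, SPEED_16_0GT };

    /* Gen3 is possible unless the parent tops out below 8.0 GT/s. */
    static bool parent_gen3_capable(enum bus_speed max_bus_speed)
    {
        return max_bus_speed != SPEED_2_5GT &&
               max_bus_speed != SPEED_5_0GT;
    }

    int main(void)
    {
        /* A Gen4 (16.0 GT/s) parent must still count as Gen3 capable;
         * the old "!= 8.0 GT/s" test rejected it. */
        printf("Gen4 parent: %d\n", parent_gen3_capable(SPEED_16_0GT)); /* 1 */
        printf("Gen2 parent: %d\n", parent_gen3_capable(SPEED_5_0GT));  /* 0 */
        return 0;
    }
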
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 513a8aa..1a3c647 100644
@@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
                if (qp->s_flags & RVT_S_WAIT_RNR)
                        goto bail_stop;
                rdi = ib_to_rvt(qp->ibqp.device);
-               if (qp->s_rnr_retry == 0 &&
-                   !((rdi->post_parms[wqe->wr.opcode].flags &
-                     RVT_OPERATION_IGN_RNR_CNT) &&
-                     qp->s_rnr_retry_cnt == 0)) {
-                       status = IB_WC_RNR_RETRY_EXC_ERR;
-                       goto class_b;
+               if (!(rdi->post_parms[wqe->wr.opcode].flags &
+                      RVT_OPERATION_IGN_RNR_CNT)) {
+                       if (qp->s_rnr_retry == 0) {
+                               status = IB_WC_RNR_RETRY_EXC_ERR;
+                               goto class_b;
+                       }
+                       if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+                               qp->s_rnr_retry--;
                }
-               if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
-                       qp->s_rnr_retry--;
 
                /*
                 * The last valid PSN is the previous PSN. For TID RDMA WRITE
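
The rework hinges on RVT_OPERATION_IGN_RNR_CNT: for opcodes that set it (TID
RDMA WRITE), RNR NAKs no longer consume retries at all, so such a request can
never fail with IB_WC_RNR_RETRY_EXC_ERR. For every other opcode the behavior
is unchanged: error out once the budget is spent, and only decrement a finite
budget (the IB spec encodes an rnr_retry_cnt of 7 as "retry forever"). A
standalone sketch of the decision, with hypothetical names:

    #include <stdbool.h>

    enum rnr_action { RNR_FATAL, RNR_WAIT };

    /* Sketch of the reworked RNR handling in do_rc_ack(). */
    static enum rnr_action rnr_decide(bool ign_rnr_cnt,
                                      unsigned int *s_rnr_retry,
                                      unsigned int s_rnr_retry_cnt)
    {
        if (!ign_rnr_cnt) {              /* not a TID RDMA WRITE */
            if (*s_rnr_retry == 0)
                return RNR_FATAL;        /* IB_WC_RNR_RETRY_EXC_ERR */
            if (s_rnr_retry_cnt > 0 && s_rnr_retry_cnt < 7)
                (*s_rnr_retry)--;        /* finite budget: use one */
        }
        return RNR_WAIT;                 /* back off, retry later */
    }
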
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index f21fca3..e53f542 100644
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
  * C - Capcode
  */
 
-static u32 tid_rdma_flow_wt;
-
 static void tid_rdma_trigger_resume(struct work_struct *work);
 static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
 static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
                                   struct tid_rdma_flow *flow,
                                   bool fecn);
 
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+       if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+               priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       priv->s_flags |= RVT_S_ACK_PENDING;
+       hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+       validate_r_tid_ack(qp->priv);
+       tid_rdma_schedule_ack(qp);
+}
+
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
        return
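
The three helpers added above fold a pattern that was open-coded at several
call sites in this file. Note the split: tid_rdma_schedule_ack() only sets
RVT_S_ACK_PENDING and kicks the send engine, while tid_rdma_trigger_ack()
first runs validate_r_tid_ack(). Converting the RESYNC and flow-allocation
RNR NAK paths to the latter is the substance of the "Ensure r_tid_ack is
valid" fix: those sites previously scheduled a TID RDMA ACK while r_tid_ack
could still be HFI1_QP_WQE_INVALID.
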
@@ -3005,10 +3023,7 @@ nak_psn:
                qpriv->s_nak_state = IB_NAK_PSN_ERROR;
                /* We are NAK'ing the next expected PSN */
                qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
-               qpriv->s_flags |= RVT_S_ACK_PENDING;
-               if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
-                       qpriv->r_tid_ack = qpriv->r_tid_tail;
-               hfi1_schedule_tid_send(qp);
+               tid_rdma_trigger_ack(qp);
        }
        goto unlock;
 }
@@ -3371,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
        return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
 }
 
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
 {
        /*
         * Heuristic for computing the RNR timeout when waiting on the flow
         * queue. Rather than a computationally expensive exact estimate of when
         * a flow will be available, we assume that if a QP is at position N in
         * the flow queue it has to wait approximately (N + 1) * (number of
-        * segments between two sync points), assuming PMTU of 4K. The rationale
-        * for this is that flows are released and recycled at each sync point.
+        * segments between two sync points). The rationale for this is that
+        * flows are released and recycled at each sync point.
         */
-       tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
-               TID_RDMA_MAX_SEGMENT_SIZE;
+       return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
 }
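
Computing the weight per QP (and shifting by TID_RDMA_SEGMENT_SHIFT, i.e.
dividing by the 256 KiB segment size) replaces a module-global value that had
baked in a 4 KiB PMTU at init time. A worked example of the arithmetic,
assuming an illustrative MAX_TID_FLOW_PSN of 2048 (the real value lives in
the hfi1 headers):

    #include <stdio.h>

    #define TID_RDMA_SEGMENT_SHIFT  18      /* 256 KiB = BIT(18)       */
    #define MAX_TID_FLOW_PSN        2048    /* illustrative value only */

    /* Per-QP weight: segment-waits charged per flow-queue position. */
    static unsigned int flow_wt(unsigned int pmtu)
    {
        return (MAX_TID_FLOW_PSN * pmtu) >> TID_RDMA_SEGMENT_SHIFT;
    }

    int main(void)
    {
        /* The old global always assumed pmtu == 4096; a 2 KiB-MTU QP
         * now waits half as long per queue position. */
        printf("pmtu 4096 -> wt %u\n", flow_wt(4096));  /* 32 */
        printf("pmtu 2048 -> wt %u\n", flow_wt(2048));  /* 16 */
        return 0;
    }
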
 
 static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
@@ -3505,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
                        ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
                        if (ret) {
-                               to_seg = tid_rdma_flow_wt *
+                               to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
                                        position_in_queue(qpriv,
                                                          &rcd->flow_queue);
                                break;
@@ -3526,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                /*
                 * If overtaking req->acked_tail, send an RNR NAK. Because the
                 * QP is not queued in this case, and the issue can only be
-                * caused due a delay in scheduling the second leg which we
+                * caused by a delay in scheduling the second leg which we
                 * cannot estimate, we use a rather arbitrary RNR timeout of
                 * (MAX_FLOWS / 2) segments
                 */
@@ -3534,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                                MAX_FLOWS)) {
                        ret = -EAGAIN;
                        to_seg = MAX_FLOWS >> 1;
-                       qpriv->s_flags |= RVT_S_ACK_PENDING;
-                       hfi1_schedule_tid_send(qp);
+                       tid_rdma_trigger_ack(qp);
                        break;
                }
 
@@ -4335,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
        trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
                                          req);
        trace_hfi1_tid_write_rsp_rcv_data(qp);
-       if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-               priv->r_tid_ack = priv->r_tid_tail;
+       validate_r_tid_ack(priv);
 
        if (opcode == TID_OP(WRITE_DATA_LAST)) {
                release_rdma_sge_mr(e);
@@ -4375,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
        }
 
 done:
-       priv->s_flags |= RVT_S_ACK_PENDING;
-       hfi1_schedule_tid_send(qp);
+       tid_rdma_schedule_ack(qp);
 exit:
        priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
        if (fecn)
@@ -4388,10 +4399,7 @@ send_nak:
        if (!priv->s_nak_state) {
                priv->s_nak_state = IB_NAK_PSN_ERROR;
                priv->s_nak_psn = flow->flow_state.r_next_psn;
-               priv->s_flags |= RVT_S_ACK_PENDING;
-               if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-                       priv->r_tid_ack = priv->r_tid_tail;
-               hfi1_schedule_tid_send(qp);
+               tid_rdma_trigger_ack(qp);
        }
        goto done;
 }
@@ -4939,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
        qpriv->resync = true;
        /* RESYNC request always gets a TID RDMA ACK. */
        qpriv->s_nak_state = 0;
-       qpriv->s_flags |= RVT_S_ACK_PENDING;
-       hfi1_schedule_tid_send(qp);
+       tid_rdma_trigger_ack(qp);
 bail:
        if (fecn)
                qp->s_flags |= RVT_S_ECN;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 1c53618..6e82df2 100644
@@ -17,6 +17,7 @@
 #define TID_RDMA_MIN_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_PAGES              (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT         18
 
 /*
  * Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
                                  struct ib_other_headers *ohdr,
                                  u32 *bth1, u32 *bth2, u32 *len);
 
-void hfi1_compute_tid_rdma_flow_wt(void);
-
 void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
 
 u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 8678327..3bb8f78 100644
@@ -59,7 +59,7 @@ enum {
 
 #define HNS_ROCE_HEM_CHUNK_LEN \
         ((256 - sizeof(struct list_head) - 2 * sizeof(int)) /   \
-        (sizeof(struct scatterlist)))
+        (sizeof(struct scatterlist) + sizeof(void *)))
 
 #define check_whether_bt_num_3(type, hop_num) \
        (type < HEM_TYPE_MTT && hop_num == 2)
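
The chunk structure carries, besides a list_head and two ints, both a
scatterlist array and a parallel array of void * buffer addresses, one entry
each per chunk slot; dividing the ~256-byte budget by
sizeof(struct scatterlist) alone therefore overstated how many slots fit. A
worked example with illustrative 64-bit sizes (the real ones depend on
kernel config):

    #include <stdio.h>

    #define SIZEOF_LIST_HEAD    16  /* two pointers on 64-bit  */
    #define SIZEOF_SCATTERLIST  32  /* without CONFIG_DEBUG_SG */
    #define SIZEOF_VOID_PTR      8

    int main(void)
    {
        int budget = 256 - SIZEOF_LIST_HEAD - 2 * (int)sizeof(int);

        /* Old formula ignored the per-slot void * buffer pointer. */
        printf("old len: %d\n", budget / SIZEOF_SCATTERLIST);       /* 7 */
        printf("new len: %d\n",
               budget / (SIZEOF_SCATTERLIST + SIZEOF_VOID_PTR));    /* 5 */
        return 0;
    }
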
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 9591457..43ea2c1 100644
@@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
        srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
        srq->max_gs = srq_init_attr->attr.max_sge;
 
-       srq_desc_size = max(16, 16 * srq->max_gs);
+       srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
 
        srq->wqe_shift = ilog2(srq_desc_size);
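
ilog2() rounds down, so a descriptor size that is not a power of two used to
yield a wqe_shift encoding a stride smaller than the descriptor itself: for
example, max_gs = 3 gives a 48-byte descriptor but ilog2(48) = 5, i.e. a
32-byte stride. Rounding the size up to a power of two first makes the shift
exact. A standalone sketch of the before/after arithmetic, with minimal
stand-ins for the kernel helpers of the same name:

    #include <stdio.h>

    static unsigned int my_roundup_pow_of_two(unsigned int x)
    {
        unsigned int r = 1;

        while (r < x)
            r <<= 1;
        return r;
    }

    static unsigned int my_ilog2(unsigned int x)    /* floor(log2(x)) */
    {
        unsigned int l = 0;

        while (x >>= 1)
            l++;
        return l;
    }

    int main(void)
    {
        unsigned int max_gs = 3;                    /* three SGEs */
        unsigned int desc = 16 * max_gs;            /* 48 bytes   */

        printf("old wqe_shift: %u\n", my_ilog2(desc));              /* 5 */
        printf("new wqe_shift: %u\n",
               my_ilog2(my_roundup_pow_of_two(desc)));              /* 6 */
        return 0;
    }
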