IB/{hfi1, qib, rdmavt}: Schedule multi RC/UC packets instead of posting
author Michael J. Ruhl <michael.j.ruhl@intel.com>
Mon, 10 Sep 2018 16:49:27 +0000 (09:49 -0700)
committer Jason Gunthorpe <jgg@mellanox.com>
Tue, 11 Sep 2018 15:55:02 +0000 (09:55 -0600)
The post_send() path determines whether it should post directly or
schedule the post for later.  The current logic is:

  if the swqe ring is empty or (for hfi1) wqe->length <= piothreshold
    post the send
  else
    schedule

This can allow large requests to call the send engine directly.  Large
requests can potentially produce a large number of packets prior to
returning to the caller, blocking the caller from posting more requests.
Scheduling such requests instead allows better parallel processing.

Allow the driver(s) more say in this logic (pass call_send to the driver,
rather than examining a return value).

Update hfi1/qib logic to schedule the send engine if an RC or UC message
is larger than the QP MTU size.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/verbs.h
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/sw/rdmavt/qp.c
include/rdma/rdma_vt.h

index 9b1e84a..54d9ff1 100644 (file)
@@ -285,17 +285,13 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
  * hfi1_check_send_wqe - validate wqe
  * @qp - The qp
  * @wqe - The built wqe
- *
- * validate wqe.  This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * @call_send - Determine if the send should be posted or scheduled.
  *
  * Returns 0 on success, -EINVAL on failure
  *
  */
 int hfi1_check_send_wqe(struct rvt_qp *qp,
-                       struct rvt_swqe *wqe)
+                       struct rvt_swqe *wqe, bool *call_send)
 {
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct rvt_ah *ah;
@@ -305,6 +301,8 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
        case IB_QPT_UC:
                if (wqe->length > 0x80000000U)
                        return -EINVAL;
+               if (wqe->length > qp->pmtu)
+                       *call_send = false;
                break;
        case IB_QPT_SMI:
                ah = ibah_to_rvtah(wqe->ud_wr.ah);
@@ -321,7 +319,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
        default:
                break;
        }
-       return wqe->length <= piothreshold;
+       return 0;
 }
 
 /**
index a4d0650..269ec33 100644 (file)
@@ -343,7 +343,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_udata *udata);
 void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+                       bool *call_send);
 
 extern const u32 rc_only_opcode;
 extern const u32 uc_only_opcode;
index 344e401..a81905d 100644 (file)
@@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
  * qib_check_send_wqe - validate wr/wqe
  * @qp - The qp
  * @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled
  *
- * validate wr/wqe.  This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
- *
- * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ * Returns 0 on success, -EINVAL on failure
  */
 int qib_check_send_wqe(struct rvt_qp *qp,
-                      struct rvt_swqe *wqe)
+                      struct rvt_swqe *wqe, bool *call_send)
 {
        struct rvt_ah *ah;
-       int ret = 0;
 
        switch (qp->ibqp.qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
                if (wqe->length > 0x80000000U)
                        return -EINVAL;
+               if (wqe->length > qp->pmtu)
+                       *call_send = false;
                break;
        case IB_QPT_SMI:
        case IB_QPT_GSI:
@@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp,
                if (wqe->length > (1 << ah->log_pmtu))
                        return -EINVAL;
                /* progress hint */
-               ret = 1;
+               *call_send = true;
                break;
        default:
                break;
        }
-       return ret;
+       return 0;
 }
 
 #ifdef CONFIG_DEBUG_FS
index 666613e..3d7b744 100644 (file)
@@ -303,7 +303,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 
 int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
 
-int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+                      bool *call_send);
 
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
 
index 5ce403c..a9b7d7f 100644 (file)
@@ -1718,7 +1718,7 @@ static inline int rvt_qp_is_avail(
  */
 static int rvt_post_one_wr(struct rvt_qp *qp,
                           const struct ib_send_wr *wr,
-                          int *call_send)
+                          bool *call_send)
 {
        struct rvt_swqe *wqe;
        u32 next;
@@ -1825,11 +1825,9 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 
        /* general part of wqe valid - allow for driver checks */
        if (rdi->driver_f.check_send_wqe) {
-               ret = rdi->driver_f.check_send_wqe(qp, wqe);
+               ret = rdi->driver_f.check_send_wqe(qp, wqe, call_send);
                if (ret < 0)
                        goto bail_inval_free;
-               if (ret)
-                       *call_send = ret;
        }
 
        log_pmtu = qp->log_pmtu;
@@ -1897,7 +1895,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
        unsigned long flags = 0;
-       int call_send;
+       bool call_send;
        unsigned nreq = 0;
        int err = 0;
 
@@ -1930,7 +1928,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 bail:
        spin_unlock_irqrestore(&qp->s_hlock, flags);
        if (nreq) {
-               if (call_send)
+               /*
+                * Only call do_send if there is exactly one packet, and the
+                * driver said it was ok.
+                */
+               if (nreq == 1 && call_send)
                        rdi->driver_f.do_send(qp);
                else
                        rdi->driver_f.schedule_send_no_lock(qp);
index e79229a..e32facd 100644 (file)
@@ -214,8 +214,14 @@ struct rvt_driver_provided {
        void (*schedule_send)(struct rvt_qp *qp);
        void (*schedule_send_no_lock)(struct rvt_qp *qp);
 
-       /* Driver specific work request checking */
-       int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+       /*
+        * Validate the wqe.  This needs to be done prior to inserting the
+        * wqe into the ring, but after the wqe has been set up.  Allow for
+        * driver specific work request checking by providing a callback.
+        * call_send indicates if the wqe should be posted or scheduled.
+        */
+       int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+                             bool *call_send);
 
        /*
         * Sometimes rdmavt needs to kick the driver's send progress. That is