IB/hfi1: Process qp wait list in IRQ thread periodically
author Mike Marciniszyn <mike.marciniszyn@intel.com>
Wed, 8 Feb 2017 13:26:02 +0000 (05:26 -0800)
committer Doug Ledford <dledford@redhat.com>
Sun, 19 Feb 2017 14:18:32 +0000 (09:18 -0500)
In the event that the IRQ thread is extremely busy, the
processing of an rcd wait list can be delayed by quite
a bit until the IRQ thread completes its work.

The QP reset reference count wait can then appear to be stuck, thus
causing a QP destroy to emit the hung task diagnostic.

Fix by processing the qp wait list periodically from the thread.  The
interval is a multiple (currently 4) of MAX_PKT_RECV.

Also, reduce some of the excessive inlining. The guideline is that
per-packet code may stay inline; otherwise the choice is based on
likelihood of execution.

Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/driver.c

index 4fbaee6..29db673 100644 (file)
@@ -100,6 +100,11 @@ MODULE_VERSION(HFI1_DRIVER_VERSION);
  * MAX_PKT_RCV is the max # if packets processed per receive interrupt.
  */
 #define MAX_PKT_RECV 64
+/*
+ * MAX_PKT_RECV_THREAD is the max # of packets processed before
+ * the qp_wait_list queue is flushed.
+ */
+#define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
 #define EGR_HEAD_UPDATE_THRESHOLD 16
 
 struct hfi1_ib_stats hfi1_stats;
@@ -259,7 +264,7 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
  * allowed size ranges for the respective type and, optionally,
  * return the proper encoding.
  */
-inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
 {
        if (unlikely(!PAGE_ALIGNED(size)))
                return 0;
@@ -654,24 +659,68 @@ next:
        }
 }
 
-static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd)
+{
+       struct rvt_qp *qp, *nqp;
+
+       /*
+        * Iterate over all QPs waiting to respond.
+        * The list won't change since the IRQ is only run on one CPU.
+        */
+       list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
+               list_del_init(&qp->rspwait);
+               if (qp->r_flags & RVT_R_RSP_NAK) {
+                       qp->r_flags &= ~RVT_R_RSP_NAK;
+                       hfi1_send_rc_ack(rcd, qp, 0);
+               }
+               if (qp->r_flags & RVT_R_RSP_SEND) {
+                       unsigned long flags;
+
+                       qp->r_flags &= ~RVT_R_RSP_SEND;
+                       spin_lock_irqsave(&qp->s_lock, flags);
+                       if (ib_rvt_state_ops[qp->state] &
+                                       RVT_PROCESS_OR_FLUSH_SEND)
+                               hfi1_schedule_send(qp);
+                       spin_unlock_irqrestore(&qp->s_lock, flags);
+               }
+               rvt_put_qp(qp);
+       }
+}
+
+static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
+{
+       if (thread) {
+               if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
+                       /* allow deferred processing */
+                       process_rcv_qp_work(packet->rcd);
+               cond_resched();
+               return RCV_PKT_OK;
+       } else {
+               this_cpu_inc(*packet->rcd->dd->rcv_limit);
+               return RCV_PKT_LIMIT;
+       }
+}
+
+static inline int check_max_packet(struct hfi1_packet *packet, int thread)
 {
        int ret = RCV_PKT_OK;
 
+       if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
+               ret = max_packet_exceeded(packet, thread);
+       return ret;
+}
+
+static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+{
+       int ret;
+
        /* Set up for the next packet */
        packet->rhqoff += packet->rsize;
        if (packet->rhqoff >= packet->maxcnt)
                packet->rhqoff = 0;
 
        packet->numpkt++;
-       if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
-               if (thread) {
-                       cond_resched();
-               } else {
-                       ret = RCV_PKT_LIMIT;
-                       this_cpu_inc(*packet->rcd->dd->rcv_limit);
-               }
-       }
+       ret = check_max_packet(packet, thread);
 
        packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
                                     packet->rcd->dd->rhf_offset;
@@ -682,7 +731,7 @@ static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 
 static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 {
-       int ret = RCV_PKT_OK;
+       int ret;
 
        packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
                                         packet->rhf_addr);
@@ -723,14 +772,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
        if (packet->rhqoff >= packet->maxcnt)
                packet->rhqoff = 0;
 
-       if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
-               if (thread) {
-                       cond_resched();
-               } else {
-                       ret = RCV_PKT_LIMIT;
-                       this_cpu_inc(*packet->rcd->dd->rcv_limit);
-               }
-       }
+       ret = check_max_packet(packet, thread);
 
        packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
                                      packet->rcd->dd->rhf_offset;
@@ -767,38 +809,6 @@ static inline void finish_packet(struct hfi1_packet *packet)
                       packet->etail, rcv_intr_dynamic, packet->numpkt);
 }
 
-static inline void process_rcv_qp_work(struct hfi1_packet *packet)
-{
-       struct hfi1_ctxtdata *rcd;
-       struct rvt_qp *qp, *nqp;
-
-       rcd = packet->rcd;
-       rcd->head = packet->rhqoff;
-
-       /*
-        * Iterate over all QPs waiting to respond.
-        * The list won't change since the IRQ is only run on one CPU.
-        */
-       list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
-               list_del_init(&qp->rspwait);
-               if (qp->r_flags & RVT_R_RSP_NAK) {
-                       qp->r_flags &= ~RVT_R_RSP_NAK;
-                       hfi1_send_rc_ack(rcd, qp, 0);
-               }
-               if (qp->r_flags & RVT_R_RSP_SEND) {
-                       unsigned long flags;
-
-                       qp->r_flags &= ~RVT_R_RSP_SEND;
-                       spin_lock_irqsave(&qp->s_lock, flags);
-                       if (ib_rvt_state_ops[qp->state] &
-                                       RVT_PROCESS_OR_FLUSH_SEND)
-                               hfi1_schedule_send(qp);
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-               }
-               rvt_put_qp(qp);
-       }
-}
-
 /*
  * Handle receive interrupts when using the no dma rtail option.
  */
@@ -826,7 +836,8 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
                        last = RCV_PKT_DONE;
                process_rcv_update(last, &packet);
        }
-       process_rcv_qp_work(&packet);
+       process_rcv_qp_work(rcd);
+       rcd->head = packet.rhqoff;
 bail:
        finish_packet(&packet);
        return last;
@@ -854,7 +865,8 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
                        last = RCV_PKT_DONE;
                process_rcv_update(last, &packet);
        }
-       process_rcv_qp_work(&packet);
+       process_rcv_qp_work(rcd);
+       rcd->head = packet.rhqoff;
 bail:
        finish_packet(&packet);
        return last;
@@ -1024,7 +1036,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
                process_rcv_update(last, &packet);
        }
 
-       process_rcv_qp_work(&packet);
+       process_rcv_qp_work(rcd);
+       rcd->head = packet.rhqoff;
 
 bail:
        /*