RDMA/rxe: Limit the number of calls to each tasklet
author: Bob Pearson <rpearsonhpe@gmail.com>
Thu, 30 Jun 2022 19:04:25 +0000 (14:04 -0500)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 25 Aug 2022 09:40:37 +0000 (11:40 +0200)
[ Upstream commit eff6d998ca297cb0b2e53b032a56cf8e04dd8b17 ]

Limit the maximum number of calls to each tasklet from rxe_do_task()
before yielding the CPU. When the limit is reached, reschedule the tasklet
and exit the calling loop. This patch prevents one tasklet from consuming
100% of a CPU core and causing a deadlock or soft lockup.

Link: https://lore.kernel.org/r/20220630190425.2251-9-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/infiniband/sw/rxe/rxe_param.h
drivers/infiniband/sw/rxe/rxe_task.c

index b5a70cb..8723898 100644 (file)
@@ -103,6 +103,12 @@ enum rxe_device_param {
        RXE_INFLIGHT_SKBS_PER_QP_HIGH   = 64,
        RXE_INFLIGHT_SKBS_PER_QP_LOW    = 16,
 
+       /* Max number of iterations of each tasklet
+        * before yielding the cpu to let other
+        * work make progress
+        */
+       RXE_MAX_ITERATIONS              = 1024,
+
        /* Delay before calling arbiter timer */
        RXE_NSEC_ARB_TIMER_DELAY        = 200,
 
index 6951fdc..568cf56 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/interrupt.h>
 #include <linux/hardirq.h>
 
-#include "rxe_task.h"
+#include "rxe.h"
 
 int __rxe_do_task(struct rxe_task *task)
 
@@ -34,6 +34,7 @@ void rxe_do_task(struct tasklet_struct *t)
        int ret;
        unsigned long flags;
        struct rxe_task *task = from_tasklet(task, t, tasklet);
+       unsigned int iterations = RXE_MAX_ITERATIONS;
 
        spin_lock_irqsave(&task->state_lock, flags);
        switch (task->state) {
@@ -62,13 +63,20 @@ void rxe_do_task(struct tasklet_struct *t)
                spin_lock_irqsave(&task->state_lock, flags);
                switch (task->state) {
                case TASK_STATE_BUSY:
-                       if (ret)
+                       if (ret) {
                                task->state = TASK_STATE_START;
-                       else
+                       } else if (iterations--) {
                                cont = 1;
+                       } else {
+                               /* reschedule the tasklet and exit
+                                * the loop to give up the cpu
+                                */
+                               tasklet_schedule(&task->tasklet);
+                               task->state = TASK_STATE_START;
+                       }
                        break;
 
-               /* soneone tried to run the task since the last time we called
+               /* someone tried to run the task since the last time we called
                 * func, so we will call one more time regardless of the
                 * return value
                 */