RDMA/siw: Fix tx thread initialization.
author	Bernard Metzler <bmt@zurich.ibm.com>
	Fri, 28 Jul 2023 11:44:18 +0000 (13:44 +0200)
committer	Leon Romanovsky <leon@kernel.org>
	Mon, 31 Jul 2023 07:05:23 +0000 (10:05 +0300)
Immediately removing the siw module after insertion may
crash in siw_stop_tx_thread() if the corresponding thread
has not yet had a chance to initialize its wait queue and
siw_stop_tx_thread() tries to wake up that thread.
Initializing the thread's state before spawning it fixes this.
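
A condensed sketch of the new ordering, pared down from the
patch below: the per-CPU wait queue and work list are set up
before kthread_run_on_cpu() spawns the thread, so a stop
request arriving at any later point finds a valid wait queue
to wake.

    /* Initialize per-CPU state up front; previously siw_run_sq()
     * did this itself after being spawned, so an immediate module
     * unload could wake_up() an uninitialized wait queue.
     */
    tx_task = &per_cpu(siw_tx_task_g, cpu);
    init_llist_head(&tx_task->active);
    init_waitqueue_head(&tx_task->waiting);

    /* Only now is it safe to spawn the pinned tx thread. */
    siw_tx_thread[cpu] = kthread_run_on_cpu(siw_run_sq,
                                            (unsigned long *)(long)cpu,
                                            cpu, "siw_tx/%u");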

Reported-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
Link: https://lore.kernel.org/r/20230728114418.124328-1-bmt@zurich.ibm.com
Tested-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/sw/siw/siw.h
drivers/infiniband/sw/siw/siw_main.c
drivers/infiniband/sw/siw/siw_qp_tx.c

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 8b4a710..58dddb1 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -531,11 +531,12 @@ void siw_qp_llp_data_ready(struct sock *sk);
 void siw_qp_llp_write_space(struct sock *sk);
 
 /* QP TX path functions */
+int siw_create_tx_threads(void);
+void siw_stop_tx_threads(void);
 int siw_run_sq(void *arg);
 int siw_qp_sq_process(struct siw_qp *qp);
 int siw_sq_start(struct siw_qp *qp);
 int siw_activate_tx(struct siw_qp *qp);
-void siw_stop_tx_thread(int nr_cpu);
 int siw_get_tx_cpu(struct siw_device *sdev);
 void siw_put_tx_cpu(int cpu);
 
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index f45600d..d4b6e01 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -87,29 +87,6 @@ static void siw_device_cleanup(struct ib_device *base_dev)
        xa_destroy(&sdev->mem_xa);
 }
 
-static int siw_create_tx_threads(void)
-{
-       int cpu, assigned = 0;
-
-       for_each_online_cpu(cpu) {
-               /* Skip HT cores */
-               if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
-                       continue;
-
-               siw_tx_thread[cpu] =
-                       kthread_run_on_cpu(siw_run_sq,
-                                          (unsigned long *)(long)cpu,
-                                          cpu, "siw_tx/%u");
-               if (IS_ERR(siw_tx_thread[cpu])) {
-                       siw_tx_thread[cpu] = NULL;
-                       continue;
-               }
-
-               assigned++;
-       }
-       return assigned;
-}
-
 static int siw_dev_qualified(struct net_device *netdev)
 {
        /*
@@ -529,7 +506,6 @@ static struct rdma_link_ops siw_link_ops = {
 static __init int siw_init_module(void)
 {
        int rv;
-       int nr_cpu;
 
        if (SENDPAGE_THRESH < SIW_MAX_INLINE) {
                pr_info("siw: sendpage threshold too small: %u\n",
@@ -574,12 +550,8 @@ static __init int siw_init_module(void)
        return 0;
 
 out_error:
-       for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) {
-               if (siw_tx_thread[nr_cpu]) {
-                       siw_stop_tx_thread(nr_cpu);
-                       siw_tx_thread[nr_cpu] = NULL;
-               }
-       }
+       siw_stop_tx_threads();
+
        if (siw_crypto_shash)
                crypto_free_shash(siw_crypto_shash);
 
@@ -593,14 +565,8 @@ out_error:
 
 static void __exit siw_exit_module(void)
 {
-       int cpu;
+       siw_stop_tx_threads();
 
-       for_each_possible_cpu(cpu) {
-               if (siw_tx_thread[cpu]) {
-                       siw_stop_tx_thread(cpu);
-                       siw_tx_thread[cpu] = NULL;
-               }
-       }
        unregister_netdevice_notifier(&siw_netdev_nb);
        rdma_link_unregister(&siw_link_ops);
        ib_unregister_driver(RDMA_DRIVER_SIW);
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 7c7a51d..3ff339e 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -1208,10 +1208,45 @@ struct tx_task_t {
 
 static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g);
 
-void siw_stop_tx_thread(int nr_cpu)
+int siw_create_tx_threads(void)
 {
-       kthread_stop(siw_tx_thread[nr_cpu]);
-       wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting);
+       int cpu, assigned = 0;
+
+       for_each_online_cpu(cpu) {
+               struct tx_task_t *tx_task;
+
+               /* Skip HT cores */
+               if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
+                       continue;
+
+               tx_task = &per_cpu(siw_tx_task_g, cpu);
+               init_llist_head(&tx_task->active);
+               init_waitqueue_head(&tx_task->waiting);
+
+               siw_tx_thread[cpu] =
+                       kthread_run_on_cpu(siw_run_sq,
+                                          (unsigned long *)(long)cpu,
+                                          cpu, "siw_tx/%u");
+               if (IS_ERR(siw_tx_thread[cpu])) {
+                       siw_tx_thread[cpu] = NULL;
+                       continue;
+               }
+               assigned++;
+       }
+       return assigned;
+}
+
+void siw_stop_tx_threads(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               if (siw_tx_thread[cpu]) {
+                       kthread_stop(siw_tx_thread[cpu]);
+                       wake_up(&per_cpu(siw_tx_task_g, cpu).waiting);
+                       siw_tx_thread[cpu] = NULL;
+               }
+       }
 }
 
 int siw_run_sq(void *data)
@@ -1221,9 +1256,6 @@ int siw_run_sq(void *data)
        struct siw_qp *qp;
        struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu);
 
-       init_llist_head(&tx_task->active);
-       init_waitqueue_head(&tx_task->waiting);
-
        while (1) {
                struct llist_node *fifo_list = NULL;