RDMA/mlx5: Add missing srcu_read_lock in ODP implicit flow
authorMaor Gottlieb <maorg@mellanox.com>
Sun, 19 Jul 2020 06:57:47 +0000 (09:57 +0300)
committerJason Gunthorpe <jgg@nvidia.com>
Fri, 24 Jul 2020 19:44:06 +0000 (16:44 -0300)
According to the locking scheme, mlx5_ib_update_xlt() should be called
with srcu_read_lock(dev->odp->srcu). Prefetch missed this. This fixes the
below WARN from lockdep_assert_held():

  WARNING: CPU: 1 PID: 1130 at drivers/infiniband/hw/mlx5/odp.c:132 mlx5_odp_populate_xlt+0x175/0x180 [mlx5_ib]
  Modules linked in: xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter overlay ib_srp scsi_transport_srp rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_umad ib_ipoib ib_cm mlx5_ib ib_uverbs ib_core mlx5_core mlxfw ptp pps_core
  CPU: 1 PID: 1130 Comm: kworker/u16:11 Tainted: G        W 5.8.0-rc5_for_upstream_debug_2020_07_13_11_04 #1
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
  Workqueue: events_unbound mlx5_ib_prefetch_mr_work [mlx5_ib]
  RIP: 0010:mlx5_odp_populate_xlt+0x175/0x180 [mlx5_ib]
  Code: 08 e2 85 c0 0f 84 65 ff ff ff 49 8b 87 60 01 00 00 be ff ff ff ff 48 8d b8 b0 39 00 00 e8 93 e0 50 e1 85 c0 0f 85 45 ff ff ff <0f> 0b e9 3e ff ff ff 0f 0b eb c7 0f 1f 44 00 00 48 8b 87 98 0f 00
  RSP: 0018:ffff88840f44fc68 EFLAGS: 00010246
  RAX: 0000000000000000 RBX: ffff88840cc9d000 RCX: ffff88840efcd940
  RDX: 0000000000000000 RSI: ffff88844871b9b0 RDI: ffff88840efce100
  RBP: ffff88840cc9d040 R08: 0000000000000040 R09: 0000000000000001
  R10: ffff88846ced3068 R11: 0000000000000000 R12: 00000000000156ec
  R13: 0000000000000004 R14: 0000000000000004 R15: ffff888439941000
  FS:  0000000000000000(0000) GS:ffff88846fa80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f8536d12430 CR3: 0000000437a5e006 CR4: 0000000000360ea0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   mlx5_ib_update_xlt+0x37c/0x7c0 [mlx5_ib]
   pagefault_mr+0x315/0x440 [mlx5_ib]
   mlx5_ib_prefetch_mr_work+0x56/0xa0 [mlx5_ib]
   process_one_work+0x215/0x5c0
   worker_thread+0x3c/0x380
   ? process_one_work+0x5c0/0x5c0
   kthread+0x133/0x150
   ? kthread_park+0x90/0x90
   ret_from_fork+0x1f/0x30

Hold the SRCU during prefetch, even though it strictly isn't needed since
prefetch is holding the num_deferred_work it does make it easier to reason
about.

Fixes: 5256edcb98a1 ("RDMA/mlx5: Rework implicit ODP destroy")
Link: https://lore.kernel.org/r/20200719065747.131157-1-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/core/uverbs_std_types_mr.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/mlx5/odp.c

index 62f58ad..9b22bb5 100644 (file)
@@ -69,7 +69,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)(
 
        num_sge = uverbs_attr_ptr_get_array_size(
                attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge));
-       if (num_sge < 0)
+       if (num_sge <= 0)
                return num_sge;
 
        sg_list = uverbs_attr_get_alloced_ptr(attrs,
index a927831..10f650a 100644 (file)
@@ -2072,6 +2072,9 @@ int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
        if (!pd->device->ops.advise_mr)
                return -EOPNOTSUPP;
 
+       if (!num_sge)
+               return 0;
+
        return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
                                         NULL);
 }
index ee88b32..8f44264 100644 (file)
@@ -800,6 +800,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 {
        struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
 
+       lockdep_assert_held(&mr->dev->odp_srcu);
        if (unlikely(io_virt < mr->mmkey.iova))
                return -EFAULT;
 
@@ -1749,10 +1750,17 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
 {
        struct prefetch_mr_work *work =
                container_of(w, struct prefetch_mr_work, work);
+       struct mlx5_ib_dev *dev;
        u32 bytes_mapped = 0;
+       int srcu_key;
        int ret;
        u32 i;
 
+       /* We rely on IB/core that work is executed if we have num_sge != 0 only. */
+       WARN_ON(!work->num_sge);
+       dev = work->frags[0].mr->dev;
+       /* SRCU should be held when calling to mlx5_odp_populate_xlt() */
+       srcu_key = srcu_read_lock(&dev->odp_srcu);
        for (i = 0; i < work->num_sge; ++i) {
                ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
                                   work->frags[i].length, &bytes_mapped,
@@ -1761,6 +1769,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
                        continue;
                mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
        }
+       srcu_read_unlock(&dev->odp_srcu, srcu_key);
 
        destroy_prefetch_work(work);
 }