RDMA/mlx5: Enable Relaxed Ordering by default for kernel ULPs
author Avihai Horon <avihaih@nvidia.com>
Wed, 9 Jun 2021 11:05:03 +0000 (14:05 +0300)
committer Jason Gunthorpe <jgg@nvidia.com>
Mon, 21 Jun 2021 15:33:08 +0000 (12:33 -0300)
Relaxed Ordering is a capability that can only benefit users that support
it. All kernel ULPs should support Relaxed Ordering, as they are designed
to read data only after observing the CQE and use the DMA API correctly.

Hence, implicitly enable Relaxed Ordering by default for MR transfers in
kernel ULPs.

Link: https://lore.kernel.org/r/b7e820aab7402b8efa63605f4ea465831b3b1e5e.1623236426.git.leonro@nvidia.com
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/wr.c
include/rdma/ib_verbs.h

index 383c0c6..e288531 100644 (file)
@@ -68,6 +68,7 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
                                          struct ib_pd *pd)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       bool ro_pci_enabled = pcie_relaxed_ordering_enabled(dev->mdev->pdev);
 
        MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
        MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
@@ -77,10 +78,10 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
 
        if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
                MLX5_SET(mkc, mkc, relaxed_ordering_write,
-                        !!(acc & IB_ACCESS_RELAXED_ORDERING));
+                        (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled);
        if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
                MLX5_SET(mkc, mkc, relaxed_ordering_read,
-                        !!(acc & IB_ACCESS_RELAXED_ORDERING));
+                        (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled);
 
        MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
@@ -811,7 +812,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
        MLX5_SET(mkc, mkc, length64, 1);
-       set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
+       set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0,
+                                     pd);
 
        err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
        if (err)
@@ -2010,7 +2012,7 @@ static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
        /* This is only used from the kernel, so setting the PD is OK. */
-       set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
+       set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd);
        MLX5_SET(mkc, mkc, free, 1);
        MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
        MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
index 6880627..8841620 100644 (file)
@@ -866,7 +866,10 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
        bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
        u8 flags = 0;
 
-       /* Matches access in mlx5_set_umr_free_mkey() */
+       /* Matches access in mlx5_set_umr_free_mkey().
+        * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
+        * kernel ULPs are not aware of it, so we don't set it here.
+        */
        if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
                mlx5_ib_warn(
                        to_mdev(qp->ibqp.device),
index 9423e70..34971c7 100644 (file)
@@ -2468,6 +2468,14 @@ struct ib_device_ops {
                         enum ib_uverbs_advise_mr_advice advice, u32 flags,
                         struct ib_sge *sg_list, u32 num_sge,
                         struct uverbs_attr_bundle *attrs);
+
+       /*
+        * Kernel users should universally support relaxed ordering (RO), as
+        * they are designed to read data only after observing the CQE and use
+        * the DMA API correctly.
+        *
+        * Some drivers implicitly enable RO if the platform supports it.
+        */
        int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
                         unsigned int *sg_offset);
        int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,