RDMA/mlx5: Use ib_umem_find_best_pgoff() for SRQ

author Jason Gunthorpe <jgg@nvidia.com>

Sun, 15 Nov 2020 11:43:05 +0000 (13:43 +0200)

committer Jason Gunthorpe <jgg@nvidia.com>

Mon, 16 Nov 2020 20:53:29 +0000 (16:53 -0400)
author Jason Gunthorpe <jgg@nvidia.com>
Sun, 15 Nov 2020 11:43:05 +0000 (13:43 +0200)
committer Jason Gunthorpe <jgg@nvidia.com>
Mon, 16 Nov 2020 20:53:29 +0000 (16:53 -0400)
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c

index 92e7621..fd97781 100644 (file)
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -107,6 +107,51 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
         }
  }
  
+/*
+ * Compute the page size and page_offset for mailboxes that use a quantized
+ * page_offset. The granularity of the page offset scales according to page
+ * size. Returns 0 if no page size works, else *page_offset_quantized is set.
+ */
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+       struct ib_umem *umem, unsigned long pgsz_bitmap,
+       unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+       unsigned int *page_offset_quantized)
+{
+       const u64 page_offset_mask = (1ULL << page_offset_bits) - 1;
+       unsigned long page_size;
+       u64 page_offset;
+
+       page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask);
+       if (!page_size)
+               return 0;
+
+       /*
+        * page size is the largest possible page size.
+        *
+        * Reduce the page_size, and thus the page_offset and quanta, until the
+        * page_offset fits into the mailbox field. Once page_size < scale this
+        * loop is guaranteed to terminate.
+        */
+       page_offset = ib_umem_dma_offset(umem, page_size);
+       while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) {
+               page_size /= 2;
+               page_offset = ib_umem_dma_offset(umem, page_size);
+       }
+
+       /*
+        * The address is not aligned, or otherwise cannot be represented by the
+        * page_offset.
+        */
+       if (!(pgsz_bitmap & page_size))
+               return 0;
+
+       *page_offset_quantized =
+               (unsigned long)page_offset / (page_size / scale);
+       if (WARN_ON(*page_offset_quantized > page_offset_mask))
+               return 0;
+       return page_size;
+}
+
  int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
  {
         u64 page_size;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h

index bb44080..2f08a5b 100644 (file)
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -69,6 +69,37 @@ __mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
                                        pgsz_shift),                            \
                                iova)
  
+static __always_inline unsigned long
+__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits,
+                             unsigned int offset_shift)
+{
+       /* Topmost bit the offset field can express, capped to fit a long. */
+       unsigned int top_bit = min_t(unsigned long, BITS_PER_LONG - 1,
+                                    offset_shift + page_offset_bits - 1);
+
+       return GENMASK(top_bit, offset_shift);
+}
+
+/*
+ * QP/CQ/WQ/etc type commands take a page offset that satisfies:
+ *   page_offset_quantized * (page_size/scale) = page_offset
+ * Which restricts allowed page sizes to ones that satisfy the above.
+ */
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+       struct ib_umem *umem, unsigned long pgsz_bitmap,
+       unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+       unsigned int *page_offset_quantized);
+#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld,           \
+                                           pgsz_shift, page_offset_fld,       \
+                                           scale, page_offset_quantized)      \
+       __mlx5_umem_find_best_quantized_pgoff(                                 \
+               umem,                                                          \
+               __mlx5_log_page_size_to_bitmap(                                \
+                       __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift),         \
+               __mlx5_bit_sz(typ, page_offset_fld),                           \
+               GENMASK(31, order_base_2(scale)), scale,                       \
+               page_offset_quantized)
+
  enum {
         MLX5_IB_MMAP_OFFSET_START = 9,
         MLX5_IB_MMAP_OFFSET_END = 255,
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c

index cb5ad04..7dfdc9e 100644 (file)
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -51,8 +51,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
                 udata, struct mlx5_ib_ucontext, ibucontext);
         size_t ucmdlen;
         int err;
-       int page_shift;
-       u32 offset;
+       unsigned int page_offset_quantized;
+       unsigned int page_size;
         u32 uidx = MLX5_IB_DEFAULT_UIDX;
  
         ucmdlen = min(udata->inlen, sizeof(ucmd));
@@ -85,22 +85,22 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
                 return err;
         }
  
-       mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &page_shift);
-       err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
-                                    &offset);
-       if (err) {
+       page_size = mlx5_umem_find_best_quantized_pgoff(
+               srq->umem, srqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
+               page_offset, 64, &page_offset_quantized);
+       if (!page_size) {
                 mlx5_ib_warn(dev, "bad offset\n");
                 goto err_umem;
         }
  
-       in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, 1UL << page_shift),
+       in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, page_size),
                            sizeof(*in->pas), GFP_KERNEL);
         if (!in->pas) {
                 err = -ENOMEM;
                 goto err_umem;
         }
  
-       mlx5_ib_populate_pas(srq->umem, 1UL << page_shift, in->pas, 0);
+       mlx5_ib_populate_pas(srq->umem, page_size, in->pas, 0);
  
         err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db);
         if (err) {
@@ -108,8 +108,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
                 goto err_in;
         }
  
-       in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
-       in->page_offset = offset;
+       in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT;
+       in->page_offset = page_offset_quantized;
         in->uid = (in->type != IB_SRQT_XRC) ?  to_mpd(pd)->uid : 0;
         if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
             in->type != IB_SRQT_BASIC)
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h

index 7059750..7752211 100644 (file)
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -34,6 +34,13 @@ static inline int ib_umem_offset(struct ib_umem *umem)
         return umem->address & ~PAGE_MASK;
  }
  
+static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem,
+                                              unsigned long pgsz)
+{
+       dma_addr_t base = sg_dma_address(umem->sg_head.sgl); /* first SG entry */
+       return (base + ib_umem_offset(umem)) & (pgsz - 1); /* bits below pgsz */
+}
+
  static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
                                             unsigned long pgsz)
  {
@@ -79,6 +86,35 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
  unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
                                      unsigned long pgsz_bitmap,
                                      unsigned long virt);
+/**
+ * ib_umem_find_best_pgoff - Find best HW page size
+ *
+ * @umem: umem struct
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ * @pgoff_bitmask: Mask of bits that can be represented with an offset
+ *
+ * This is very similar to ib_umem_find_best_pgsz() except instead of accepting
+ * an IOVA it accepts a bitmask specifying what address bits can be represented
+ * with a page offset.
+ *
+ * For instance if the HW has multiple page sizes, requires 64 byte alignment,
+ * and can support aligned offsets up to 4032 then pgoff_bitmask would be
+ * "111111000000".
+ *
+ * If the pgoff_bitmask requires either alignment in the low bit or an
+ * unavailable page size for the high bits, this function returns 0.
+ */
+static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
+                                                   unsigned long pgsz_bitmap,
+                                                   u64 pgoff_bitmask)
+{
+       struct scatterlist *sg = umem->sg_head.sgl;
+       dma_addr_t dma_addr;
+
+       dma_addr = sg_dma_address(sg) + ib_umem_offset(umem);
+       return ib_umem_find_best_pgsz(umem, pgsz_bitmap,
+                                     dma_addr & pgoff_bitmask);
+}
  
  #else /* CONFIG_INFINIBAND_USER_MEM */
  
@@ -101,6 +137,12 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
  {
         return 0;
  }
+static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
+                                                   unsigned long pgsz_bitmap,
+                                                   u64 pgoff_bitmask)
+{
+       return 0; /* fallback variant: unconditionally reports no page size */
+}
  
  #endif /* CONFIG_INFINIBAND_USER_MEM */
author	Jason Gunthorpe <jgg@nvidia.com>
	Sun, 15 Nov 2020 11:43:05 +0000 (13:43 +0200)
committer	Jason Gunthorpe <jgg@nvidia.com>
	Mon, 16 Nov 2020 20:53:29 +0000 (16:53 -0400)
drivers/infiniband/hw/mlx5/mem.c		patch \| blob \| history
drivers/infiniband/hw/mlx5/mlx5_ib.h		patch \| blob \| history
drivers/infiniband/hw/mlx5/srq.c		patch \| blob \| history
include/rdma/ib_umem.h		patch \| blob \| history