RDMA/hns: Add support for CQ stash
author Lang Cheng <chenglang@huawei.com>
Thu, 26 Nov 2020 07:04:10 +0000 (15:04 +0800)
committer Jason Gunthorpe <jgg@nvidia.com>
Fri, 27 Nov 2020 16:53:59 +0000 (12:53 -0400)
Stash is a mechanism that uses the core information carried by the ARM AXI
bus to access the L3 cache. It can be used to improve the performance by
increasing the hit ratio of L3 cache. CQs need to enable stash by default.

Link: https://lore.kernel.org/r/1606374251-21512-2-git-send-email-liweihang@huawei.com
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/hns/hns_roce_common.h
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h

index f5669ff8cfebc9bc88b854023f89bf68b1c23c1e..29469e15dfd36aba9fd0576c1f0fecaeeec0b735 100644 (file)
 #define roce_set_bit(origin, shift, val) \
        roce_set_field((origin), (1ul << (shift)), (shift), (val))
 
+/*
+ * Bundle a field's containing type and its high/low bit positions into a
+ * single comma-separated list, so one macro name can stand for all three.
+ */
+#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l
+
+/*
+ * Set a single-bit field in a context made of little-endian 32-bit words.
+ *
+ * @ptr:        pointer to the context; it is assigned to a
+ *              'const field_type *' so a pointer of the wrong type is
+ *              diagnosed by the compiler
+ * @field_type: type of the context structure
+ * @field_h:    highest bit of the field, counted from bit 0 of the context
+ * @field_l:    lowest bit of the field; must equal @field_h, otherwise
+ *              BUILD_BUG_ON_ZERO() breaks the build
+ *
+ * The compile-time check is folded into the CPU-order mask inside
+ * cpu_to_le32(), so no integer arithmetic is done on the __le32 result.
+ */
+#define _hr_reg_enable(ptr, field_type, field_h, field_l)                      \
+       ({                                                                     \
+               const field_type *_ptr = (ptr);                                \
+               *((__le32 *)_ptr + (field_h) / 32) |= cpu_to_le32(             \
+                       BIT((field_l) % 32) +                                  \
+                       BUILD_BUG_ON_ZERO((field_h) != (field_l)));            \
+       })
+
+/*
+ * Two-level wrapper: 'field' (e.g. a FIELD_LOC() expansion) is macro-expanded
+ * into "type, high bit, low bit" before _hr_reg_enable() receives it.
+ */
+#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
+
 #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
 
index c0c8b1e3ddbbffebc379fd0b0d4acfac8c6dae19..a5c6bb073569e1b10e852742bd69ec0fcdade063 100644 (file)
@@ -225,6 +225,7 @@ enum {
        HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL          = BIT(9),
        HNS_ROCE_CAP_FLAG_ATOMIC                = BIT(10),
        HNS_ROCE_CAP_FLAG_SDI_MODE              = BIT(14),
+       HNS_ROCE_CAP_FLAG_STASH                 = BIT(17),
 };
 
 #define HNS_ROCE_DB_TYPE_COUNT                 2
index 1bd81fb7b6e14886c79c169cbac6e0a33b33d596..125ab4e03e5aa44e704e2d4f89dbd0a728b528a4 100644 (file)
@@ -3168,6 +3168,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
                       V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
                       HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
 
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+               hr_reg_enable(cq_context, CQC_STASH);
+
        cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
 
        roce_set_field(cq_context->byte_16_hop_addr,
index c06851767a35efc60279dd5b113e57a1bef105e9..a4c2be11e4187964c6d662a6991722edb131efd9 100644 (file)
@@ -267,23 +267,24 @@ enum hns_roce_sgid_type {
 };
 
 struct hns_roce_v2_cq_context {
-       __le32  byte_4_pg_ceqn;
-       __le32  byte_8_cqn;
-       __le32  cqe_cur_blk_addr;
-       __le32  byte_16_hop_addr;
-       __le32  cqe_nxt_blk_addr;
-       __le32  byte_24_pgsz_addr;
-       __le32  byte_28_cq_pi;
-       __le32  byte_32_cq_ci;
-       __le32  cqe_ba;
-       __le32  byte_40_cqe_ba;
-       __le32  byte_44_db_record;
-       __le32  db_record_addr;
-       __le32  byte_52_cqe_cnt;
-       __le32  byte_56_cqe_period_maxcnt;
-       __le32  cqe_report_timer;
-       __le32  byte_64_se_cqe_idx;
+       __le32 byte_4_pg_ceqn;
+       __le32 byte_8_cqn; /* word 1: context bit 63 (CQC_STASH) is bit 31 here */
+       __le32 cqe_cur_blk_addr;
+       __le32 byte_16_hop_addr;
+       __le32 cqe_nxt_blk_addr;
+       __le32 byte_24_pgsz_addr;
+       __le32 byte_28_cq_pi;
+       __le32 byte_32_cq_ci;
+       __le32 cqe_ba;
+       __le32 byte_40_cqe_ba;
+       __le32 byte_44_db_record;
+       __le32 db_record_addr;
+       __le32 byte_52_cqe_cnt;
+       __le32 byte_56_cqe_period_maxcnt;
+       __le32 cqe_report_timer;
+       __le32 byte_64_se_cqe_idx;
 };
+
 #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL        0x0
 
@@ -360,6 +361,10 @@ struct hns_roce_v2_cq_context {
 #define        V2_CQC_BYTE_64_SE_CQE_IDX_S 0
 #define        V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
 
+#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
+
+#define CQC_STASH CQC_FIELD_LOC(63, 63) /* one-bit field at context bit 63 */
+
 struct hns_roce_srq_context {
        __le32  byte_4_srqn_srqst;
        __le32  byte_8_limit_wl;