RDMA: Add support to dump resource tracker in RAW format
author Maor Gottlieb <maorg@mellanox.com>
Tue, 23 Jun 2020 11:30:40 +0000 (14:30 +0300)
committer Jason Gunthorpe <jgg@nvidia.com>
Wed, 24 Jun 2020 11:52:29 +0000 (08:52 -0300)
Add support for dumping resources in raw format. This enables drivers to
return the entire device-specific QP/CQ/MR context without the driver
having to set each field separately.

The raw query returns only the device-specific data; general data is still
returned by the existing queries.

Example:

$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]

Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/core/device.c
drivers/infiniband/core/nldev.c
include/rdma/ib_verbs.h
include/uapi/rdma/rdma_netlink.h

index cbe95e729cf104df1d580d0992315466f310eb65..1335ed1f1e4a257376b9446443aa06c08831f1f5 100644 (file)
@@ -2619,8 +2619,11 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
        SET_DEVICE_OP(dev_ops, enable_driver);
        SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
        SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
+       SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
        SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
+       SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
        SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
+       SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
        SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
        SET_DEVICE_OP(dev_ops, get_dev_fw_str);
        SET_DEVICE_OP(dev_ops, get_dma_mr);
index 394e307c342c3bf17a932866994729680cac3cf9..1051b5622b625f210558954514956787cd07e924 100644 (file)
@@ -114,6 +114,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
        [RDMA_NLDEV_ATTR_RES_PS]                = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_QP]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_QP_ENTRY]          = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_RAW]               = { .type = NLA_BINARY },
        [RDMA_NLDEV_ATTR_RES_RKEY]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_RQPN]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_RQ_PSN]            = { .type = NLA_U32 },
@@ -446,11 +447,11 @@ static int fill_res_name_pid(struct sk_buff *msg,
        return err ? -EMSGSIZE : 0;
 }
 
-static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
-                            struct rdma_restrack_entry *res, uint32_t port)
+static int fill_res_qp_entry_query(struct sk_buff *msg,
+                                  struct rdma_restrack_entry *res,
+                                  struct ib_device *dev,
+                                  struct ib_qp *qp)
 {
-       struct ib_qp *qp = container_of(res, struct ib_qp, res);
-       struct ib_device *dev = qp->device;
        struct ib_qp_init_attr qp_init_attr;
        struct ib_qp_attr qp_attr;
        int ret;
@@ -459,16 +460,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
        if (ret)
                return ret;
 
-       if (port && port != qp_attr.port_num)
-               return -EAGAIN;
-
-       /* In create_qp() port is not set yet */
-       if (qp_attr.port_num &&
-           nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
-               goto err;
-
-       if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
-               goto err;
        if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
                                qp_attr.dest_qp_num))
@@ -492,13 +483,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
                goto err;
 
-       if (!rdma_is_kernel_res(res) &&
-           nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
-               goto err;
-
-       if (fill_res_name_pid(msg, res))
-               goto err;
-
        if (dev->ops.fill_res_qp_entry)
                return dev->ops.fill_res_qp_entry(msg, qp);
        return 0;
@@ -506,6 +490,48 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
 err:   return -EMSGSIZE;
 }
 
+static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
+                            struct rdma_restrack_entry *res, uint32_t port)
+{
+       struct ib_qp *qp = container_of(res, struct ib_qp, res);
+       struct ib_device *dev = qp->device;
+       int ret;
+
+       if (port && port != qp->port)
+               return -EAGAIN;
+
+       /* In create_qp() port is not set yet */
+       if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
+               return -EMSGSIZE;
+
+       ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
+       if (ret)
+               return -EMSGSIZE;
+
+       if (!rdma_is_kernel_res(res) &&
+           nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
+               return -EMSGSIZE;
+
+       ret = fill_res_name_pid(msg, res);
+       if (ret)
+               return -EMSGSIZE;
+
+       return fill_res_qp_entry_query(msg, res, dev, qp);
+}
+
+static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+                                struct rdma_restrack_entry *res, uint32_t port)
+{
+       struct ib_qp *qp = container_of(res, struct ib_qp, res);
+       struct ib_device *dev = qp->device;
+
+       if (port && port != qp->port)
+               return -EAGAIN;
+       if (!dev->ops.fill_res_qp_entry_raw)
+               return -EINVAL;
+       return dev->ops.fill_res_qp_entry_raw(msg, qp);
+}
+
 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
                                struct rdma_restrack_entry *res, uint32_t port)
 {
@@ -565,34 +591,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
        struct ib_device *dev = cq->device;
 
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
-               goto err;
+               return -EMSGSIZE;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
                              atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
-               goto err;
+               return -EMSGSIZE;
 
        /* Poll context is only valid for kernel CQs */
        if (rdma_is_kernel_res(res) &&
            nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
-               goto err;
+               return -EMSGSIZE;
 
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
-               goto err;
+               return -EMSGSIZE;
 
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
-               goto err;
+               return -EMSGSIZE;
        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
                        cq->uobject->uevent.uobject.context->res.id))
-               goto err;
+               return -EMSGSIZE;
 
        if (fill_res_name_pid(msg, res))
-               goto err;
+               return -EMSGSIZE;
 
-       if (dev->ops.fill_res_cq_entry)
-               return dev->ops.fill_res_cq_entry(msg, cq);
-       return 0;
+       return (dev->ops.fill_res_cq_entry) ?
+               dev->ops.fill_res_cq_entry(msg, cq) : 0;
+}
 
-err:   return -EMSGSIZE;
+static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+                                struct rdma_restrack_entry *res, uint32_t port)
+{
+       struct ib_cq *cq = container_of(res, struct ib_cq, res);
+       struct ib_device *dev = cq->device;
+
+       if (!dev->ops.fill_res_cq_entry_raw)
+               return -EINVAL;
+       return dev->ops.fill_res_cq_entry_raw(msg, cq);
 }
 
 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -603,30 +637,39 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
 
        if (has_cap_net_admin) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
-                       goto err;
+                       return -EMSGSIZE;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
-                       goto err;
+                       return -EMSGSIZE;
        }
 
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
                              RDMA_NLDEV_ATTR_PAD))
-               goto err;
+               return -EMSGSIZE;
 
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
-               goto err;
+               return -EMSGSIZE;
 
        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
-               goto err;
+               return -EMSGSIZE;
 
        if (fill_res_name_pid(msg, res))
-               goto err;
+               return -EMSGSIZE;
 
-       if (dev->ops.fill_res_mr_entry)
-               return dev->ops.fill_res_mr_entry(msg, mr);
-       return 0;
+       return (dev->ops.fill_res_mr_entry) ?
+                      dev->ops.fill_res_mr_entry(msg, mr) :
+                      0;
+}
 
-err:   return -EMSGSIZE;
+static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+                                struct rdma_restrack_entry *res, uint32_t port)
+{
+       struct ib_mr *mr = container_of(res, struct ib_mr, res);
+       struct ib_device *dev = mr->pd->device;
+
+       if (!dev->ops.fill_res_mr_entry_raw)
+               return -EINVAL;
+       return dev->ops.fill_res_mr_entry_raw(msg, mr);
 }
 
 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -1149,7 +1192,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
 
 struct nldev_fill_res_entry {
        enum rdma_nldev_attr nldev_attr;
-       enum rdma_nldev_command nldev_cmd;
        u8 flags;
        u32 entry;
        u32 id;
@@ -1161,40 +1203,34 @@ enum nldev_res_flags {
 
 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
        [RDMA_RESTRACK_QP] = {
-               .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
                .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_LQPN,
        },
        [RDMA_RESTRACK_CM_ID] = {
-               .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
                .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_CM_IDN,
        },
        [RDMA_RESTRACK_CQ] = {
-               .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_CQN,
        },
        [RDMA_RESTRACK_MR] = {
-               .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_MRN,
        },
        [RDMA_RESTRACK_PD] = {
-               .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_PDN,
        },
        [RDMA_RESTRACK_COUNTER] = {
-               .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
                .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
                .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
@@ -1253,7 +1289,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
        }
 
        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
-                       RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+                       RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+                                        RDMA_NL_GET_OP(nlh->nlmsg_type)),
                        0, 0);
 
        if (fill_nldev_handle(msg, device)) {
@@ -1331,7 +1368,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
        }
 
        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
-                       RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+                       RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+                                        RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
                        0, NLM_F_MULTI);
 
        if (fill_nldev_handle(skb, device)) {
@@ -1413,26 +1451,29 @@ err_index:
        return ret;
 }
 
-#define RES_GET_FUNCS(name, type)                                             \
-       static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
+#define RES_GET_FUNCS(name, type)                                              \
+       static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
                                                 struct netlink_callback *cb)  \
-       {                                                                      \
-               return res_get_common_dumpit(skb, cb, type,                    \
-                                            fill_res_##name##_entry);         \
-       }                                                                      \
-       static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
-                                              struct nlmsghdr *nlh,           \
+       {                                                                      \
+               return res_get_common_dumpit(skb, cb, type,                    \
+                                            fill_res_##name##_entry);         \
+       }                                                                      \
+       static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
+                                              struct nlmsghdr *nlh,           \
                                               struct netlink_ext_ack *extack) \
-       {                                                                      \
-               return res_get_common_doit(skb, nlh, extack, type,             \
-                                          fill_res_##name##_entry);           \
+       {                                                                      \
+               return res_get_common_doit(skb, nlh, extack, type,             \
+                                          fill_res_##name##_entry);           \
        }
 
 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
+RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
+RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
 
 static LIST_HEAD(link_ops);
@@ -2117,6 +2158,21 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
                .doit = nldev_stat_del_doit,
                .flags = RDMA_NL_ADMIN_PERM,
        },
+       [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
+               .doit = nldev_res_get_qp_raw_doit,
+               .dump = nldev_res_get_qp_raw_dumpit,
+               .flags = RDMA_NL_ADMIN_PERM,
+       },
+       [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
+               .doit = nldev_res_get_cq_raw_doit,
+               .dump = nldev_res_get_cq_raw_dumpit,
+               .flags = RDMA_NL_ADMIN_PERM,
+       },
+       [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
+               .doit = nldev_res_get_mr_raw_doit,
+               .dump = nldev_res_get_mr_raw_dumpit,
+               .flags = RDMA_NL_ADMIN_PERM,
+       },
 };
 
 void __init nldev_init(void)
index 9127cffafccdbfd78b26db69ccc6b041b9145add..77106ff3cd2607946a513d0dc5a86b3973fd88b6 100644 (file)
@@ -2583,8 +2583,11 @@ struct ib_device_ops {
         * Allows rdma drivers to add their own restrack attributes.
         */
        int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
+       int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr);
        int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq);
+       int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq);
        int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
+       int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
        int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);
 
        /* Device lifecycle callbacks */
index 8e277783fa9610d79629e73c69df658959914c3b..3826143d420dd10b46092d0d8951c87a5a23a4d1 100644 (file)
@@ -287,6 +287,12 @@ enum rdma_nldev_command {
 
        RDMA_NLDEV_CMD_STAT_DEL,
 
+       RDMA_NLDEV_CMD_RES_QP_GET_RAW,
+
+       RDMA_NLDEV_CMD_RES_CQ_GET_RAW,
+
+       RDMA_NLDEV_CMD_RES_MR_GET_RAW,
+
        RDMA_NLDEV_NUM_OPS
 };
 
@@ -525,6 +531,8 @@ enum rdma_nldev_attr {
         */
        RDMA_NLDEV_ATTR_DEV_DIM,                /* u8 */
 
+       RDMA_NLDEV_ATTR_RES_RAW,        /* binary */
+
        /*
         * Always the end
         */