RDMA/cm: Add tracepoints to track MAD send operations
author Chuck Lever <chuck.lever@oracle.com>
Mon, 17 Aug 2020 13:53:22 +0000 (09:53 -0400)
committer Jason Gunthorpe <jgg@nvidia.com>
Mon, 24 Aug 2020 22:41:41 +0000 (19:41 -0300)
Surface the operation of MAD exchanges during connection
establishment. Some samples:

[root@klimt ~]# trace-cmd report -F ib_cma
cpus=4
     kworker/0:4-123   [000]    60.677388: icm_send_rep:         local_id=1965336542 remote_id=1096195961 state=REQ_RCVD lap_state=LAP_UNINIT
   kworker/u8:11-391   [002]    60.678808: icm_send_req:         local_id=1982113758 remote_id=0 state=IDLE lap_state=LAP_UNINIT
     kworker/0:4-123   [000]    60.679652: icm_send_rtu:         local_id=1982113758 remote_id=1079418745 state=REP_RCVD lap_state=LAP_UNINIT
            nfsd-1954  [001]    60.691350: icm_send_rep:         local_id=1998890974 remote_id=1129750393 state=MRA_REQ_SENT lap_state=LAP_UNINIT
            nfsd-1954  [003]    62.017931: icm_send_drep:        local_id=1998890974 remote_id=1129750393 state=TIMEWAIT lap_state=LAP_UNINIT

Link: https://lore.kernel.org/r/159767240197.2968.12048458026453596018.stgit@klimt.1015granger.net
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/core/cm.c
drivers/infiniband/core/cm_trace.h

index 478be84..8fe1417 100644 (file)
@@ -1563,6 +1563,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
        cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
        cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
 
+       trace_icm_send_req(&cm_id_priv->id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        ret = ib_post_send_mad(cm_id_priv->msg, NULL);
        if (ret) {
@@ -1610,6 +1611,9 @@ static int cm_issue_rej(struct cm_port *port,
                IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
        }
 
+       trace_icm_issue_rej(
+               IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+               IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                cm_free_msg(msg);
@@ -1961,6 +1965,7 @@ static void cm_dup_req_handler(struct cm_work *work,
        }
        spin_unlock_irq(&cm_id_priv->lock);
 
+       trace_icm_send_dup_req(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                goto free;
@@ -2287,6 +2292,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
        msg->timeout_ms = cm_id_priv->timeout_ms;
        msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
 
+       trace_icm_send_rep(cm_id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2358,6 +2364,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
        cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
                      private_data, private_data_len);
 
+       trace_icm_send_rtu(cm_id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2439,6 +2446,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
                goto unlock;
        spin_unlock_irq(&cm_id_priv->lock);
 
+       trace_icm_send_dup_rep(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                goto free;
@@ -2660,6 +2668,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
        msg->timeout_ms = cm_id_priv->timeout_ms;
        msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
 
+       trace_icm_send_dreq(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_enter_timewait(cm_id_priv);
@@ -2730,6 +2739,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
        cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
                       private_data, private_data_len);
 
+       trace_icm_send_drep(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_free_msg(msg);
@@ -2779,6 +2789,9 @@ static int cm_issue_drep(struct cm_port *port,
        IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
                IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
 
+       trace_icm_issue_drep(
+               IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+               IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                cm_free_msg(msg);
@@ -2936,6 +2949,7 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
                return -EINVAL;
        }
 
+       trace_icm_send_rej(&cm_id_priv->id, reason);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_free_msg(msg);
@@ -3114,6 +3128,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
                cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
                              msg_response, service_timeout,
                              private_data, private_data_len);
+               trace_icm_send_mra(cm_id);
                ret = ib_post_send_mad(msg, NULL);
                if (ret)
                        goto error2;
@@ -3484,10 +3499,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
        msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
 
        spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id->state == IB_CM_IDLE)
+       if (cm_id->state == IB_CM_IDLE) {
+               trace_icm_send_sidr_req(&cm_id_priv->id);
                ret = ib_post_send_mad(msg, NULL);
-       else
+       } else {
                ret = -EINVAL;
+       }
 
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3649,6 +3666,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
 
        cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
                           param);
+       trace_icm_send_sidr_rep(&cm_id_priv->id);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                cm_free_msg(msg);
index d6431b2..e9d2826 100644 (file)
@@ -80,6 +80,59 @@ IB_CM_LAP_STATE_LIST
 #define show_ib_cm_lap_state(x) \
                __print_symbolic(x, IB_CM_LAP_STATE_LIST)
 
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ *
+ * IB_CM_REJ_REASON_LIST is an X-macro list that is expanded twice, each
+ * time with a different definition of ib_cm_rej_reason() and
+ * ib_cm_rej_reason_end():
+ *
+ *  1. as TRACE_DEFINE_ENUM(IB_CM_<x>); for every entry in the list;
+ *  2. as a { IB_CM_<x>, "<x>" } pair for every entry, forming the table
+ *     that show_ib_cm_rej_reason() passes to __print_symbolic().
+ *
+ * The printed name is the list entry itself (for example "REJ_TIMEOUT"),
+ * without the IB_CM_ prefix. The _end variant exists so that the last
+ * table entry is emitted without a trailing comma.
+ */
+#define IB_CM_REJ_REASON_LIST                                  \
+       ib_cm_rej_reason(REJ_NO_QP)                             \
+       ib_cm_rej_reason(REJ_NO_EEC)                            \
+       ib_cm_rej_reason(REJ_NO_RESOURCES)                      \
+       ib_cm_rej_reason(REJ_TIMEOUT)                           \
+       ib_cm_rej_reason(REJ_UNSUPPORTED)                       \
+       ib_cm_rej_reason(REJ_INVALID_COMM_ID)                   \
+       ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE)             \
+       ib_cm_rej_reason(REJ_INVALID_SERVICE_ID)                \
+       ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE)            \
+       ib_cm_rej_reason(REJ_STALE_CONN)                        \
+       ib_cm_rej_reason(REJ_RDC_NOT_EXIST)                     \
+       ib_cm_rej_reason(REJ_INVALID_GID)                       \
+       ib_cm_rej_reason(REJ_INVALID_LID)                       \
+       ib_cm_rej_reason(REJ_INVALID_SL)                        \
+       ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS)             \
+       ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT)                 \
+       ib_cm_rej_reason(REJ_INVALID_PACKET_RATE)               \
+       ib_cm_rej_reason(REJ_INVALID_ALT_GID)                   \
+       ib_cm_rej_reason(REJ_INVALID_ALT_LID)                   \
+       ib_cm_rej_reason(REJ_INVALID_ALT_SL)                    \
+       ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS)         \
+       ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT)             \
+       ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE)           \
+       ib_cm_rej_reason(REJ_PORT_CM_REDIRECT)                  \
+       ib_cm_rej_reason(REJ_PORT_REDIRECT)                     \
+       ib_cm_rej_reason(REJ_INVALID_MTU)                       \
+       ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES)       \
+       ib_cm_rej_reason(REJ_CONSUMER_DEFINED)                  \
+       ib_cm_rej_reason(REJ_INVALID_RNR_RETRY)                 \
+       ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID)           \
+       ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION)             \
+       ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL)                \
+       ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL)            \
+       ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef  ib_cm_rej_reason
+#undef  ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x)    TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x)        TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef  ib_cm_rej_reason
+#undef  ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x)    { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x)        { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+               __print_symbolic(x, IB_CM_REJ_REASON_LIST)
 
 DECLARE_EVENT_CLASS(icm_id_class,
        TP_PROTO(
@@ -111,6 +164,56 @@ DECLARE_EVENT_CLASS(icm_id_class,
        )
 );
 
+#define DEFINE_CM_SEND_EVENT(name)                                     \
+               DEFINE_EVENT(icm_id_class,                              \
+                               icm_send_##name,                                \
+                               TP_PROTO(                               \
+                                       const struct ib_cm_id *cm_id    \
+                               ),                                      \
+                               TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);             /* event icm_send_req, an instance of icm_id_class; names are token-pasted, so they are spelled out here for grep */
+DEFINE_CM_SEND_EVENT(rep);             /* icm_send_rep */
+DEFINE_CM_SEND_EVENT(dup_req);         /* icm_send_dup_req */
+DEFINE_CM_SEND_EVENT(dup_rep);         /* icm_send_dup_rep */
+DEFINE_CM_SEND_EVENT(rtu);             /* icm_send_rtu */
+DEFINE_CM_SEND_EVENT(mra);             /* icm_send_mra */
+DEFINE_CM_SEND_EVENT(sidr_req);        /* icm_send_sidr_req */
+DEFINE_CM_SEND_EVENT(sidr_rep);        /* icm_send_sidr_rep */
+DEFINE_CM_SEND_EVENT(dreq);            /* icm_send_dreq */
+DEFINE_CM_SEND_EVENT(drep);            /* icm_send_drep */
+
+TRACE_EVENT(icm_send_rej,      /* standalone event: takes a reject reason in addition to the cm_id */
+       TP_PROTO(
+               const struct ib_cm_id *cm_id,
+               enum ib_cm_rej_reason reason
+       ),
+
+       TP_ARGS(cm_id, reason),
+
+       TP_STRUCT__entry(
+               __field(const void *, cm_id)    /* stored in the record but not included in the TP_printk output below */
+               __field(u32, local_id)
+               __field(u32, remote_id)
+               __field(unsigned long, state)
+               __field(unsigned long, reason)
+       ),
+
+       TP_fast_assign(
+               __entry->cm_id = cm_id;
+               __entry->local_id = be32_to_cpu(cm_id->local_id);       /* IDs are converted with be32_to_cpu() before being stored */
+               __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+               __entry->state = cm_id->state;
+               __entry->reason = reason;
+       ),
+
+       TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+               __entry->local_id, __entry->remote_id,
+               show_ib_cm_state(__entry->state),
+               show_ib_cm_rej_reason(__entry->reason)  /* symbolic name looked up via IB_CM_REJ_REASON_LIST */
+       )
+);
+
 #define DEFINE_CM_ERR_EVENT(name)                                      \
                DEFINE_EVENT(icm_id_class,                              \
                                icm_##name##_err,                       \
@@ -172,6 +275,8 @@ DECLARE_EVENT_CLASS(icm_local_class,
                                ),                                      \
                                TP_ARGS(local_id, remote_id))
 
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
 DEFINE_CM_LOCAL_EVENT(staleconn_err);
 DEFINE_CM_LOCAL_EVENT(no_priv_err);