IB/mlx5: Expose extended error counters
author Parav Pandit <parav@mellanox.com>
Mon, 19 Jun 2017 04:19:37 +0000 (07:19 +0300)
committer Doug Ledford <dledford@redhat.com>
Mon, 24 Jul 2017 14:41:01 +0000 (10:41 -0400)
This patch adds the requester- and responder-side error counters listed
below. They are exposed through the hardware counters interface and are
supported as part of the query Q counters command extension.

 +---------------------------+-------------------------------------+
 |      Name                 |           Description               |
 |---------------------------+-------------------------------------|
 |resp_local_length_error    | Number of times responder detected  |
 |                           | local length errors                 |
 |---------------------------+-------------------------------------|
 |resp_cqe_error             | Number of CQEs completed with error |
 |                           | at responder                        |
 |---------------------------+-------------------------------------|
 |req_cqe_error              | Number of CQEs completed with error |
 |                           | at requester                        |
 |---------------------------+-------------------------------------|
 |req_remote_invalid_request | Number of times requester detected  |
 |                           | remote invalid request error        |
 |---------------------------+-------------------------------------|
 |req_remote_access_errors   | Number of times requester detected  |
 |                           | remote access error                 |
 |---------------------------+-------------------------------------|
 |resp_remote_access_errors  | Number of times responder detected  |
 |                           | remote access error                 |
 |---------------------------+-------------------------------------|
 |resp_cqe_flush_error       | Number of CQEs flushed with error   |
 |                           | at responder                        |
 |---------------------------+-------------------------------------|
 |req_cqe_flush_error        | Number of CQEs flushed with error   |
 |                           | at requester                        |
 +---------------------------+-------------------------------------+

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/mlx5/main.c
include/linux/mlx5/mlx5_ifc.h

index 2757d44..9dd9759 100644 (file)
@@ -3432,6 +3432,17 @@ static const struct mlx5_ib_counter cong_cnts[] = {
        INIT_CONG_COUNTER(np_cnp_sent),
 };
 
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+       INIT_Q_COUNTER(resp_local_length_error),
+       INIT_Q_COUNTER(resp_cqe_error),
+       INIT_Q_COUNTER(req_cqe_error),
+       INIT_Q_COUNTER(req_remote_invalid_request),
+       INIT_Q_COUNTER(req_remote_access_errors),
+       INIT_Q_COUNTER(resp_remote_access_errors),
+       INIT_Q_COUNTER(resp_cqe_flush_error),
+       INIT_Q_COUNTER(req_cqe_flush_error),
+};
+
 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
        unsigned int i;
@@ -3456,6 +3467,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 
        if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
                num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+       if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+               num_counters += ARRAY_SIZE(extended_err_cnts);
+
        cnts->num_q_counters = num_counters;
 
        if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -3505,6 +3520,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
                }
        }
 
+       if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+               for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+                       names[j] = extended_err_cnts[i].name;
+                       offsets[j] = extended_err_cnts[i].offset;
+               }
+       }
+
        if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
                for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
                        names[j] = cong_cnts[i].name;
index f350688..5bae70e 100644 (file)
@@ -858,7 +858,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         pcam_reg[0x1];
        u8         local_ca_ack_delay[0x5];
        u8         port_module_event[0x1];
-       u8         reserved_at_1b1[0x1];
+       u8         enhanced_error_q_counters[0x1];
        u8         ports_check[0x1];
        u8         reserved_at_1b3[0x1];
        u8         disable_link_up[0x1];
@@ -3953,7 +3953,47 @@ struct mlx5_ifc_query_q_counter_out_bits {
 
        u8         local_ack_timeout_err[0x20];
 
-       u8         reserved_at_320[0x4e0];
+       u8         reserved_at_320[0xa0];
+
+       u8         resp_local_length_error[0x20];
+
+       u8         req_local_length_error[0x20];
+
+       u8         resp_local_qp_error[0x20];
+
+       u8         local_operation_error[0x20];
+
+       u8         resp_local_protection[0x20];
+
+       u8         req_local_protection[0x20];
+
+       u8         resp_cqe_error[0x20];
+
+       u8         req_cqe_error[0x20];
+
+       u8         req_mw_binding[0x20];
+
+       u8         req_bad_response[0x20];
+
+       u8         req_remote_invalid_request[0x20];
+
+       u8         resp_remote_invalid_request[0x20];
+
+       u8         req_remote_access_errors[0x20];
+
+       u8         resp_remote_access_errors[0x20];
+
+       u8         req_remote_operation_errors[0x20];
+
+       u8         req_transport_retries_exceeded[0x20];
+
+       u8         cq_overflow[0x20];
+
+       u8         resp_cqe_flush_error[0x20];
+
+       u8         req_cqe_flush_error[0x20];
+
+       u8         reserved_at_620[0x1e0];
 };
 
 struct mlx5_ifc_query_q_counter_in_bits {