RDMA/efa: Count admin commands errors
author Gal Pressman <galpress@amazon.com>
Mon, 20 Apr 2020 06:22:13 +0000 (09:22 +0300)
committer Jason Gunthorpe <jgg@mellanox.com>
Sat, 2 May 2020 23:32:14 +0000 (20:32 -0300)
Add a new stat that counts admin command failures, which might help when
debugging different issues.

Link: https://lore.kernel.org/r/20200420062213.44577-4-galpress@amazon.com
Reviewed-by: Daniel Kranzdorf <dkkranzd@amazon.com>
Reviewed-by: Yossi Leybovich <sleybo@amazon.com>
Signed-off-by: Gal Pressman <galpress@amazon.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/efa/efa_com.c
drivers/infiniband/hw/efa/efa_com.h
drivers/infiniband/hw/efa/efa_verbs.c

index 7fce69f5568f30b8a5cf4283a8ea595ea564bc9c..336bc2c57bb1d0eb83bd1c0819c4964e99ca0451 100644 (file)
@@ -631,17 +631,20 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
                        cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
 
                up(&aq->avail_cmds);
+               atomic64_inc(&aq->stats.cmd_err);
                return PTR_ERR(comp_ctx);
        }
 
        err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
-       if (err)
+       if (err) {
                ibdev_err_ratelimited(
                        aq->efa_dev,
                        "Failed to process command %s (opcode %u) comp_status %d err %d\n",
                        efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
                        cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
                        err);
+               atomic64_inc(&aq->stats.cmd_err);
+       }
 
        up(&aq->avail_cmds);
 
index c67dd8109d1cd6a93b59d45c88a0f16fec2fbc16..5e4c88877ddb59340e898e17df16c3f76f2a2877 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
 /*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef _EFA_COM_H_
@@ -47,6 +47,7 @@ struct efa_com_admin_sq {
 struct efa_com_stats_admin {
        atomic64_t submitted_cmd;
        atomic64_t completed_cmd;
+       atomic64_t cmd_err;
        atomic64_t no_completion;
 };
 
index 1f8162b2067db1b6dc472b34567f813af2aa985d..08313f7c73bc06f76a10c0ec7f419766f76bcd81 100644 (file)
@@ -37,6 +37,7 @@ struct efa_user_mmap_entry {
        op(EFA_RX_DROPS, "rx_drops") \
        op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
        op(EFA_COMPLETED_CMDS, "completed_cmds") \
+       op(EFA_CMDS_ERR, "cmds_err") \
        op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
        op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
        op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
@@ -1752,6 +1753,7 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
        as = &dev->edev.aq.stats;
        stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
        stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
+       stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
        stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
 
        s = &dev->stats;