net/mlx5: Add command failures data to debugfs
author Moshe Shemesh <moshe@nvidia.com>
Thu, 27 Jan 2022 13:22:21 +0000 (15:22 +0200)
committer Saeed Mahameed <saeedm@nvidia.com>
Wed, 9 Mar 2022 21:33:00 +0000 (13:33 -0800)
Add new counters to command interface debugfs to count command failures.
The following counters are added:
failed - number of times the command failed (any kind of failure).
failed_mbox_status - number of times command failed on bad status
returned by FW.

In addition, add data about last command failure to command interface
debugfs:
last_failed_errno - errno returned by the last failed command (stored as a positive value).
last_failed_mbox_status - last bad status returned by FW.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
include/linux/mlx5/driver.h

index 823d5808d5a0e02fa11ae59baaab71c8659bde7c..8933c00067e820ba3e2f13912e27931a9aeeb6a6 100644 (file)
@@ -1877,16 +1877,38 @@ out_in:
        return err;
 }
 
+static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int err)
+{
+       struct mlx5_cmd_stats *stats;
+
+       if (!err)
+               return;
+
+       stats = &dev->cmd.stats[opcode];
+       spin_lock_irq(&stats->lock);
+       stats->failed++;
+       if (err < 0)
+               stats->last_failed_errno = -err;
+       if (err == -EREMOTEIO) {
+               stats->failed_mbox_status++;
+               stats->last_failed_mbox_status = status;
+       }
+       spin_unlock_irq(&stats->lock);
+}
+
 /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
-static int cmd_status_err(int err, void *out)
+static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
 {
-       if (err) /* -EREMOTEIO is preserved */
-               return err == -EREMOTEIO ? -EIO : err;
+       u8 status = MLX5_GET(mbox_out, out, status);
 
-       if (MLX5_GET(mbox_out, out, status) != MLX5_CMD_STAT_OK)
-               return -EREMOTEIO;
+       if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
+               err = -EIO;
 
-       return 0;
+       if (!err && status != MLX5_CMD_STAT_OK)
+               err = -EREMOTEIO;
+
+       cmd_status_log(dev, opcode, status, err);
+       return err;
 }
 
 /**
@@ -1910,8 +1932,10 @@ static int cmd_status_err(int err, void *out)
 int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size)
 {
        int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
+       u16 opcode = MLX5_GET(mbox_in, in, opcode);
 
-       return cmd_status_err(err, out);
+       err = cmd_status_err(dev, err, opcode, out);
+       return err;
 }
 EXPORT_SYMBOL(mlx5_cmd_do);
 
@@ -1954,8 +1978,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
                          void *out, int out_size)
 {
        int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
+       u16 opcode = MLX5_GET(mbox_in, in, opcode);
 
-       err = cmd_status_err(err, out);
+       err = cmd_status_err(dev, err, opcode, out);
        return mlx5_cmd_check(dev, err, in, out);
 }
 EXPORT_SYMBOL(mlx5_cmd_exec_polling);
@@ -1991,7 +2016,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
        struct mlx5_async_ctx *ctx;
 
        ctx = work->ctx;
-       status = cmd_status_err(status, work->out);
+       status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
        work->user_callback(status, work);
        if (atomic_dec_and_test(&ctx->num_inflight))
                wake_up(&ctx->wait);
@@ -2005,6 +2030,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
 
        work->ctx = ctx;
        work->user_callback = callback;
+       work->opcode = MLX5_GET(mbox_in, in, opcode);
        work->out = out;
        if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
                return -EIO;
index 10d195042ab554029304343e76922967f632b55a..18b04e977bb82f86c442b11613fa8cd88b7ce6e2 100644 (file)
@@ -180,6 +180,13 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
                        debugfs_create_file("average", 0400, stats->root, stats,
                                            &stats_fops);
                        debugfs_create_u64("n", 0400, stats->root, &stats->n);
+                       debugfs_create_u64("failed", 0400, stats->root, &stats->failed);
+                       debugfs_create_u64("failed_mbox_status", 0400, stats->root,
+                                          &stats->failed_mbox_status);
+                       debugfs_create_u32("last_failed_errno", 0400, stats->root,
+                                          &stats->last_failed_errno);
+                       debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
+                                         &stats->last_failed_mbox_status);
                }
        }
 }
index d3b1a6a1f8d28b5b5b8955e174f3b51245697c30..f18c1e15a12c877bff32cdcfe393c2062a9d97cd 100644 (file)
@@ -264,6 +264,14 @@ enum {
 struct mlx5_cmd_stats {
        u64             sum;
        u64             n;
+       /* number of times command failed */
+       u64             failed;
+       /* number of times command failed on bad status returned by FW */
+       u64             failed_mbox_status;
+       /* last command failed returned errno */
+       u32             last_failed_errno;
+       /* last bad status returned by FW */
+       u8              last_failed_mbox_status;
        struct dentry  *root;
        /* protect command average calculations */
        spinlock_t      lock;
@@ -955,6 +963,7 @@ typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context);
 struct mlx5_async_work {
        struct mlx5_async_ctx *ctx;
        mlx5_async_cbk_t user_callback;
+       u16 opcode; /* cmd opcode */
        void *out; /* pointer to the cmd output buffer */
 };