net/mlx5: Add debugfs counters for page commands failures
author Moshe Shemesh <moshe@nvidia.com>
Thu, 27 Jan 2022 05:51:14 +0000 (07:51 +0200)
committer Saeed Mahameed <saeedm@nvidia.com>
Wed, 9 Mar 2022 21:33:02 +0000 (13:33 -0800)
Add the following new debugfs counters for debug and verbosity:
fw_pages_alloc_failed - number of pages FW requested but driver failed
to allocate.
give_pages_dropped - number of pages given to FW for which the give pages
command was failed by FW (exposed in debugfs as fw_pages_give_dropped).
reclaim_pages_discard - number of pages that were about to be reclaimed
when FW failed the reclaim command (exposed in debugfs as
fw_pages_reclaim_discard).

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
include/linux/mlx5/driver.h

index 8673ba2..d69bac9 100644 (file)
@@ -222,6 +222,10 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev)
        debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages);
        debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages);
        debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages);
+       debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed);
+       debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped);
+       debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages,
+                          &dev->priv.reclaim_pages_discard);
 }
 
 void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev)
index 8855fe7..e0543b8 100644 (file)
@@ -352,8 +352,10 @@ retry:
                if (err) {
                        if (err == -ENOMEM)
                                err = alloc_system_page(dev, function);
-                       if (err)
+                       if (err) {
+                               dev->priv.fw_pages_alloc_failed += (npages - i);
                                goto out_4k;
+                       }
 
                        goto retry;
                }
@@ -372,14 +374,14 @@ retry:
                /* if triggered by FW and failed by FW ignore */
                if (event) {
                        err = 0;
-                       goto out_4k;
+                       goto out_dropped;
                }
        }
        if (err) {
                err = mlx5_cmd_check(dev, err, in, out);
                mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
                               func_id, npages, err);
-               goto out_4k;
+               goto out_dropped;
        }
 
        dev->priv.fw_pages += npages;
@@ -394,6 +396,8 @@ retry:
        kvfree(in);
        return 0;
 
+out_dropped:
+       dev->priv.give_pages_dropped += npages;
 out_4k:
        for (i--; i >= 0; i--)
                free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function);
@@ -516,6 +520,10 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
        mlx5_core_dbg(dev, "func 0x%x, npages %d, outlen %d\n",
                      func_id, npages, outlen);
        err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
+       if (err) {
+               npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+               dev->priv.reclaim_pages_discard += npages;
+       }
        /* if triggered by FW event and failed by FW then ignore */
        if (event && err == -EREMOTEIO)
                err = 0;
index c5f93b5..00a914b 100644 (file)
@@ -575,6 +575,9 @@ struct mlx5_priv {
        struct list_head        free_list;
        u32                     vfs_pages;
        u32                     host_pf_pages;
+       u32                     fw_pages_alloc_failed;
+       u32                     give_pages_dropped;
+       u32                     reclaim_pages_discard;
 
        struct mlx5_core_health health;
        struct list_head        traps;