net/mlx5: Read timeout values from DTOR
author Amir Tzin <amirtz@nvidia.com>
Wed, 13 Oct 2021 06:07:13 +0000 (09:07 +0300)
committer Saeed Mahameed <saeedm@nvidia.com>
Sat, 16 Oct 2021 00:37:43 +0000 (17:37 -0700)
Replace hard-coded timeouts with values stored by firmware in the default
timeouts register (DTOR). Timeouts are read during driver load. If DTOR
is not supported by firmware, fall back to the hard-coded defaults
instead.

Signed-off-by: Amir Tzin <amirtz@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/en/health.h
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
drivers/net/ethernet/mellanox/mlx5/core/fw.c
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c

index 018262d..d5b7110 100644 (file)
@@ -32,7 +32,6 @@ void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
 void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
 
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
-#define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000
 
 struct mlx5e_err_ctx {
        int (*recover)(void *ctx);
index 0eb1253..74086eb 100644 (file)
@@ -6,6 +6,7 @@
 #include "txrx.h"
 #include "devlink.h"
 #include "ptp.h"
+#include "lib/tout.h"
 
 static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
 {
@@ -32,8 +33,10 @@ out:
 
 static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
 {
-       unsigned long exp_time = jiffies +
-                                msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
+       struct mlx5_core_dev *dev = icosq->channel->mdev;
+       unsigned long exp_time;
+
+       exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
 
        while (time_before(jiffies, exp_time)) {
                if (icosq->cc == icosq->pc)
index bb682fd..4f4bc87 100644 (file)
@@ -4,11 +4,14 @@
 #include "health.h"
 #include "en/ptp.h"
 #include "en/devlink.h"
+#include "lib/tout.h"
 
 static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
 {
-       unsigned long exp_time = jiffies +
-                                msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
+       struct mlx5_core_dev *dev = sq->mdev;
+       unsigned long exp_time;
+
+       exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
 
        while (time_before(jiffies, exp_time)) {
                if (sq->cc == sq->pc)
index 016d26f..f4f8993 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include "mlx5_core.h"
 #include "../../mlxfw/mlxfw.h"
+#include "lib/tout.h"
 #include "accel/tls.h"
 
 enum {
@@ -317,10 +318,9 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
        return 0;
 }
 
-#define MLX5_FAST_TEARDOWN_WAIT_MS   3000
 int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
 {
-       unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS;
+       unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN);
        u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {};
        u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
        int state;
@@ -618,17 +618,18 @@ static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
                         fwhandle, 0);
 }
 
-#define MLX5_FSM_REACTIVATE_TOUT 5000 /* msecs */
 static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status)
 {
-       unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5_FSM_REACTIVATE_TOUT);
        struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
                container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
        struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
        u32 out[MLX5_ST_SZ_DW(mirc_reg)];
        u32 in[MLX5_ST_SZ_DW(mirc_reg)];
+       unsigned long exp_time;
        int err;
 
+       exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE));
+
        if (!MLX5_CAP_MCAM_REG2(dev, mirc))
                return -EOPNOTSUPP;
 
index 106b50e..eaca79c 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "fw_reset.h"
 #include "diag/fw_tracer.h"
+#include "lib/tout.h"
 
 enum {
        MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
@@ -228,8 +229,6 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
                mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
 }
 
-#define MLX5_PCI_LINK_UP_TIMEOUT 2000
-
 static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
 {
        struct pci_bus *bridge_bus = dev->pdev->bus;
@@ -286,7 +285,7 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
                goto restore;
        }
 
-       timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT);
+       timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE));
        do {
                err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16);
                if (err)
@@ -299,8 +298,8 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
        if (reg16 & PCI_EXP_LNKSTA_DLLLA) {
                mlx5_core_info(dev, "PCI Link up\n");
        } else {
-               mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n",
-                             reg16, MLX5_PCI_LINK_UP_TIMEOUT);
+               mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n",
+                             reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
                err = -ETIMEDOUT;
        }
 
@@ -395,16 +394,15 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
        return NOTIFY_OK;
 }
 
-#define MLX5_FW_RESET_TIMEOUT_MSEC 5000
 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
 {
-       unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC);
+       unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE));
        struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
        int err;
 
        if (!wait_for_completion_timeout(&fw_reset->done, timeout)) {
-               mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n",
-                              MLX5_FW_RESET_TIMEOUT_MSEC / 1000);
+               mlx5_core_warn(dev, "FW sync reset timeout after %llu seconds\n",
+                              mlx5_tout_ms(dev, PCI_SYNC_UPDATE) / 1000);
                err = -ETIMEDOUT;
                goto out;
        }
index 037e18d..6a4dd7f 100644 (file)
 #include "lib/eq.h"
 #include "lib/mlx5.h"
 #include "lib/pci_vsc.h"
+#include "lib/tout.h"
 #include "diag/fw_tracer.h"
 
 enum {
-       MLX5_HEALTH_POLL_INTERVAL       = 2 * HZ,
        MAX_MISSES                      = 3,
 };
 
@@ -219,11 +219,9 @@ unlock:
        mutex_unlock(&dev->intf_state_mutex);
 }
 
-#define MLX5_CRDUMP_WAIT_MS    60000
-#define MLX5_FW_RESET_WAIT_MS  1000
 void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
 {
-       unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
+       unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
        int lock = -EBUSY;
 
        mutex_lock(&dev->intf_state_mutex);
@@ -237,7 +235,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
                lock = lock_sem_sw_reset(dev, true);
 
                if (lock == -EBUSY) {
-                       delay_ms = MLX5_CRDUMP_WAIT_MS;
+                       delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
                        goto recover_from_sw_reset;
                }
                /* Execute SW reset */
@@ -307,13 +305,11 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
        mlx5_disable_device(dev);
 }
 
-/* How much time to wait until health resetting the driver (in msecs) */
-#define MLX5_RECOVERY_WAIT_MSECS 60000
 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
 {
        unsigned long end;
 
-       end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
+       end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
        while (sensor_pci_not_working(dev)) {
                if (time_after(jiffies, end))
                        return -ETIMEDOUT;
@@ -674,13 +670,13 @@ static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
                devlink_health_reporter_destroy(health->fw_fatal_reporter);
 }
 
-static unsigned long get_next_poll_jiffies(void)
+static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
 {
        unsigned long next;
 
        get_random_bytes(&next, sizeof(next));
        next %= HZ;
-       next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+       next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
 
        return next;
 }
@@ -740,11 +736,12 @@ static void poll_health(struct timer_list *t)
                queue_work(health->wq, &health->report_work);
 
 out:
-       mod_timer(&health->timer, get_next_poll_jiffies());
+       mod_timer(&health->timer, get_next_poll_jiffies(dev));
 }
 
 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 {
+       u64 poll_interval_ms =  mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
        struct mlx5_core_health *health = &dev->priv.health;
 
        timer_setup(&health->timer, poll_health, 0);
@@ -753,7 +750,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
        health->health = &dev->iseg->health;
        health->health_counter = &dev->iseg->health_counter;
 
-       health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
+       health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
        add_timer(&health->timer);
 }
 
index ee266e0..0dd96a6 100644 (file)
@@ -13,7 +13,17 @@ static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
        [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
        [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
        [MLX5_TO_FW_INIT_MS] = 2000,
-       [MLX5_TO_CMD_MS] = 60000
+       [MLX5_TO_CMD_MS] = 60000,
+       [MLX5_TO_PCI_TOGGLE_MS] =  2000,
+       [MLX5_TO_HEALTH_POLL_INTERVAL_MS] =  2000,
+       [MLX5_TO_FULL_CRDUMP_MS] = 60000,
+       [MLX5_TO_FW_RESET_MS] = 60000,
+       [MLX5_TO_FLUSH_ON_ERROR_MS] = 2000,
+       [MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000,
+       [MLX5_TO_TEARDOWN_MS] = 3000,
+       [MLX5_TO_FSM_REACTIVATE_MS] = 5000,
+       [MLX5_TO_RECLAIM_PAGES_MS] = 5000,
+       [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000
 };
 
 static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type)
@@ -94,3 +104,59 @@ u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type)
 {
        return dev->timeouts->to[type];
 }
+
+#define MLX5_TIMEOUT_QUERY(fld, reg_out) \
+       ({ \
+       struct mlx5_ifc_default_timeout_bits *time_field; \
+       u32 to_multi, to_value; \
+       u64 to_val_ms; \
+       \
+       time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \
+       to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \
+       to_value = MLX5_GET(default_timeout, time_field, to_value); \
+       to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \
+       to_val_ms; \
+       })
+
+#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \
+       ({ \
+       u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \
+       tout_set(dev, fw_to + (to_extra), to_type); \
+       fw_to; \
+       })
+
+static int tout_query_dtor(struct mlx5_core_dev *dev)
+{
+       u64 pcie_toggle_to_val, tear_down_to_val;
+       u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {};
+       u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {};
+       int err;
+
+       err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0);
+       if (err)
+               return err;
+
+       pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0);
+       MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val);
+
+       tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0);
+       MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS,
+                         tear_down_to_val);
+
+       MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0);
+       MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0);
+       MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0);
+       MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0);
+       MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0);
+       MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0);
+
+       return 0;
+}
+
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev)
+{
+       if (tout_is_supported(dev))
+               return tout_query_dtor(dev);
+
+       return 0;
+}
index 7e6fc61..31faa5c 100644 (file)
@@ -14,6 +14,18 @@ enum mlx5_timeouts_types {
        MLX5_TO_FW_INIT_MS,
        MLX5_TO_CMD_MS,
 
+       /* DTOR timeouts */
+       MLX5_TO_PCI_TOGGLE_MS,
+       MLX5_TO_HEALTH_POLL_INTERVAL_MS,
+       MLX5_TO_FULL_CRDUMP_MS,
+       MLX5_TO_FW_RESET_MS,
+       MLX5_TO_FLUSH_ON_ERROR_MS,
+       MLX5_TO_PCI_SYNC_UPDATE_MS,
+       MLX5_TO_TEARDOWN_MS,
+       MLX5_TO_FSM_REACTIVATE_MS,
+       MLX5_TO_RECLAIM_PAGES_MS,
+       MLX5_TO_RECLAIM_VFS_PAGES_MS,
+
        MAX_TIMEOUT_TYPES
 };
 
@@ -21,6 +33,7 @@ struct mlx5_core_dev;
 int mlx5_tout_init(struct mlx5_core_dev *dev);
 void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
 void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
 u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
 
 #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
index b4893ea..75d2842 100644 (file)
@@ -1020,6 +1020,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
                goto err_disable_hca;
        }
 
+       err = mlx5_tout_query_dtor(dev);
+       if (err) {
+               mlx5_core_err(dev, "failed to read dtor\n");
+               goto reclaim_boot_pages;
+       }
+
        err = set_hca_ctrl(dev);
        if (err) {
                mlx5_core_err(dev, "set_hca_ctrl failed\n");
index 110c083..f6b5451 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/xarray.h>
 #include "mlx5_core.h"
 #include "lib/eq.h"
+#include "lib/tout.h"
 
 enum {
        MLX5_PAGES_CANT_GIVE    = 0,
@@ -65,11 +66,6 @@ struct fw_page {
 };
 
 enum {
-       MAX_RECLAIM_TIME_MSECS  = 5000,
-       MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
-};
-
-enum {
        MLX5_MAX_RECLAIM_TIME_MILI      = 5000,
        MLX5_NUM_4K_IN_PAGE             = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
 };
@@ -641,7 +637,8 @@ static int optimal_reclaimed_pages(void)
 static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
                                   struct rb_root *root, u16 func_id)
 {
-       unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+       u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES));
+       unsigned long end = jiffies + recl_pages_to_jiffies;
 
        while (!RB_EMPTY_ROOT(root)) {
                int nclaimed;
@@ -656,7 +653,7 @@ static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
                }
 
                if (nclaimed)
-                       end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+                       end = jiffies + recl_pages_to_jiffies;
 
                if (time_after(jiffies, end)) {
                        mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
@@ -727,7 +724,8 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
 
 int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
 {
-       unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+       u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES));
+       unsigned long end = jiffies + recl_vf_pages_to_jiffies;
        int prev_pages = *pages;
 
        /* In case of internal error we will free the pages manually later */
@@ -743,7 +741,7 @@ int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
                        return -ETIMEDOUT;
                }
                if (*pages < prev_pages) {
-                       end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+                       end = jiffies + recl_vf_pages_to_jiffies;
                        prev_pages = *pages;
                }
                msleep(50);