Before commit 76c31e5f7585 ("net/mlx5e: Use FW limitation for max MPW
WQEBBs"), the maximum size of an MPWQE in WQEBBs was hardcoded as a
driver constant. That commit started using the firmware capability that
can further limit the size. However, it unintentionally changed a few
things:
1. The calculation of MLX5E_MAX_KLM_PER_WQE used the size in DS, which
was replaced by the size in WQEBBs, making the resulting value 4 times
smaller.
2. MLX5E_TX_MPW_MAX_WQEBBS used to be aligned to the cache line size
(either 64 or 128 bytes, i.e. 1 or 2 WQEBBs), but it's no longer the
case if the firmware capability is smaller than the driver maximum.
Fix both issues by using the correct units for MLX5E_MAX_KLM_PER_WQE and
by making mlx5e_get_sw_max_sq_mpw_wqebbs align the value down only after
taking the minimum with the firmware limit (see the sketch below).
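The following standalone sketch walks through the numbers for both
issues and the fix. It is an illustration, not driver code: the
constants mirror the mlx5 header values (a WQEBB is 64 bytes and holds
4 DS of 16 bytes; MLX5_MKEY_BSF_OCTO_SIZE == 4, i.e. a shift that
multiplies by 16), and the 13-WQEBB firmware cap is hypothetical.

#include <stdio.h>

/* Values mirroring the mlx5 constants (illustration only). */
#define WQE_BB             64 /* bytes per WQEBB */
#define WQEBB_NUM_DS        4 /* 16-byte DS per WQEBB */
#define MKEY_BSF_OCTO_SIZE  4 /* "<< 4" multiplies by 16 */
#define WQE_MAX_WQEBBS     16

static unsigned int align_down(unsigned int x, unsigned int a)
{
	return x - x % a;
}

int main(void)
{
	unsigned int fw_cap = 13; /* hypothetical FW limit in WQEBBs */

	/* Issue 1: the KLM macro expected a size in DS; feeding it
	 * WQEBBs makes the byte count 4 times smaller.
	 */
	printf("from DS units:    %u bytes\n",
	       ((WQE_MAX_WQEBBS - 1) * WQEBB_NUM_DS) << MKEY_BSF_OCTO_SIZE); /* 960 */
	printf("from WQEBB units: %u bytes\n",
	       (WQE_MAX_WQEBBS - 1) << MKEY_BSF_OCTO_SIZE); /* 240 */

	/* Issue 2, on a 128-byte cache line: min(13, 16 - 2) == 13
	 * WQEBBs == 832 bytes, which is not a multiple of 128.
	 */
	unsigned int old = fw_cap < WQE_MAX_WQEBBS - 2 ? fw_cap : WQE_MAX_WQEBBS - 2;

	/* The fix: take the minimum first, then align down to an even
	 * number of WQEBBs: min(13, 16 - 1) == 13 -> 12 == 768 bytes.
	 */
	unsigned int fixed = fw_cap < WQE_MAX_WQEBBS - 1 ? fw_cap : WQE_MAX_WQEBBS - 1;

	fixed = align_down(fixed, 2);
	printf("old: %u WQEBBs (%u bytes), fixed: %u WQEBBs (%u bytes)\n",
	       old, old * WQE_BB, fixed, fixed * WQE_BB);
	return 0;
}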
Besides fixing the arithmetic in the calculation of
MLX5E_MAX_KLM_PER_WQE, also use appropriate constants: `size of BSF *
num of DS per WQEBB * number of WQEBBs` (the calculation before the
blamed commit) is not a meaningful way to express the WQE size in bytes,
so just use `size of WQEBB * number of WQEBBs`.
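For example, with 15 WQEBBs (constants as in the mlx5 headers) the two
expressions agree numerically; only the new one reads as `bytes = WQEBB
size * WQEBB count`:

  old: (15 WQEBBs * 4 DS/WQEBB) << MLX5_MKEY_BSF_OCTO_SIZE == 60 << 4 == 960 bytes
  new:  MLX5_SEND_WQE_BB * 15 WQEBBs                       == 64 * 15 == 960 bytes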
While at it, replace the types that hold the number of WQEBBs with u8.
These values don't exceed 16, and the narrower type allows filling holes
in two structs.
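A minimal layout sketch of that effect (only the three adjacent fields
from the diff below; the real structs have more members around them, and
unsigned short/char stand in for the kernel's u16/u8):

/* Before: 2 + 2 + 1 bytes, padded to sizeof == 6 by 2-byte alignment. */
struct before {
	unsigned short stop_room;
	unsigned short max_sq_mpw_wqebbs; /* 2 bytes for a value <= 16 */
	unsigned char  min_inline_mode;   /* 1 trailing padding byte */
};

/* After: the two single-byte fields pack together, sizeof == 4. */
struct after {
	unsigned short stop_room;
	unsigned char  max_sq_mpw_wqebbs;
	unsigned char  min_inline_mode;
};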
Fixes: 76c31e5f7585 ("net/mlx5e: Use FW limitation for max MPW WQEBBs")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
#define MLX5E_MAX_KLM_PER_WQE(mdev) \
- MLX5E_KLM_ENTRIES_PER_WQE(mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)) \
- << MLX5_MKEY_BSF_OCTO_SIZE)
+ MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * \
+ mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)))
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB);
}
-static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs)
+static inline u8 mlx5e_get_sw_max_sq_mpw_wqebbs(u8 max_sq_wqebbs)
{
	/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS.
	 * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16,
	 * the multiplication (16 * 4 == 64) doesn't fit in the 6-bit DS
	 * field of Ctrl Segment, so the cap is kept at least one WQEBB lower
	 * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be
	 * cache-aligned.
	 */
-#if L1_CACHE_BYTES < 128
- return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1);
-#else
- return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 2);
+ u8 wqebbs = min_t(u8, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1);
+
+#if L1_CACHE_BYTES >= 128
+ wqebbs = ALIGN_DOWN(wqebbs, 2);
#endif
+ return wqebbs;
}
struct mlx5e_txqsq {
struct netdev_queue *txq;
u32 sqn;
u16 stop_room;
- u16 max_sq_mpw_wqebbs;
+ u8 max_sq_mpw_wqebbs;
u8 min_inline_mode;
struct device *pdev;
__be32 mkey_be;
struct device *pdev;
__be32 mkey_be;
u16 stop_room;
- u16 max_sq_mpw_wqebbs;
+ u8 max_sq_mpw_wqebbs;
u8 min_inline_mode;
unsigned long state;
unsigned int hw_mtu;