From 99cbfa93a6122b1e9011d3f4e94b58e10d2f5cd0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 2 Apr 2018 17:31:31 +0300 Subject: [PATCH] net/mlx5e: RX, Use cyclic WQ in legacy RQ Now that LRO is not supported for Legacy RQ, there is no source of out-of-order completions in the WQ, and we can use a cyclic one. This has multiple advantages: - reduces the WQE size (smaller PCI transactions). - lower overhead in datapath (no handling of 'next' pointers). - no reserved WQE for the WQ head (was needed in linked-list). - allows using a constant map between frag and dma_info struct, in downstream patch. Performance tests: ConnectX-4, single core, single RX ring. Major gain in packet rate of single ring XDP drop. Bottleneck is shifted from HW (at 16Mpps) to SW (at 20Mpps). Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 89 ++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 115 ++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/wq.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/wq.h | 55 ++++++++++- 6 files changed, 161 insertions(+), 111 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f2f2dcf..af521dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -186,9 +186,13 @@ struct mlx5e_tx_wqe { struct mlx5_wqe_data_seg data[0]; }; -struct mlx5e_rx_wqe { +struct mlx5e_rx_wqe_ll { struct mlx5_wqe_srq_next_seg next; - struct mlx5_wqe_data_seg data; + struct mlx5_wqe_data_seg data[0]; +}; + +struct mlx5e_rx_wqe_cyc { + struct mlx5_wqe_data_seg data[0]; }; struct mlx5e_umr_wqe { @@ -500,7 +504,7 @@ struct mlx5e_rq { /* data path */ union { struct { - struct mlx5_wq_ll wq; + struct mlx5_wq_cyc wq; struct mlx5e_wqe_frag_info *frag_info; u32 
frag_sz; /* max possible skb frag_sz */ union { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3a00771..7fd2d73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -166,7 +166,7 @@ static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, linear_rq_headroom += NET_IP_ALIGN; - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST) + if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) return linear_rq_headroom; if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) @@ -205,7 +205,7 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; + MLX5_WQ_TYPE_CYCLIC; } static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -325,7 +325,7 @@ static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: return mlx5_wq_ll_get_size(&rq->mpwqe.wq); default: - return mlx5_wq_ll_get_size(&rq->wqe.wq); + return mlx5_wq_cyc_get_size(&rq->wqe.wq); } } @@ -491,15 +491,15 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (err) goto err_destroy_umr_mkey; break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, - &rq->wq_ctrl); + default: /* MLX5_WQ_TYPE_CYCLIC */ + err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, + &rq->wq_ctrl); if (err) return err; rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; - wq_sz = mlx5_wq_ll_get_size(&rq->wqe.wq); + wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); rq->wqe.frag_info = kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), @@ -568,19 +568,19 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - struct mlx5e_rx_wqe *wqe = 
+ struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i); - wqe->data.addr = cpu_to_be64(dma_offset + rq->buff.headroom); - wqe->data.byte_count = cpu_to_be32(byte_count); - wqe->data.lkey = rq->mkey_be; + wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom); + wqe->data[0].byte_count = cpu_to_be32(byte_count); + wqe->data[0].lkey = rq->mkey_be; } else { - struct mlx5e_rx_wqe *wqe = - mlx5_wq_ll_get_wqe(&rq->wqe.wq, i); + struct mlx5e_rx_wqe_cyc *wqe = + mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); - wqe->data.byte_count = cpu_to_be32(byte_count); - wqe->data.lkey = rq->mkey_be; + wqe->data[0].byte_count = cpu_to_be32(byte_count); + wqe->data[0].lkey = rq->mkey_be; } } @@ -630,7 +630,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) kfree(rq->mpwqe.info); mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ + default: /* MLX5_WQ_TYPE_CYCLIC */ kfree(rq->wqe.frag_info); } @@ -801,11 +801,12 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + /* UMR WQE (if in progress) is always at wq->head */ if (rq->mpwqe.umr_in_progress) mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { - struct mlx5e_rx_wqe *wqe; + struct mlx5e_rx_wqe_ll *wqe; wqe_ix_be = *wq->tail_next; wqe_ix = be16_to_cpu(wqe_ix_be); @@ -815,24 +816,19 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) &wqe->next.next_wqe_index); } } else { - struct mlx5_wq_ll *wq = &rq->wqe.wq; - - while (!mlx5_wq_ll_is_empty(wq)) { - struct mlx5e_rx_wqe *wqe; + struct mlx5_wq_cyc *wq = &rq->wqe.wq; - wqe_ix_be = *wq->tail_next; - wqe_ix = be16_to_cpu(wqe_ix_be); - wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix); + while (!mlx5_wq_cyc_is_empty(wq)) { + wqe_ix = mlx5_wq_cyc_get_tail(wq); rq->dealloc_wqe(rq, wqe_ix); - mlx5_wq_ll_pop(wq, wqe_ix_be, - &wqe->next.next_wqe_index); + 
mlx5_wq_cyc_pop(wq); } /* Clean outstanding pages on handled WQEs that decided to do page-reuse, * but yet to be re-posted. */ if (rq->wqe.page_reuse) { - int wq_sz = mlx5_wq_ll_get_size(wq); + int wq_sz = mlx5_wq_cyc_get_size(wq); for (wqe_ix = 0; wqe_ix < wq_sz; wqe_ix++) rq->dealloc_wqe(rq, wqe_ix); @@ -1958,6 +1954,21 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) kfree(c); } +static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) +{ + int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; + + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + sz += sizeof(struct mlx5e_rx_wqe_ll); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + sz += sizeof(struct mlx5e_rx_wqe_cyc); + } + + return order_base_2(sz); +} + static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_rq_param *param) @@ -1965,6 +1976,7 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + int ndsegs = 1; switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -1974,16 +1986,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wqe_stride_size, mlx5e_mpwqe_get_log_stride_size(mdev, params) - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params)); break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); } + MLX5_SET(wq, wq, wq_type, params->rq_wq_type); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); 
MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); @@ -1999,8 +2011,9 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); - MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); @@ -2051,7 +2064,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) + mlx5e_mpwqe_get_log_num_strides(mdev, params); break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ + default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; } @@ -2857,8 +2870,8 @@ static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, param->wq.db_numa_node = param->wq.buf_numa_node; - err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq, - &rq->wq_ctrl); + err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq, + &rq->wq_ctrl); if (err) return err; @@ -3360,7 +3373,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable) new_channels.params = *old_params; new_channels.params.lro_en = enable; - if (old_params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST) { + if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) == mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params)) reset = false; @@ -3566,7 +3579,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST) { + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); u8 ppw_new = 
mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8ab4c96..3857f22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -927,7 +927,7 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, params->hard_mtu = MLX5E_ETH_HARD_MTU; params->sw_mtu = mtu; params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; - params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE; params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3b12d4d..3cdf2c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -113,7 +113,7 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, mpwrq_get_cqe_consumed_strides(&cq->title); else cq->decmprs_wqe_counter = - mlx5_wq_ll_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1); + mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1); } static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, @@ -270,7 +270,12 @@ static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq, !mlx5e_page_is_reserved(wi->di.page); } -static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) +{ + return &rq->wqe.frag_info[ix]; +} + +static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, u16 ix) { struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix]; @@ -281,7 +286,7 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 wi->offset = 0; } - wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom); + 
wqe->data[0].addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom); return 0; } @@ -370,7 +375,7 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->mpwqe.wq; - struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); rq->mpwqe.umr_in_progress = false; @@ -470,31 +475,32 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { - struct mlx5_wq_ll *wq = &rq->wqe.wq; + struct mlx5_wq_cyc *wq = &rq->wqe.wq; int err; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; - if (mlx5_wq_ll_is_full(wq)) + if (mlx5_wq_cyc_is_full(wq)) return false; do { - struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + u16 head = mlx5_wq_cyc_get_head(wq); + struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, head); - err = mlx5e_alloc_rx_wqe(rq, wqe, wq->head); + err = mlx5e_alloc_rx_wqe(rq, wqe, head); if (unlikely(err)) { rq->stats->buff_alloc_err++; break; } - mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - } while (!mlx5_wq_ll_is_full(wq)); + mlx5_wq_cyc_push(wq); + } while (!mlx5_wq_cyc_is_full(wq)); /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); - mlx5_wq_ll_update_db_record(wq); + mlx5_wq_cyc_update_db_record(wq); return !!err; } @@ -987,19 +993,15 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { - struct mlx5_wq_ll *wq = &rq->wqe.wq; + struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; - struct mlx5e_rx_wqe *wqe; - __be16 wqe_counter_be; struct sk_buff *skb; - u16 wqe_counter; u32 cqe_bcnt; + u16 ci; - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(wq, wqe_counter); - wi = 
&rq->wqe.frag_info[wqe_counter]; - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) { @@ -1007,20 +1009,19 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { wi->di.page = NULL; /* do not return page to cache, it will be returned on XDP_TX completion */ - goto wq_ll_pop; + goto wq_cyc_pop; } /* probably an XDP_DROP, save the page-reuse checks */ mlx5e_free_rx_wqe(rq, wi); - goto wq_ll_pop; + goto wq_cyc_pop; } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); mlx5e_free_rx_wqe_reuse(rq, wi); -wq_ll_pop: - mlx5_wq_ll_pop(wq, wqe_counter_be, - &wqe->next.next_wqe_index); +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); } #ifdef CONFIG_MLX5_ESWITCH @@ -1030,30 +1031,26 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - struct mlx5_wq_ll *wq = &rq->wqe.wq; + struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; - struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; - __be16 wqe_counter_be; - u16 wqe_counter; u32 cqe_bcnt; + u16 ci; - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(wq, wqe_counter); - wi = &rq->wqe.frag_info[wqe_counter]; - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { wi->di.page = NULL; /* do not return page to cache, it will be returned on XDP_TX completion */ - goto wq_ll_pop; + goto wq_cyc_pop; } /* probably an 
XDP_DROP, save the page-reuse checks */ mlx5e_free_rx_wqe(rq, wi); - goto wq_ll_pop; + goto wq_cyc_pop; } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); @@ -1064,9 +1061,8 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) napi_gro_receive(rq->cq.napi, skb); mlx5e_free_rx_wqe_reuse(rq, wi); -wq_ll_pop: - mlx5_wq_ll_pop(wq, wqe_counter_be, - &wqe->next.next_wqe_index); +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); } #endif @@ -1165,7 +1161,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); u32 page_idx = wqe_offset >> PAGE_SHIFT; - struct mlx5e_rx_wqe *wqe; + struct mlx5e_rx_wqe_ll *wqe; struct mlx5_wq_ll *wq; struct sk_buff *skb; u16 cqe_bcnt; @@ -1403,19 +1399,15 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { - struct mlx5_wq_ll *wq = &rq->wqe.wq; + struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; - struct mlx5e_rx_wqe *wqe; - __be16 wqe_counter_be; struct sk_buff *skb; - u16 wqe_counter; u32 cqe_bcnt; + u16 ci; - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(wq, wqe_counter); - wi = &rq->wqe.frag_info[wqe_counter]; - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) @@ -1430,8 +1422,7 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wq_free_wqe: mlx5e_free_rx_wqe_reuse(rq, wi); - mlx5_wq_ll_pop(wq, wqe_counter_be, - &wqe->next.next_wqe_index); + mlx5_wq_cyc_pop(wq); } #endif /* CONFIG_MLX5_CORE_IPOIB */ @@ -1440,38 +1431,34 @@ wq_free_wqe: void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { - struct mlx5_wq_ll *wq = &rq->wqe.wq; + 
struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; - struct mlx5e_rx_wqe *wqe; - __be16 wqe_counter_be; struct sk_buff *skb; - u16 wqe_counter; u32 cqe_bcnt; + u16 ci; - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(wq, wqe_counter); - wi = &rq->wqe.frag_info[wqe_counter]; - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (unlikely(!skb)) { /* a DROP, save the page-reuse checks */ mlx5e_free_rx_wqe(rq, wi); - goto wq_ll_pop; + goto wq_cyc_pop; } skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb); if (unlikely(!skb)) { mlx5e_free_rx_wqe(rq, wi); - goto wq_ll_pop; + goto wq_cyc_pop; } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); mlx5e_free_rx_wqe_reuse(rq, wi); -wq_ll_pop: - mlx5_wq_ll_pop(wq, wqe_counter_be, &wqe->next.next_wqe_index); +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); } #endif /* CONFIG_MLX5_EN_IPSEC */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 5b8b353..b97bb72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -85,6 +85,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), MLX5_GET(wq, wqc, log_wq_sz), fbc); + wq->sz = wq->fbc.sz_m1 + 1; err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index b9d7c01..0b47126 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -51,6 +51,9 @@ struct mlx5_wq_ctrl { struct mlx5_wq_cyc { struct mlx5_frag_buf_ctrl fbc; __be32 *db; + u16 sz; + u16 wqe_ctr; + u16 
cur_sz; }; struct mlx5_wq_qp { @@ -95,6 +98,43 @@ u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq); void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); +static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq) +{ + return wq->cur_sz == wq->sz; +} + +static inline int mlx5_wq_cyc_missing(struct mlx5_wq_cyc *wq) +{ + return wq->sz - wq->cur_sz; +} + +static inline int mlx5_wq_cyc_is_empty(struct mlx5_wq_cyc *wq) +{ + return !wq->cur_sz; +} + +static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr++; + wq->cur_sz++; +} + +static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n) +{ + wq->wqe_ctr += n; + wq->cur_sz += n; +} + +static inline void mlx5_wq_cyc_pop(struct mlx5_wq_cyc *wq) +{ + wq->cur_sz--; +} + +static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq) +{ + *wq->db = cpu_to_be32(wq->wqe_ctr); +} + static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) { return ctr & wq->fbc.sz_m1; @@ -105,6 +145,16 @@ static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr) return ctr & wq->fbc.frag_sz_m1; } +static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr); +} + +static inline u16 mlx5_wq_cyc_get_tail(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr - wq->cur_sz); +} + static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix) { return mlx5_frag_buf_get_wqe(&wq->fbc, ix); @@ -179,11 +229,6 @@ static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq) return !wq->cur_sz; } -static inline u16 mlx5_wq_ll_ctr2ix(struct mlx5_wq_ll *wq, u16 ctr) -{ - return ctr & wq->fbc.sz_m1; -} - static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix) { return mlx5_frag_buf_get_wqe(&wq->fbc, ix); -- 2.7.4