#include <net/busy_poll.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
+#include <net/inet_ecn.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
struct mlx5_wq_cyc *wq,
- u16 pi, u16 frag_pi)
+ u16 pi, u16 nnops)
{
struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
- u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
edge_wi = wi + nnops;
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe;
u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
- u16 pi, frag_pi;
+ u16 pi, contig_wqebbs_room;
int err;
int i;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-
- if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
- mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+ mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
}
skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
}
-static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+ __be16 *proto)
{
- __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;
-
- ethertype = __vlan_get_protocol(skb, ethertype, network_depth);
- return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
+ *proto = ((struct ethhdr *)skb->data)->h_proto;
+ *proto = __vlan_get_protocol(skb, *proto, network_depth);
+ return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6));
}
-static __be32 mlx5e_get_fcs(struct sk_buff *skb)
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
{
- int last_frag_sz, bytes_in_prev, nr_frags;
- u8 *fcs_p1, *fcs_p2;
- skb_frag_t *last_frag;
- __be32 fcs_bytes;
+ int network_depth = 0;
+ __be16 proto;
+ void *ip;
+ int rc;
- if (!skb_is_nonlinear(skb))
- return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN);
+ if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+ return;
- nr_frags = skb_shinfo(skb)->nr_frags;
- last_frag = &skb_shinfo(skb)->frags[nr_frags - 1];
- last_frag_sz = skb_frag_size(last_frag);
+ ip = skb->data + network_depth;
+ rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+ IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
- /* If all FCS data is in last frag */
- if (last_frag_sz >= ETH_FCS_LEN)
- return *(__be32 *)(skb_frag_address(last_frag) +
- last_frag_sz - ETH_FCS_LEN);
+ rq->stats->ecn_mark += !!rc;
+}
- fcs_p2 = (u8 *)skb_frag_address(last_frag);
- bytes_in_prev = ETH_FCS_LEN - last_frag_sz;
+static u32 mlx5e_get_fcs(const struct sk_buff *skb)
+{
+ const void *fcs_bytes;
+ u32 _fcs_bytes;
- /* Find where the other part of the FCS is - Linear or another frag */
- if (nr_frags == 1) {
- fcs_p1 = skb_tail_pointer(skb);
- } else {
- skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2];
+ fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
+ ETH_FCS_LEN, &_fcs_bytes);
- fcs_p1 = skb_frag_address(prev_frag) +
- skb_frag_size(prev_frag);
- }
- fcs_p1 -= bytes_in_prev;
+ return __get_unaligned_cpu32(fcs_bytes);
+}
- memcpy(&fcs_bytes, fcs_p1, bytes_in_prev);
- memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz);
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+{
+ void *ip_p = skb->data + network_depth;
- return fcs_bytes;
+ return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+ ((struct ipv6hdr *)ip_p)->nexthdr;
}
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
static inline void mlx5e_handle_csum(struct net_device *netdev,
struct mlx5_cqe64 *cqe,
struct mlx5e_rq *rq,
{
struct mlx5e_rq_stats *stats = rq->stats;
int network_depth = 0;
+ __be16 proto;
if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
goto csum_none;
return;
}
- if (likely(is_last_ethertype_ip(skb, &network_depth))) {
+ /* True when explicitly set via priv flag, or XDP prog is loaded */
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+ goto csum_unnecessary;
+
+ /* CQE csum doesn't cover padding octets in short ethernet
+ * frames. And the pad field is appended prior to calculating
+ * and appending the FCS field.
+ *
+ * Detecting these padded frames requires to verify and parse
+ * IP headers, so we simply force all those small frames to be
+ * CHECKSUM_UNNECESSARY even if they are not padded.
+ */
+ if (short_frame(skb->len))
+ goto csum_unnecessary;
+
+ if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+ if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+ goto csum_unnecessary;
+
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
if (network_depth > ETH_HLEN)
network_depth - ETH_HLEN,
skb->csum);
if (unlikely(netdev->features & NETIF_F_RXFCS))
- skb->csum = csum_add(skb->csum,
- (__force __wsum)mlx5e_get_fcs(skb));
+ skb->csum = csum_block_add(skb->csum,
+ (__force __wsum)mlx5e_get_fcs(skb),
+ skb->len - ETH_FCS_LEN);
stats->csum_complete++;
return;
}
+csum_unnecessary:
if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
(cqe->hds_ip_ext & CQE_L4_OK))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
stats->csum_none++;
}
+#define MLX5E_CE_BIT_MASK 0x80
+
static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
u32 cqe_bcnt,
struct mlx5e_rq *rq,
skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+ /* checking CE bit in cqe - MSB in ml_path field */
+ if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+ mlx5e_enable_ecn(rq, skb);
+
skb->protocol = eth_type_trans(skb, netdev);
}
u32 frag_size;
bool consumed;
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
va = page_address(di->page) + head_offset;
data = va + rx_headroom;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
- struct mlx5e_xdpsq *xdpsq;
+ struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
struct mlx5_cqe64 *cqe;
int work_done = 0;
work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
cqe = mlx5_cqwq_get_cqe(&cq->wq);
- if (!cqe)
+ if (!cqe) {
+ if (unlikely(work_done))
+ goto out;
return 0;
-
- xdpsq = &rq->xdpsq;
+ }
do {
if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
rq->handle_rx_cqe(rq, cqe);
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+out:
if (xdpsq->doorbell) {
mlx5e_xmit_xdp_doorbell(xdpsq);
xdpsq->doorbell = false;