net/mlx5e: XDP, Avoid checksum complete when XDP prog is loaded
[platform/kernel/linux-rpi.git] / drivers / net / ethernet / mellanox / mlx5 / core / en_rx.c
index 15d8ae2..98509e2 100644 (file)
@@ -37,6 +37,7 @@
 #include <net/busy_poll.h>
 #include <net/ip6_checksum.h>
 #include <net/page_pool.h>
+#include <net/inet_ecn.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -432,10 +433,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
 
 static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
                                              struct mlx5_wq_cyc *wq,
-                                             u16 pi, u16 frag_pi)
+                                             u16 pi, u16 nnops)
 {
        struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
-       u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
        edge_wi = wi + nnops;
 
@@ -454,15 +454,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_umr_wqe *umr_wqe;
        u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-       u16 pi, frag_pi;
+       u16 pi, contig_wqebbs_room;
        int err;
        int i;
 
        pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-       frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-
-       if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
-               mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
+       contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+       if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+               mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
                pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
        }
 
@@ -690,53 +689,52 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
        skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
 }
 
-static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+                                       __be16 *proto)
 {
-       __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;
-
-       ethertype = __vlan_get_protocol(skb, ethertype, network_depth);
-       return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
+       *proto = ((struct ethhdr *)skb->data)->h_proto;
+       *proto = __vlan_get_protocol(skb, *proto, network_depth);
+       return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6));
 }
 
-static __be32 mlx5e_get_fcs(struct sk_buff *skb)
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
 {
-       int last_frag_sz, bytes_in_prev, nr_frags;
-       u8 *fcs_p1, *fcs_p2;
-       skb_frag_t *last_frag;
-       __be32 fcs_bytes;
+       int network_depth = 0;
+       __be16 proto;
+       void *ip;
+       int rc;
 
-       if (!skb_is_nonlinear(skb))
-               return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN);
+       if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+               return;
 
-       nr_frags = skb_shinfo(skb)->nr_frags;
-       last_frag = &skb_shinfo(skb)->frags[nr_frags - 1];
-       last_frag_sz = skb_frag_size(last_frag);
+       ip = skb->data + network_depth;
+       rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+                                        IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
 
-       /* If all FCS data is in last frag */
-       if (last_frag_sz >= ETH_FCS_LEN)
-               return *(__be32 *)(skb_frag_address(last_frag) +
-                                  last_frag_sz - ETH_FCS_LEN);
+       rq->stats->ecn_mark += !!rc;
+}
 
-       fcs_p2 = (u8 *)skb_frag_address(last_frag);
-       bytes_in_prev = ETH_FCS_LEN - last_frag_sz;
+static u32 mlx5e_get_fcs(const struct sk_buff *skb)
+{
+       const void *fcs_bytes;
+       u32 _fcs_bytes;
 
-       /* Find where the other part of the FCS is - Linear or another frag */
-       if (nr_frags == 1) {
-               fcs_p1 = skb_tail_pointer(skb);
-       } else {
-               skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2];
+       fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
+                                      ETH_FCS_LEN, &_fcs_bytes);
 
-               fcs_p1 = skb_frag_address(prev_frag) +
-                           skb_frag_size(prev_frag);
-       }
-       fcs_p1 -= bytes_in_prev;
+       return __get_unaligned_cpu32(fcs_bytes);
+}
 
-       memcpy(&fcs_bytes, fcs_p1, bytes_in_prev);
-       memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz);
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+{
+       void *ip_p = skb->data + network_depth;
 
-       return fcs_bytes;
+       return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+                                           ((struct ipv6hdr *)ip_p)->nexthdr;
 }
 
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
 static inline void mlx5e_handle_csum(struct net_device *netdev,
                                     struct mlx5_cqe64 *cqe,
                                     struct mlx5e_rq *rq,
@@ -745,6 +743,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 {
        struct mlx5e_rq_stats *stats = rq->stats;
        int network_depth = 0;
+       __be16 proto;
 
        if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
                goto csum_none;
@@ -755,7 +754,25 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
                return;
        }
 
-       if (likely(is_last_ethertype_ip(skb, &network_depth))) {
+       /* True when explicitly set via priv flag, or XDP prog is loaded */
+       if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+               goto csum_unnecessary;
+
+       /* CQE csum doesn't cover padding octets in short ethernet
+        * frames. And the pad field is appended prior to calculating
+        * and appending the FCS field.
+        *
+        * Detecting these padded frames requires to verify and parse
+        * IP headers, so we simply force all those small frames to be
+        * CHECKSUM_UNNECESSARY even if they are not padded.
+        */
+       if (short_frame(skb->len))
+               goto csum_unnecessary;
+
+       if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+               if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+                       goto csum_unnecessary;
+
                skb->ip_summed = CHECKSUM_COMPLETE;
                skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
                if (network_depth > ETH_HLEN)
@@ -767,12 +784,14 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
                                                 network_depth - ETH_HLEN,
                                                 skb->csum);
                if (unlikely(netdev->features & NETIF_F_RXFCS))
-                       skb->csum = csum_add(skb->csum,
-                                            (__force __wsum)mlx5e_get_fcs(skb));
+                       skb->csum = csum_block_add(skb->csum,
+                                                  (__force __wsum)mlx5e_get_fcs(skb),
+                                                  skb->len - ETH_FCS_LEN);
                stats->csum_complete++;
                return;
        }
 
+csum_unnecessary:
        if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
                   (cqe->hds_ip_ext & CQE_L4_OK))) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -790,6 +809,8 @@ csum_none:
        stats->csum_none++;
 }
 
+#define MLX5E_CE_BIT_MASK 0x80
+
 static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
                                      u32 cqe_bcnt,
                                      struct mlx5e_rq *rq,
@@ -834,6 +855,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
        skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
 
        mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+       /* checking CE bit in cqe - MSB in ml_path field */
+       if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+               mlx5e_enable_ecn(rq, skb);
+
        skb->protocol = eth_type_trans(skb, netdev);
 }
 
@@ -1093,6 +1118,12 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
        u32 frag_size;
        bool consumed;
 
+       /* Check packet size. Note LRO doesn't use linear SKB */
+       if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+               rq->stats->oversize_pkts_sw_drop++;
+               return NULL;
+       }
+
        va             = page_address(di->page) + head_offset;
        data           = va + rx_headroom;
        frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
@@ -1173,7 +1204,7 @@ mpwrq_cqe_out:
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
        struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
-       struct mlx5e_xdpsq *xdpsq;
+       struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
        struct mlx5_cqe64 *cqe;
        int work_done = 0;
 
@@ -1184,10 +1215,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
                work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
 
        cqe = mlx5_cqwq_get_cqe(&cq->wq);
-       if (!cqe)
+       if (!cqe) {
+               if (unlikely(work_done))
+                       goto out;
                return 0;
-
-       xdpsq = &rq->xdpsq;
+       }
 
        do {
                if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
@@ -1202,6 +1234,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
                rq->handle_rx_cqe(rq, cqe);
        } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
+out:
        if (xdpsq->doorbell) {
                mlx5e_xmit_xdp_doorbell(xdpsq);
                xdpsq->doorbell = false;