Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

author Jakub Kicinski <kuba@kernel.org>
Tue, 10 Aug 2021 14:27:09 +0000 (07:27 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 10 Aug 2021 14:53:22 +0000 (07:53 -0700)
diff --combined drivers/net/bonding/bond_main.c

index 3ba5f48,04cf78f..365953e
--- 1/drivers/net/bonding/bond_main.c
--- 2/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@@ -317,6 -317,19 +317,19 @@@ bool bond_sk_check(struct bonding *bond
         }
   }
   
+ static bool bond_xdp_check(struct bonding *bond)
+ {
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_ROUNDROBIN:
+       case BOND_MODE_ACTIVEBACKUP:
+       case BOND_MODE_8023AD:
+       case BOND_MODE_XOR:
+               return true;
+       default:
+               return false;
+       }
+ }
+ 
   /*---------------------------------- VLAN -----------------------------------*/
   
   /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@@ -2133,6 -2146,41 +2146,41 @@@ int bond_enslave(struct net_device *bon
                 bond_update_slave_arr(bond, NULL);
   
   
+       if (!slave_dev->netdev_ops->ndo_bpf ||
+           !slave_dev->netdev_ops->ndo_xdp_xmit) {
+               if (bond->xdp_prog) {
+                       NL_SET_ERR_MSG(extack, "Slave does not support XDP");
+                       slave_err(bond_dev, slave_dev, "Slave does not support XDP\n");
+                       res = -EOPNOTSUPP;
+                       goto err_sysfs_del;
+               }
+       } else {
+               struct netdev_bpf xdp = {
+                       .command = XDP_SETUP_PROG,
+                       .flags   = 0,
+                       .prog    = bond->xdp_prog,
+                       .extack  = extack,
+               };
+ 
+               if (dev_xdp_prog_count(slave_dev) > 0) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Slave has XDP program loaded, please unload before enslaving");
+                       slave_err(bond_dev, slave_dev,
+                                 "Slave has XDP program loaded, please unload before enslaving\n");
+                       res = -EOPNOTSUPP;
+                       goto err_sysfs_del;
+               }
+ 
+               res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (res < 0) {
+                       /* ndo_bpf() sets extack error message */
+                       slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
+                       goto err_sysfs_del;
+               }
+               if (bond->xdp_prog)
+                       bpf_prog_inc(bond->xdp_prog);
+       }
+ 
         slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
                    bond_is_active_slave(new_slave) ? "an active" : "a backup",
                    new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
@@@ -2252,6 -2300,17 +2300,17 @@@ static int __bond_release_one(struct ne
         /* recompute stats just before removing the slave */
         bond_get_stats(bond->dev, &bond->bond_stats);
   
+       if (bond->xdp_prog) {
+               struct netdev_bpf xdp = {
+                       .command = XDP_SETUP_PROG,
+                       .flags   = 0,
+                       .prog    = NULL,
+                       .extack  = NULL,
+               };
+               if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+                       slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
+       }
+ 
         /* unregister rx_handler early so bond_handle_frame wouldn't be called
          * for this slave anymore.
          */
@@@ -3614,55 -3673,80 +3673,80 @@@ static struct notifier_block bond_netde
   
   /*---------------------------- Hashing Policies -----------------------------*/
   
+ /* Helper to access data in a packet, with or without a backing skb.
+  * If skb is given the data is linearized if necessary via pskb_may_pull.
+  */
+ static inline const void *bond_pull_data(struct sk_buff *skb,
+                                        const void *data, int hlen, int n)
+ {
+       if (likely(n <= hlen))
+               return data;
+       else if (skb && likely(pskb_may_pull(skb, n)))
+               return skb->head;
+ 
+       return NULL;
+ }
+ 
   /* L2 hash helper */
- static inline u32 bond_eth_hash(struct sk_buff *skb)
+ static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
   {
-       struct ethhdr *ep, hdr_tmp;
+       struct ethhdr *ep;
   
-       ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
-       if (ep)
-               return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
-       return 0;
+       data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+       if (!data)
+               return 0;
+ 
+       ep = (struct ethhdr *)(data + mhoff);
+       return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
   }
   
- static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
-                        int *noff, int *proto, bool l34)
+ static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
+                        int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
   {
         const struct ipv6hdr *iph6;
         const struct iphdr *iph;
   
-       if (skb->protocol == htons(ETH_P_IP)) {
-               if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+       if (l2_proto == htons(ETH_P_IP)) {
+               data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
+               if (!data)
                         return false;
-               iph = (const struct iphdr *)(skb->data + *noff);
+ 
+               iph = (const struct iphdr *)(data + *nhoff);
                 iph_to_flow_copy_v4addrs(fk, iph);
-               *noff += iph->ihl << 2;
+               *nhoff += iph->ihl << 2;
                 if (!ip_is_fragment(iph))
-                       *proto = iph->protocol;
-       } else if (skb->protocol == htons(ETH_P_IPV6)) {
-               if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+                       *ip_proto = iph->protocol;
+       } else if (l2_proto == htons(ETH_P_IPV6)) {
+               data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
+               if (!data)
                         return false;
-               iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+ 
+               iph6 = (const struct ipv6hdr *)(data + *nhoff);
                 iph_to_flow_copy_v6addrs(fk, iph6);
-               *noff += sizeof(*iph6);
-               *proto = iph6->nexthdr;
+               *nhoff += sizeof(*iph6);
+               *ip_proto = iph6->nexthdr;
         } else {
                 return false;
         }
   
-       if (l34 && *proto >= 0)
-               fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+       if (l34 && *ip_proto >= 0)
+               fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
   
         return true;
   }
   
- static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
+ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
   {
-       struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+       struct ethhdr *mac_hdr;
         u32 srcmac_vendor = 0, srcmac_dev = 0;
         u16 vlan;
         int i;
   
+       data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+       if (!data)
+               return 0;
+       mac_hdr = (struct ethhdr *)(data + mhoff);
+ 
         for (i = 0; i < 3; i++)
                 srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
   
@@@ -3678,26 -3762,25 +3762,25 @@@
   }
   
   /* Extract the appropriate headers based on bond's xmit policy */
- static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
-                             struct flow_keys *fk)
+ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
+                             __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
   {
         bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
-       int noff, proto = -1;
+       int ip_proto = -1;
   
         switch (bond->params.xmit_policy) {
         case BOND_XMIT_POLICY_ENCAP23:
         case BOND_XMIT_POLICY_ENCAP34:
                 memset(fk, 0, sizeof(*fk));
                 return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
-                                         fk, NULL, 0, 0, 0, 0);
+                                         fk, data, l2_proto, nhoff, hlen, 0);
         default:
                 break;
         }
   
         fk->ports.ports = 0;
         memset(&fk->icmp, 0, sizeof(fk->icmp));
-       noff = skb_network_offset(skb);
-       if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
+       if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
                 return false;
   
         /* ICMP error packets contains at least 8 bytes of the header
@@@ -3705,22 -3788,20 +3788,20 @@@
          * to correlate ICMP error packets within the same flow which
          * generated the error.
          */
-       if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
-               skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
-                                     skb_transport_offset(skb),
-                                     skb_headlen(skb));
-               if (proto == IPPROTO_ICMP) {
+       if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
+               skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
+               if (ip_proto == IPPROTO_ICMP) {
                         if (!icmp_is_err(fk->icmp.type))
                                 return true;
   
-                       noff += sizeof(struct icmphdr);
-               } else if (proto == IPPROTO_ICMPV6) {
+                       nhoff += sizeof(struct icmphdr);
+               } else if (ip_proto == IPPROTO_ICMPV6) {
                         if (!icmpv6_is_err(fk->icmp.type))
                                 return true;
   
-                       noff += sizeof(struct icmp6hdr);
+                       nhoff += sizeof(struct icmp6hdr);
                 }
-               return bond_flow_ip(skb, fk, &noff, &proto, l34);
+               return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
         }
   
         return true;
@@@ -3736,33 -3817,26 +3817,26 @@@ static u32 bond_ip_hash(u32 hash, struc
         return hash >> 1;
   }
   
- /**
-  * bond_xmit_hash - generate a hash value based on the xmit policy
-  * @bond: bonding device
-  * @skb: buffer to use for headers
-  *
-  * This function will extract the necessary headers from the skb buffer and use
-  * them to generate a hash based on the xmit_policy set in the bonding device
+ /* Generate hash based on xmit policy. If @skb is given it is used to linearize
+  * the data as required, but this function can be used without it if the data is
+  * known to be linear (e.g. with xdp_buff).
    */
- u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+ static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
+                           __be16 l2_proto, int mhoff, int nhoff, int hlen)
   {
         struct flow_keys flow;
         u32 hash;
   
-       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
-           skb->l4_hash)
-               return skb->hash;
- 
         if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
-               return bond_vlan_srcmac_hash(skb);
+               return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
   
         if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
-           !bond_flow_dissect(bond, skb, &flow))
-               return bond_eth_hash(skb);
+           !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
+               return bond_eth_hash(skb, data, mhoff, hlen);
   
         if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
             bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
-               hash = bond_eth_hash(skb);
+               hash = bond_eth_hash(skb, data, mhoff, hlen);
         } else {
                 if (flow.icmp.id)
                         memcpy(&hash, &flow.icmp, sizeof(hash));
@@@ -3773,6 -3847,45 +3847,45 @@@
         return bond_ip_hash(hash, &flow);
   }
   
+ /**
+  * bond_xmit_hash - generate a hash value based on the xmit policy
+  * @bond: bonding device
+  * @skb: buffer to use for headers
+  *
+  * This function will extract the necessary headers from the skb buffer and use
+  * them to generate a hash based on the xmit_policy set in the bonding device
+  */
+ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+ {
+       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+           skb->l4_hash)
+               return skb->hash;
+ 
+       return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
+                               skb->mac_header, skb->network_header,
+                               skb_headlen(skb));
+ }
+ 
+ /**
+  * bond_xmit_hash_xdp - generate a hash value based on the xmit policy
+  * @bond: bonding device
+  * @xdp: buffer to use for headers
+  *
+  * The XDP variant of bond_xmit_hash.
+  */
+ static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
+ {
+       struct ethhdr *eth;
+ 
+       if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
+               return 0;
+ 
+       eth = (struct ethhdr *)xdp->data;
+ 
+       return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
+                               sizeof(struct ethhdr), xdp->data_end - xdp->data);
+ }
+ 
   /*-------------------------- Device entry points ----------------------------*/
   
   void bond_work_init_all(struct bonding *bond)
@@@ -4421,6 -4534,47 +4534,47 @@@ non_igmp
         return NULL;
   }
   
+ static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
+                                                       struct xdp_buff *xdp)
+ {
+       struct slave *slave;
+       int slave_cnt;
+       u32 slave_id;
+       const struct ethhdr *eth;
+       void *data = xdp->data;
+ 
+       if (data + sizeof(struct ethhdr) > xdp->data_end)
+               goto non_igmp;
+ 
+       eth = (struct ethhdr *)data;
+       data += sizeof(struct ethhdr);
+ 
+       /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
+       if (eth->h_proto == htons(ETH_P_IP)) {
+               const struct iphdr *iph;
+ 
+               if (data + sizeof(struct iphdr) > xdp->data_end)
+                       goto non_igmp;
+ 
+               iph = (struct iphdr *)data;
+ 
+               if (iph->protocol == IPPROTO_IGMP) {
+                       slave = rcu_dereference(bond->curr_active_slave);
+                       if (slave)
+                               return slave;
+                       return bond_get_slave_by_id(bond, 0);
+               }
+       }
+ 
+ non_igmp:
+       slave_cnt = READ_ONCE(bond->slave_cnt);
+       if (likely(slave_cnt)) {
+               slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+               return bond_get_slave_by_id(bond, slave_id);
+       }
+       return NULL;
+ }
+ 
   static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
                                         struct net_device *bond_dev)
   {
@@@ -4434,8 -4588,7 +4588,7 @@@
         return bond_tx_drop(bond_dev, skb);
   }
   
- static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
-                                                     struct sk_buff *skb)
+ static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
   {
         return rcu_dereference(bond->curr_active_slave);
   }
@@@ -4449,7 -4602,7 +4602,7 @@@ static netdev_tx_t bond_xmit_activeback
         struct bonding *bond = netdev_priv(bond_dev);
         struct slave *slave;
   
-       slave = bond_xmit_activebackup_slave_get(bond, skb);
+       slave = bond_xmit_activebackup_slave_get(bond);
         if (slave)
                 return bond_dev_queue_xmit(bond, skb, slave->dev);
   
@@@ -4637,6 -4790,22 +4790,22 @@@ static struct slave *bond_xmit_3ad_xor_
         return slave;
   }
   
+ static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
+                                                    struct xdp_buff *xdp)
+ {
+       struct bond_up_slave *slaves;
+       unsigned int count;
+       u32 hash;
+ 
+       hash = bond_xmit_hash_xdp(bond, xdp);
+       slaves = rcu_dereference(bond->usable_slaves);
+       count = slaves ? READ_ONCE(slaves->count) : 0;
+       if (unlikely(!count))
+               return NULL;
+ 
+       return slaves->arr[hash % count];
+ }
+ 
   /* Use this Xmit function for 3AD as well as XOR modes. The current
    * usable slave array is formed in the control path. The xmit function
    * just calculates hash and sends the packet out.
@@@ -4747,7 -4916,7 +4916,7 @@@ static struct net_device *bond_xmit_get
                 slave = bond_xmit_roundrobin_slave_get(bond, skb);
                 break;
         case BOND_MODE_ACTIVEBACKUP:
-               slave = bond_xmit_activebackup_slave_get(bond, skb);
+               slave = bond_xmit_activebackup_slave_get(bond);
                 break;
         case BOND_MODE_8023AD:
         case BOND_MODE_XOR:
@@@ -4921,6 -5090,174 +5090,174 @@@ static netdev_tx_t bond_start_xmit(stru
         return ret;
   }
   
+ static struct net_device *
+ bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
+ {
+       struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave;
+ 
+       /* Caller needs to hold rcu_read_lock() */
+ 
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_ROUNDROBIN:
+               slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
+               break;
+ 
+       case BOND_MODE_ACTIVEBACKUP:
+               slave = bond_xmit_activebackup_slave_get(bond);
+               break;
+ 
+       case BOND_MODE_8023AD:
+       case BOND_MODE_XOR:
+               slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
+               break;
+ 
+       default:
+               /* Should never happen. Mode guarded by bond_xdp_check() */
+               netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
+               WARN_ON_ONCE(1);
+               return NULL;
+       }
+ 
+       if (slave)
+               return slave->dev;
+ 
+       return NULL;
+ }
+ 
+ static int bond_xdp_xmit(struct net_device *bond_dev,
+                        int n, struct xdp_frame **frames, u32 flags)
+ {
+       int nxmit, err = -ENXIO;
+ 
+       rcu_read_lock();
+ 
+       for (nxmit = 0; nxmit < n; nxmit++) {
+               struct xdp_frame *frame = frames[nxmit];
+               struct xdp_frame *frames1[] = {frame};
+               struct net_device *slave_dev;
+               struct xdp_buff xdp;
+ 
+               xdp_convert_frame_to_buff(frame, &xdp);
+ 
+               slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
+               if (!slave_dev) {
+                       err = -ENXIO;
+                       break;
+               }
+ 
+               err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
+               if (err < 1)
+                       break;
+       }
+ 
+       rcu_read_unlock();
+ 
+       /* If error happened on the first frame then we can pass the error up, otherwise
+        * report the number of frames that were xmitted.
+        */
+       if (err < 0)
+               return (nxmit == 0 ? err : nxmit);
+ 
+       return nxmit;
+ }
+ 
+ static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+                       struct netlink_ext_ack *extack)
+ {
+       struct bonding *bond = netdev_priv(dev);
+       struct list_head *iter;
+       struct slave *slave, *rollback_slave;
+       struct bpf_prog *old_prog;
+       struct netdev_bpf xdp = {
+               .command = XDP_SETUP_PROG,
+               .flags   = 0,
+               .prog    = prog,
+               .extack  = extack,
+       };
+       int err;
+ 
+       ASSERT_RTNL();
+ 
+       if (!bond_xdp_check(bond))
+               return -EOPNOTSUPP;
+ 
+       old_prog = bond->xdp_prog;
+       bond->xdp_prog = prog;
+ 
+       bond_for_each_slave(bond, slave, iter) {
+               struct net_device *slave_dev = slave->dev;
+ 
+               if (!slave_dev->netdev_ops->ndo_bpf ||
+                   !slave_dev->netdev_ops->ndo_xdp_xmit) {
+                       NL_SET_ERR_MSG(extack, "Slave device does not support XDP");
+                       slave_err(dev, slave_dev, "Slave does not support XDP\n");
+                       err = -EOPNOTSUPP;
+                       goto err;
+               }
+ 
+               if (dev_xdp_prog_count(slave_dev) > 0) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Slave has XDP program loaded, please unload before enslaving");
+                       slave_err(dev, slave_dev,
+                                 "Slave has XDP program loaded, please unload before enslaving\n");
+                       err = -EOPNOTSUPP;
+                       goto err;
+               }
+ 
+               err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (err < 0) {
+                       /* ndo_bpf() sets extack error message */
+                       slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
+                       goto err;
+               }
+               if (prog)
+                       bpf_prog_inc(prog);
+       }
+ 
+       if (old_prog)
+               bpf_prog_put(old_prog);
+ 
+       if (prog)
+               static_branch_inc(&bpf_master_redirect_enabled_key);
+       else
+               static_branch_dec(&bpf_master_redirect_enabled_key);
+ 
+       return 0;
+ 
+ err:
+       /* unwind the program changes */
+       bond->xdp_prog = old_prog;
+       xdp.prog = old_prog;
+       xdp.extack = NULL; /* do not overwrite original error */
+ 
+       bond_for_each_slave(bond, rollback_slave, iter) {
+               struct net_device *slave_dev = rollback_slave->dev;
+               int err_unwind;
+ 
+               if (slave == rollback_slave)
+                       break;
+ 
+               err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (err_unwind < 0)
+                       slave_err(dev, slave_dev,
+                                 "Error %d when unwinding XDP program change\n", err_unwind);
+               else if (xdp.prog)
+                       bpf_prog_inc(xdp.prog);
+       }
+       return err;
+ }
+ 
+ static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+ {
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return bond_xdp_set(dev, xdp->prog, xdp->extack);
+       default:
+               return -EINVAL;
+       }
+ }
+ 
   static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
   {
         if (speed == 0 || speed == SPEED_UNKNOWN)
@@@ -5009,6 -5346,9 +5346,9 @@@ static const struct net_device_ops bond
         .ndo_features_check     = passthru_features_check,
         .ndo_get_xmit_slave     = bond_xmit_get_slave,
         .ndo_sk_get_lower_dev   = bond_sk_get_lower_dev,
+       .ndo_bpf                = bond_xdp,
+       .ndo_xdp_xmit           = bond_xdp_xmit,
+       .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
   };
   
   static const struct device_type bond_type = {
@@@ -5478,7 -5818,6 +5818,7 @@@ static int bond_check_params(struct bon
         params->downdelay = downdelay;
         params->peer_notif_delay = 0;
         params->use_carrier = use_carrier;
+ +      params->lacp_active = 1;
         params->lacp_fast = lacp_fast;
         params->primary[0] = 0;
         params->primary_reselect = primary_reselect_value;
diff --combined include/linux/netdevice.h

index 135c943,02c6e8e..bd8d5b8
--- 1/include/linux/netdevice.h
--- 2/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@@ -295,6 -295,18 +295,6 @@@ enum netdev_state_t 
   };
   
   
- -/*
- - * This structure holds boot-time configured netdevice settings. They
- - * are then used in the device probing.
- - */
- -struct netdev_boot_setup {
- -      char name[IFNAMSIZ];
- -      struct ifmap map;
- -};
- -#define NETDEV_BOOT_SETUP_MAX 8
- -
- -int __init netdev_boot_setup(char *str);
- -
   struct gro_list {
         struct list_head        list;
         int                     count;
@@@ -1318,6 -1330,9 +1318,9 @@@ struct netdev_net_notifier 
    *    that got dropped are freed/returned via xdp_return_frame().
    *    Returns negative number, means general error invoking ndo, meaning
    *    no frames were xmit'ed and core-caller will free all frames.
+  * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+  *                                            struct xdp_buff *xdp);
+  *      Get the xmit slave of master device based on the xdp_buff.
    * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
    *      This function is used to wake up the softirq, ksoftirqd or kthread
    *    responsible for sending and/or receiving packets on a specific
@@@ -1545,6 -1560,8 +1548,8 @@@ struct net_device_ops 
         int                     (*ndo_xdp_xmit)(struct net_device *dev, int n,
                                                 struct xdp_frame **xdp,
                                                 u32 flags);
+       struct net_device *     (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+                                                         struct xdp_buff *xdp);
         int                     (*ndo_xsk_wakeup)(struct net_device *dev,
                                                   u32 queue_id, u32 flags);
         struct devlink_port *   (*ndo_get_devlink_port)(struct net_device *dev);
@@@ -2927,6 -2944,7 +2932,6 @@@ static inline struct net_device *first_
   }
   
   int netdev_boot_setup_check(struct net_device *dev);
- -unsigned long netdev_boot_base(const char *prefix, int unit);
   struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
                                        const char *hwaddr);
   struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
@@@ -3916,8 -3934,6 +3921,8 @@@ static inline int netif_set_real_num_rx
         return 0;
   }
   #endif
+ +int netif_set_real_num_queues(struct net_device *dev,
+ +                            unsigned int txq, unsigned int rxq);
   
   static inline struct netdev_rx_queue *
   __netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
@@@ -4076,6 -4092,7 +4081,7 @@@ typedef int (*bpf_op_t)(struct net_devi
   int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                       int fd, int expected_fd, u32 flags);
   int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+ u8 dev_xdp_prog_count(struct net_device *dev);
   u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
   
   int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
@@@ -4143,13 -4160,11 +4149,13 @@@ void netdev_run_todo(void)
    */
   static inline void dev_put(struct net_device *dev)
   {
+ +      if (dev) {
   #ifdef CONFIG_PCPU_DEV_REFCNT
- -      this_cpu_dec(*dev->pcpu_refcnt);
+ +              this_cpu_dec(*dev->pcpu_refcnt);
   #else
- -      refcount_dec(&dev->dev_refcnt);
+ +              refcount_dec(&dev->dev_refcnt);
   #endif
+ +      }
   }
   
   /**
@@@ -4160,13 -4175,11 +4166,13 @@@
    */
   static inline void dev_hold(struct net_device *dev)
   {
+ +      if (dev) {
   #ifdef CONFIG_PCPU_DEV_REFCNT
- -      this_cpu_inc(*dev->pcpu_refcnt);
+ +              this_cpu_inc(*dev->pcpu_refcnt);
   #else
- -      refcount_inc(&dev->dev_refcnt);
+ +              refcount_inc(&dev->dev_refcnt);
   #endif
+ +      }
   }
   
   /* Carrier loss detection, dial on demand. The functions netif_carrier_on
diff --combined include/net/bonding.h

index 46df470,b91c365..9f3fdc1
--- 1/include/net/bonding.h
--- 2/include/net/bonding.h
+++ b/include/net/bonding.h
@@@ -129,7 -129,6 +129,7 @@@ struct bond_params 
         int updelay;
         int downdelay;
         int peer_notif_delay;
+ +      int lacp_active;
         int lacp_fast;
         unsigned int min_links;
         int ad_select;
@@@ -259,6 -258,7 +259,7 @@@ struct bonding 
         /* protecting ipsec_list */
         spinlock_t ipsec_lock;
   #endif /* CONFIG_XFRM_OFFLOAD */
+       struct bpf_prog *xdp_prog;
   };
   
   #define bond_slave_get_rcu(dev) \
diff --combined net/core/dev.c

index eaaeff4,e5045b6..8865079
--- 1/net/core/dev.c
--- 2/net/core/dev.c
+++ b/net/core/dev.c
@@@ -676,6 -676,131 +676,6 @@@ void dev_remove_offload(struct packet_o
   }
   EXPORT_SYMBOL(dev_remove_offload);
   
- -/******************************************************************************
- - *
- - *                  Device Boot-time Settings Routines
- - *
- - ******************************************************************************/
- -
- -/* Boot time configuration table */
- -static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
- -
- -/**
- - *    netdev_boot_setup_add   - add new setup entry
- - *    @name: name of the device
- - *    @map: configured settings for the device
- - *
- - *    Adds new setup entry to the dev_boot_setup list.  The function
- - *    returns 0 on error and 1 on success.  This is a generic routine to
- - *    all netdevices.
- - */
- -static int netdev_boot_setup_add(char *name, struct ifmap *map)
- -{
- -      struct netdev_boot_setup *s;
- -      int i;
- -
- -      s = dev_boot_setup;
- -      for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
- -              if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
- -                      memset(s[i].name, 0, sizeof(s[i].name));
- -                      strlcpy(s[i].name, name, IFNAMSIZ);
- -                      memcpy(&s[i].map, map, sizeof(s[i].map));
- -                      break;
- -              }
- -      }
- -
- -      return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
- -}
- -
- -/**
- - * netdev_boot_setup_check    - check boot time settings
- - * @dev: the netdevice
- - *
- - * Check boot time settings for the device.
- - * The found settings are set for the device to be used
- - * later in the device probing.
- - * Returns 0 if no settings found, 1 if they are.
- - */
- -int netdev_boot_setup_check(struct net_device *dev)
- -{
- -      struct netdev_boot_setup *s = dev_boot_setup;
- -      int i;
- -
- -      for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
- -              if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
- -                  !strcmp(dev->name, s[i].name)) {
- -                      dev->irq = s[i].map.irq;
- -                      dev->base_addr = s[i].map.base_addr;
- -                      dev->mem_start = s[i].map.mem_start;
- -                      dev->mem_end = s[i].map.mem_end;
- -                      return 1;
- -              }
- -      }
- -      return 0;
- -}
- -EXPORT_SYMBOL(netdev_boot_setup_check);
- -
- -
- -/**
- - * netdev_boot_base   - get address from boot time settings
- - * @prefix: prefix for network device
- - * @unit: id for network device
- - *
- - * Check boot time settings for the base address of device.
- - * The found settings are set for the device to be used
- - * later in the device probing.
- - * Returns 0 if no settings found.
- - */
- -unsigned long netdev_boot_base(const char *prefix, int unit)
- -{
- -      const struct netdev_boot_setup *s = dev_boot_setup;
- -      char name[IFNAMSIZ];
- -      int i;
- -
- -      sprintf(name, "%s%d", prefix, unit);
- -
- -      /*
- -       * If device already registered then return base of 1
- -       * to indicate not to probe for this interface
- -       */
- -      if (__dev_get_by_name(&init_net, name))
- -              return 1;
- -
- -      for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
- -              if (!strcmp(name, s[i].name))
- -                      return s[i].map.base_addr;
- -      return 0;
- -}
- -
- -/*
- - * Saves at boot time configured settings for any netdevice.
- - */
- -int __init netdev_boot_setup(char *str)
- -{
- -      int ints[5];
- -      struct ifmap map;
- -
- -      str = get_options(str, ARRAY_SIZE(ints), ints);
- -      if (!str || !*str)
- -              return 0;
- -
- -      /* Save settings */
- -      memset(&map, 0, sizeof(map));
- -      if (ints[0] > 0)
- -              map.irq = ints[1];
- -      if (ints[0] > 1)
- -              map.base_addr = ints[2];
- -      if (ints[0] > 2)
- -              map.mem_start = ints[3];
- -      if (ints[0] > 3)
- -              map.mem_end = ints[4];
- -
- -      /* Add new entry to the list */
- -      return netdev_boot_setup_add(str, &map);
- -}
- -
- -__setup("netdev=", netdev_boot_setup);
- -
   /*******************************************************************************
    *
    *                        Device Interface Subroutines
@@@ -831,7 -956,8 +831,7 @@@ struct net_device *dev_get_by_name(stru
   
         rcu_read_lock();
         dev = dev_get_by_name_rcu(net, name);
- -      if (dev)
- -              dev_hold(dev);
+ +      dev_hold(dev);
         rcu_read_unlock();
         return dev;
   }
@@@ -904,7 -1030,8 +904,7 @@@ struct net_device *dev_get_by_index(str
   
         rcu_read_lock();
         dev = dev_get_by_index_rcu(net, ifindex);
- -      if (dev)
- -              dev_hold(dev);
+ +      dev_hold(dev);
         rcu_read_unlock();
         return dev;
   }
@@@ -2972,50 -3099,6 +2972,50 @@@ EXPORT_SYMBOL(netif_set_real_num_rx_que
   #endif
   
   /**
+ + *    netif_set_real_num_queues - set actual number of RX and TX queues used
+ + *    @dev: Network device
+ + *    @txq: Actual number of TX queues (1 .. dev->num_tx_queues)
+ + *    @rxq: Actual number of RX queues (1 .. dev->num_rx_queues)
+ + *
+ + *    Set the real number of both TX and RX queues; does nothing if already correct.
+ + *    Returns 0, -EINVAL for an out-of-range count, or the error of a failed increase.
+ + */
+ +int netif_set_real_num_queues(struct net_device *dev,
+ +                            unsigned int txq, unsigned int rxq)
+ +{
+ +      unsigned int old_rxq = dev->real_num_rx_queues; /* kept for rollback at undo_rx */
+ +      int err;
+ +
+ +      if (txq < 1 || txq > dev->num_tx_queues ||
+ +          rxq < 1 || rxq > dev->num_rx_queues)
+ +              return -EINVAL;
+ +
+ +      /* Start from increases, so the error path only does decreases -
+ +       * decreases can't fail.
+ +       */
+ +      if (rxq > dev->real_num_rx_queues) {
+ +              err = netif_set_real_num_rx_queues(dev, rxq);
+ +              if (err)
+ +                      return err; /* nothing changed yet, no rollback needed */
+ +      }
+ +      if (txq > dev->real_num_tx_queues) {
+ +              err = netif_set_real_num_tx_queues(dev, txq);
+ +              if (err)
+ +                      goto undo_rx; /* RX may already have been grown above */
+ +      }
+ +      if (rxq < dev->real_num_rx_queues) /* shrinks run last; a failure only warns */
+ +              WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
+ +      if (txq < dev->real_num_tx_queues)
+ +              WARN_ON(netif_set_real_num_tx_queues(dev, txq));
+ +
+ +      return 0;
+ +undo_rx:
+ +      WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq)); /* restore the RX count saved on entry */
+ +      return err;
+ +}
+ +EXPORT_SYMBOL(netif_set_real_num_queues);
+ +
+ +/**
    * netif_get_num_default_rss_queues - default number of RSS queues
    *
    * This routine should set an upper limit on the number of RSS queues
@@@ -5795,7 -5878,7 +5795,7 @@@ static void flush_all_backlogs(void
          */
         ASSERT_RTNL();
   
- -      get_online_cpus();
+ +      cpus_read_lock();
   
         cpumask_clear(&flush_cpus);
         for_each_online_cpu(cpu) {
@@@ -5813,7 -5896,7 +5813,7 @@@
         for_each_cpu(cpu, &flush_cpus)
                 flush_work(per_cpu_ptr(&flush_works, cpu));
   
- -      put_online_cpus();
+ +      cpus_read_unlock();
   }
   
   /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
@@@ -7532,7 -7615,7 +7532,7 @@@ void *netdev_lower_get_next_private_rcu
   {
         struct netdev_adjacent *lower;
   
-       WARN_ON_ONCE(!rcu_read_lock_held());
+       WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
   
         lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
   
@@@ -9297,7 -9380,7 +9297,7 @@@ static struct bpf_prog *dev_xdp_prog(st
         return dev->xdp_state[mode].prog;
   }
   
- static u8 dev_xdp_prog_count(struct net_device *dev)
+ u8 dev_xdp_prog_count(struct net_device *dev)
   {
         u8 count = 0;
         int i;
@@@ -9307,6 -9390,7 +9307,7 @@@
                         count++;
         return count;
   }
+ EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
   
   u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
   {
@@@ -9400,6 -9484,8 +9401,8 @@@ static int dev_xdp_attach(struct net_de
   {
         unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
         struct bpf_prog *cur_prog;
+       struct net_device *upper;
+       struct list_head *iter;
         enum bpf_xdp_mode mode;
         bpf_op_t bpf_op;
         int err;
@@@ -9438,6 -9524,14 +9441,14 @@@
                 return -EBUSY;
         }
   
+       /* don't allow if an upper device already has a program */
+       netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+               if (dev_xdp_prog_count(upper) > 0) {
+                       NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
+                       return -EEXIST;
+               }
+       }
+ 
         cur_prog = dev_xdp_prog(dev, mode);
         /* can't replace attached prog with link */
         if (link && cur_prog) {
diff --combined net/core/filter.c

index 6f493ef,ff62cd3..3aca07c
--- 1/net/core/filter.c
--- 2/net/core/filter.c
+++ b/net/core/filter.c
@@@ -2180,9 -2180,17 +2180,9 @@@ static int bpf_out_neigh_v6(struct net 
         skb->tstamp = 0;
   
         if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- -              struct sk_buff *skb2;
- -
- -              skb2 = skb_realloc_headroom(skb, hh_len);
- -              if (unlikely(!skb2)) {
- -                      kfree_skb(skb);
+ +              skb = skb_expand_head(skb, hh_len);
+ +              if (!skb)
                         return -ENOMEM;
- -              }
- -              if (skb->sk)
- -                      skb_set_owner_w(skb2, skb->sk);
- -              consume_skb(skb);
- -              skb = skb2;
         }
   
         rcu_read_lock_bh();
@@@ -2206,7 -2214,8 +2206,7 @@@
         }
         rcu_read_unlock_bh();
         if (dst)
- -              IP6_INC_STATS(dev_net(dst->dev),
- -                            ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ +              IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
   out_drop:
         kfree_skb(skb);
         return -ENETDOWN;
@@@ -2278,9 -2287,17 +2278,9 @@@ static int bpf_out_neigh_v4(struct net 
         skb->tstamp = 0;
   
         if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- -              struct sk_buff *skb2;
- -
- -              skb2 = skb_realloc_headroom(skb, hh_len);
- -              if (unlikely(!skb2)) {
- -                      kfree_skb(skb);
+ +              skb = skb_expand_head(skb, hh_len);
+ +              if (!skb)
                         return -ENOMEM;
- -              }
- -              if (skb->sk)
- -                      skb_set_owner_w(skb2, skb->sk);
- -              consume_skb(skb);
- -              skb = skb2;
         }
   
         rcu_read_lock_bh();
@@@ -3933,6 -3950,31 +3933,31 @@@ void bpf_clear_redirect_map(struct bpf_
         }
   }
   
+ DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+ EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+ 
+ u32 xdp_master_redirect(struct xdp_buff *xdp) /* returns XDP_REDIRECT or XDP_TX */
+ {
+       struct net_device *master, *slave;
+       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ 
+       master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); /* NOTE(review): not NULL-checked before the dereference below; presumably callers guarantee a master - confirm */
+       slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); /* slave picked by the master's callback */
+       if (slave && slave != xdp->rxq->dev) {
+               /* The target device is different from the receiving device, so
+                * redirect it to the new device.
+                * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
+                * drivers to unmap the packet from their rx ring.
+                */
+               ri->tgt_index = slave->ifindex;
+               ri->map_id = INT_MAX; /* NOTE(review): presumably marks "no map, tgt_index is an ifindex" - confirm against xdp_do_redirect */
+               ri->map_type = BPF_MAP_TYPE_UNSPEC;
+               return XDP_REDIRECT;
+       }
+       return XDP_TX; /* no slave returned, or the slave is the receiving device itself */
+ }
+ EXPORT_SYMBOL_GPL(xdp_master_redirect);
+ 
   int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
                     struct bpf_prog *xdp_prog)
   {
author	Jakub Kicinski <kuba@kernel.org>
	Tue, 10 Aug 2021 14:27:09 +0000 (07:27 -0700)
committer	Jakub Kicinski <kuba@kernel.org>
	Tue, 10 Aug 2021 14:53:22 +0000 (07:53 -0700)
		1	2
drivers/net/bonding/bond_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/netdevice.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/bonding.h	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/filter.c	patch \|	diff1 \|	diff2 \|	blob \| history