Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net...
authorJakub Kicinski <kuba@kernel.org>
Wed, 24 Aug 2022 00:49:02 +0000 (17:49 -0700)
committerJakub Kicinski <kuba@kernel.org>
Wed, 24 Aug 2022 00:49:02 +0000 (17:49 -0700)
Tony Nguyen says:

====================
ice: xsk: reduced queue count fixes

Maciej Fijalkowski says:

This small series fixes issues with AF_XDP usage when an interface is
configured with a reduced queue count. Due to the XDP rings setup, some
configurations can result in sockets not seeing any traffic. More about
this in the description of patch 2.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  ice: xsk: use Rx ring's XDP ring when picking NAPI context
  ice: xsk: prohibit usage of non-balanced queue id
====================

Link: https://lore.kernel.org/r/20220822163257.2382487-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
21 files changed:
MAINTAINERS
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_main.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/microchip/ksz_common.h
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
drivers/net/ethernet/moxa/moxart_ether.c
drivers/net/ipa/ipa_mem.c
drivers/net/ipvlan/ipvtap.c
drivers/net/phy/phy_device.c
include/net/bond_3ad.h
net/dsa/slave.c
tools/testing/selftests/Makefile
tools/testing/selftests/drivers/net/bonding/Makefile [new file with mode: 0644]
tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/bonding/config [new file with mode: 0644]
tools/testing/selftests/drivers/net/bonding/settings [new file with mode: 0644]

index f512b43..274b2c1 100644 (file)
@@ -3679,6 +3679,7 @@ F:        Documentation/networking/bonding.rst
 F:     drivers/net/bonding/
 F:     include/net/bond*
 F:     include/uapi/linux/if_bonding.h
+F:     tools/testing/selftests/drivers/net/bonding/
 
 BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER
 M:     Dan Robertson <dan@dlrobertson.com>
index d7fb33c..184608b 100644 (file)
@@ -84,7 +84,8 @@ enum ad_link_speed_type {
 static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = {
        0, 0, 0, 0, 0, 0
 };
-static u16 ad_ticks_per_sec;
+
+static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL;
 static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;
 
 static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned =
@@ -2001,36 +2002,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)
 /**
  * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures
  * @bond: bonding struct to work on
- * @tick_resolution: tick duration (millisecond resolution)
  *
  * Can be called only after the mac address of the bond is set.
  */
-void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution)
+void bond_3ad_initialize(struct bonding *bond)
 {
-       /* check that the bond is not initialized yet */
-       if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr),
-                               bond->dev->dev_addr)) {
-
-               BOND_AD_INFO(bond).aggregator_identifier = 0;
-
-               BOND_AD_INFO(bond).system.sys_priority =
-                       bond->params.ad_actor_sys_prio;
-               if (is_zero_ether_addr(bond->params.ad_actor_system))
-                       BOND_AD_INFO(bond).system.sys_mac_addr =
-                           *((struct mac_addr *)bond->dev->dev_addr);
-               else
-                       BOND_AD_INFO(bond).system.sys_mac_addr =
-                           *((struct mac_addr *)bond->params.ad_actor_system);
-
-               /* initialize how many times this module is called in one
-                * second (should be about every 100ms)
-                */
-               ad_ticks_per_sec = tick_resolution;
+       BOND_AD_INFO(bond).aggregator_identifier = 0;
+       BOND_AD_INFO(bond).system.sys_priority =
+               bond->params.ad_actor_sys_prio;
+       if (is_zero_ether_addr(bond->params.ad_actor_system))
+               BOND_AD_INFO(bond).system.sys_mac_addr =
+                   *((struct mac_addr *)bond->dev->dev_addr);
+       else
+               BOND_AD_INFO(bond).system.sys_mac_addr =
+                   *((struct mac_addr *)bond->params.ad_actor_system);
 
-               bond_3ad_initiate_agg_selection(bond,
-                                               AD_AGGREGATOR_SELECTION_TIMER *
-                                               ad_ticks_per_sec);
-       }
+       bond_3ad_initiate_agg_selection(bond,
+                                       AD_AGGREGATOR_SELECTION_TIMER *
+                                       ad_ticks_per_sec);
 }
 
 /**
index 50e6084..2f4da2c 100644 (file)
@@ -2081,7 +2081,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                        /* Initialize AD with the number of times that the AD timer is called in 1 second
                         * can be called only after the mac address of the bond is set
                         */
-                       bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL);
+                       bond_3ad_initialize(bond);
                } else {
                        SLAVE_AD_INFO(new_slave)->id =
                                SLAVE_AD_INFO(prev_slave)->id + 1;
index ed7d137..6bd69a7 100644 (file)
@@ -803,9 +803,15 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
        if (dev->info->supports_rgmii[port])
                phy_interface_set_rgmii(config->supported_interfaces);
 
-       if (dev->info->internal_phy[port])
+       if (dev->info->internal_phy[port]) {
                __set_bit(PHY_INTERFACE_MODE_INTERNAL,
                          config->supported_interfaces);
+               /* Compatibility for phylib's default interface type when the
+                * phy-mode property is absent
+                */
+               __set_bit(PHY_INTERFACE_MODE_GMII,
+                         config->supported_interfaces);
+       }
 
        if (dev->dev_ops->get_caps)
                dev->dev_ops->get_caps(dev, port, config);
@@ -962,6 +968,7 @@ static void ksz_update_port_member(struct ksz_device *dev, int port)
 static int ksz_setup(struct dsa_switch *ds)
 {
        struct ksz_device *dev = ds->priv;
+       struct ksz_port *p;
        const u16 *regs;
        int ret;
 
@@ -1001,6 +1008,14 @@ static int ksz_setup(struct dsa_switch *ds)
                        return ret;
        }
 
+       /* Start with learning disabled on standalone user ports, and enabled
+        * on the CPU port. In lack of other finer mechanisms, learning on the
+        * CPU port will avoid flooding bridge local addresses on the network
+        * in some cases.
+        */
+       p = &dev->ports[dev->cpu_port];
+       p->learning = true;
+
        /* start switch */
        regmap_update_bits(dev->regmap[0], regs[S_START_CTRL],
                           SW_START, SW_START);
@@ -1277,6 +1292,8 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
        ksz_pread8(dev, port, regs[P_STP_CTRL], &data);
        data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
 
+       p = &dev->ports[port];
+
        switch (state) {
        case BR_STATE_DISABLED:
                data |= PORT_LEARN_DISABLE;
@@ -1286,9 +1303,13 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
                break;
        case BR_STATE_LEARNING:
                data |= PORT_RX_ENABLE;
+               if (!p->learning)
+                       data |= PORT_LEARN_DISABLE;
                break;
        case BR_STATE_FORWARDING:
                data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
+               if (!p->learning)
+                       data |= PORT_LEARN_DISABLE;
                break;
        case BR_STATE_BLOCKING:
                data |= PORT_LEARN_DISABLE;
@@ -1300,12 +1321,38 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 
        ksz_pwrite8(dev, port, regs[P_STP_CTRL], data);
 
-       p = &dev->ports[port];
        p->stp_state = state;
 
        ksz_update_port_member(dev, port);
 }
 
+static int ksz_port_pre_bridge_flags(struct dsa_switch *ds, int port,
+                                    struct switchdev_brport_flags flags,
+                                    struct netlink_ext_ack *extack)
+{
+       if (flags.mask & ~BR_LEARNING)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int ksz_port_bridge_flags(struct dsa_switch *ds, int port,
+                                struct switchdev_brport_flags flags,
+                                struct netlink_ext_ack *extack)
+{
+       struct ksz_device *dev = ds->priv;
+       struct ksz_port *p = &dev->ports[port];
+
+       if (flags.mask & BR_LEARNING) {
+               p->learning = !!(flags.val & BR_LEARNING);
+
+               /* Make the change take effect immediately */
+               ksz_port_stp_state_set(ds, port, p->stp_state);
+       }
+
+       return 0;
+}
+
 static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds,
                                                  int port,
                                                  enum dsa_tag_protocol mp)
@@ -1719,6 +1766,8 @@ static const struct dsa_switch_ops ksz_switch_ops = {
        .port_bridge_join       = ksz_port_bridge_join,
        .port_bridge_leave      = ksz_port_bridge_leave,
        .port_stp_state_set     = ksz_port_stp_state_set,
+       .port_pre_bridge_flags  = ksz_port_pre_bridge_flags,
+       .port_bridge_flags      = ksz_port_bridge_flags,
        .port_fast_age          = ksz_port_fast_age,
        .port_vlan_filtering    = ksz_port_vlan_filtering,
        .port_vlan_add          = ksz_port_vlan_add,
index 764ada3..0d9520d 100644 (file)
@@ -65,6 +65,7 @@ struct ksz_chip_data {
 
 struct ksz_port {
        bool remove_tag;                /* Remove Tag flag set, for ksz8795 only */
+       bool learning;
        int stp_state;
        struct phy_device phydev;
 
index ba0f1ff..f46eefb 100644 (file)
@@ -11178,10 +11178,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
        if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp))
                features &= ~NETIF_F_NTUPLE;
 
-       if (bp->flags & BNXT_FLAG_NO_AGG_RINGS)
-               features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
-
-       if (!(bp->flags & BNXT_FLAG_TPA))
+       if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog)
                features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
 
        if (!(features & NETIF_F_GRO))
index 075c620..b1b17f9 100644 (file)
@@ -2130,6 +2130,7 @@ struct bnxt {
 #define BNXT_DUMP_CRASH                1
 
        struct bpf_prog         *xdp_prog;
+       u8                      xdp_has_frags;
 
        struct bnxt_ptp_cfg     *ptp_cfg;
        u8                      ptp_all_rx_tstamp;
index 059f96f..a36803e 100644 (file)
@@ -1306,6 +1306,7 @@ int bnxt_dl_register(struct bnxt *bp)
        if (rc)
                goto err_dl_port_unreg;
 
+       devlink_set_features(dl, DEVLINK_F_RELOAD);
 out:
        devlink_register(dl);
        return 0;
index 730febd..a4cba7c 100644 (file)
@@ -623,7 +623,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
                hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n;
                hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n;
                if (bp->flags & BNXT_FLAG_CHIP_P5)
-                       hw_resc->max_irqs -= vf_msix * n;
+                       hw_resc->max_nqs -= vf_msix;
 
                rc = pf->active_vfs;
        }
index f53387e..c3065ec 100644 (file)
@@ -181,6 +181,7 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
                        struct xdp_buff *xdp)
 {
        struct bnxt_sw_rx_bd *rx_buf;
+       u32 buflen = PAGE_SIZE;
        struct pci_dev *pdev;
        dma_addr_t mapping;
        u32 offset;
@@ -192,7 +193,10 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
        mapping = rx_buf->mapping - bp->rx_dma_offset;
        dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
 
-       xdp_init_buff(xdp, BNXT_PAGE_MODE_BUF_SIZE + offset, &rxr->xdp_rxq);
+       if (bp->xdp_has_frags)
+               buflen = BNXT_PAGE_MODE_BUF_SIZE + offset;
+
+       xdp_init_buff(xdp, buflen, &rxr->xdp_rxq);
        xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false);
 }
 
@@ -397,8 +401,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
                netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
                return -EOPNOTSUPP;
        }
-       if (prog)
+       if (prog) {
                tx_xdp = bp->rx_nr_rings;
+               bp->xdp_has_frags = prog->aux->xdp_has_frags;
+       }
 
        tc = netdev_get_num_tc(dev);
        if (!tc)
index 19009a6..9e57d23 100644 (file)
@@ -71,11 +71,6 @@ static int moxart_set_mac_address(struct net_device *ndev, void *addr)
 static void moxart_mac_free_memory(struct net_device *ndev)
 {
        struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-       int i;
-
-       for (i = 0; i < RX_DESC_NUM; i++)
-               dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
-                                priv->rx_buf_size, DMA_FROM_DEVICE);
 
        if (priv->tx_desc_base)
                dma_free_coherent(&priv->pdev->dev,
@@ -187,6 +182,7 @@ static int moxart_mac_open(struct net_device *ndev)
 static int moxart_mac_stop(struct net_device *ndev)
 {
        struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+       int i;
 
        napi_disable(&priv->napi);
 
@@ -198,6 +194,11 @@ static int moxart_mac_stop(struct net_device *ndev)
        /* disable all functions */
        writel(0, priv->base + REG_MAC_CTRL);
 
+       /* unmap areas mapped in moxart_mac_setup_desc_ring() */
+       for (i = 0; i < RX_DESC_NUM; i++)
+               dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
+                                priv->rx_buf_size, DMA_FROM_DEVICE);
+
        return 0;
 }
 
index 1e9eae2..53a1dbe 100644 (file)
@@ -568,7 +568,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
        }
 
        /* Align the address down and the size up to a page boundary */
-       addr = qcom_smem_virt_to_phys(virt) & PAGE_MASK;
+       addr = qcom_smem_virt_to_phys(virt);
        phys = addr & PAGE_MASK;
        size = PAGE_ALIGN(size + addr - phys);
        iova = phys;    /* We just want a direct mapping */
index ef02f2c..cbabca1 100644 (file)
@@ -194,7 +194,7 @@ static struct notifier_block ipvtap_notifier_block __read_mostly = {
        .notifier_call  = ipvtap_device_event,
 };
 
-static int ipvtap_init(void)
+static int __init ipvtap_init(void)
 {
        int err;
 
@@ -228,7 +228,7 @@ out1:
 }
 module_init(ipvtap_init);
 
-static void ipvtap_exit(void)
+static void __exit ipvtap_exit(void)
 {
        rtnl_link_unregister(&ipvtap_link_ops);
        unregister_netdevice_notifier(&ipvtap_notifier_block);
index 0c6efd7..12ff276 100644 (file)
@@ -316,11 +316,11 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
 
        phydev->suspended_by_mdio_bus = 0;
 
-       /* If we managed to get here with the PHY state machine in a state other
-        * than PHY_HALTED this is an indication that something went wrong and
-        * we should most likely be using MAC managed PM and we are not.
+       /* If we managed to get here with the PHY state machine in a state neither
+        * PHY_HALTED nor PHY_READY this is an indication that something went wrong
+        * and we should most likely be using MAC managed PM and we are not.
         */
-       WARN_ON(phydev->state != PHY_HALTED && !phydev->mac_managed_pm);
+       WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY);
 
        ret = phy_init_hw(phydev);
        if (ret < 0)
index 184105d..be2992e 100644 (file)
@@ -290,7 +290,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state)
 }
 
 /* ========== AD Exported functions to the main bonding code ========== */
-void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution);
+void bond_3ad_initialize(struct bonding *bond);
 void bond_3ad_bind_slave(struct slave *slave);
 void bond_3ad_unbind_slave(struct slave *slave);
 void bond_3ad_state_machine_handler(struct work_struct *);
index ad6a666..1291c24 100644 (file)
@@ -2484,7 +2484,7 @@ static int dsa_slave_changeupper(struct net_device *dev,
                        if (!err)
                                dsa_bridge_mtu_normalization(dp);
                        if (err == -EOPNOTSUPP) {
-                               if (!extack->_msg)
+                               if (extack && !extack->_msg)
                                        NL_SET_ERR_MSG_MOD(extack,
                                                           "Offloading not supported");
                                err = 0;
index 10b34bb..c2064a3 100644 (file)
@@ -12,6 +12,7 @@ TARGETS += cpu-hotplug
 TARGETS += damon
 TARGETS += drivers/dma-buf
 TARGETS += drivers/s390x/uvdevice
+TARGETS += drivers/net/bonding
 TARGETS += efivarfs
 TARGETS += exec
 TARGETS += filesystems
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
new file mode 100644 (file)
index 0000000..ab6c54b
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for bonding driver selftests
+
+TEST_PROGS := bond-break-lacpdu-tx.sh
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
new file mode 100755 (executable)
index 0000000..47ab905
--- /dev/null
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+#   Verify LACPDUs get transmitted after setting the MAC address of
+#   the bond.
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=2020773
+#
+#       +---------+
+#       | fab-br0 |
+#       +---------+
+#            |
+#       +---------+
+#       |  fbond  |
+#       +---------+
+#        |       |
+#    +------+ +------+
+#    |veth1 | |veth2 |
+#    +------+ +------+
+#
+# We use veths instead of physical interfaces
+
+set -e
+tmp=$(mktemp -q dump.XXXXXX)
+cleanup() {
+       ip link del fab-br0 >/dev/null 2>&1 || :
+       ip link del fbond  >/dev/null 2>&1 || :
+       ip link del veth1-bond  >/dev/null 2>&1 || :
+       ip link del veth2-bond  >/dev/null 2>&1 || :
+       modprobe -r bonding  >/dev/null 2>&1 || :
+       rm -f -- ${tmp}
+}
+
+trap cleanup 0 1 2
+cleanup
+sleep 1
+
+# create the bridge
+ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \
+       forward_delay 15
+
+# create the bond
+ip link add fbond type bond mode 4 miimon 200 xmit_hash_policy 1 \
+       ad_actor_sys_prio 65535 lacp_rate fast
+
+# set bond address
+ip link set fbond address 52:54:00:3B:7C:A6
+ip link set fbond up
+
+# set bond sysfs parameters again
+ip link set fbond type bond ad_actor_sys_prio 65535
+
+# create veths
+ip link add name veth1-bond type veth peer name veth1-end
+ip link add name veth2-bond type veth peer name veth2-end
+
+# add ports
+ip link set fbond master fab-br0
+ip link set veth1-bond down master fbond
+ip link set veth2-bond down master fbond
+
+# bring up
+ip link set veth1-end up
+ip link set veth2-end up
+ip link set fab-br0 up
+ip link set fbond up
+ip addr add dev fab-br0 10.0.0.3
+
+tcpdump -n -i veth1-end -e ether proto 0x8809 >${tmp} 2>&1 &
+sleep 15
+pkill tcpdump >/dev/null 2>&1
+rc=0
+num=$(grep "packets captured" ${tmp} | awk '{print $1}')
+if test "$num" -gt 0; then
+       echo "PASS, captured ${num}"
+else
+       echo "FAIL"
+       rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
new file mode 100644 (file)
index 0000000..dc1c22d
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_BONDING=y
diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings
new file mode 100644 (file)
index 0000000..867e118
--- /dev/null
@@ -0,0 +1 @@
+timeout=60