Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
authorJakub Kicinski <kuba@kernel.org>
Thu, 8 Jun 2023 18:34:28 +0000 (11:34 -0700)
committerJakub Kicinski <kuba@kernel.org>
Thu, 8 Jun 2023 18:35:14 +0000 (11:35 -0700)
Cross-merge networking fixes after downstream PR.

Conflicts:

net/sched/sch_taprio.c
  d636fc5dd692 ("net: sched: add rcu annotations around qdisc->qdisc_sleeping")
  dced11ef84fb ("net/sched: taprio: don't overwrite "sch" variable in taprio_dump_class_stats()")

net/ipv4/sysctl_net_ipv4.c
  e209fee4118f ("net/ipv4: ping_group_range: allow GID from 2147483648 to 4294967294")
  ccce324dabfe ("tcp: make the first N SYN RTO backoffs linear")
https://lore.kernel.org/all/20230605100816.08d41a7b@canb.auug.org.au/

No adjacent changes.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
683 files changed:
Documentation/admin-guide/sysctl/net.rst
Documentation/bpf/instruction-set.rst
Documentation/bpf/kfuncs.rst
Documentation/bpf/llvm_reloc.rst
Documentation/bpf/map_hash.rst
Documentation/bpf/map_lru_hash_update.dot [new file with mode: 0644]
Documentation/bpf/prog_cgroup_sockopt.rst
Documentation/devicetree/bindings/net/dsa/marvell.txt
Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
Documentation/devicetree/bindings/net/ethernet-phy.yaml
Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
Documentation/leds/leds-class.rst
Documentation/netlink/genetlink-legacy.yaml
Documentation/netlink/specs/ovs_flow.yaml [new file with mode: 0644]
Documentation/networking/device_drivers/ethernet/intel/ice.rst
Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
Documentation/networking/ip-sysctl.rst
Documentation/userspace-api/netlink/intro-specs.rst
MAINTAINERS
arch/arm64/net/bpf_jit_comp.c
crypto/af_alg.c
crypto/algif_aead.c
crypto/algif_hash.c
crypto/algif_skcipher.c
drivers/infiniband/sw/siw/siw_qp_tx.c
drivers/leds/trigger/ledtrig-netdev.c
drivers/net/Kconfig
drivers/net/bonding/bond_main.c
drivers/net/bonding/bonding_priv.h
drivers/net/can/at91_can.c
drivers/net/can/bxcan.c
drivers/net/can/c_can/c_can_platform.c
drivers/net/can/cc770/cc770_isa.c
drivers/net/can/cc770/cc770_platform.c
drivers/net/can/ctucanfd/ctucanfd_platform.c
drivers/net/can/flexcan/flexcan-core.c
drivers/net/can/grcan.c
drivers/net/can/ifi_canfd/ifi_canfd.c
drivers/net/can/janz-ican3.c
drivers/net/can/m_can/m_can_platform.c
drivers/net/can/mscan/mpc5xxx_can.c
drivers/net/can/rcar/rcar_can.c
drivers/net/can/rcar/rcar_canfd.c
drivers/net/can/sja1000/sja1000_isa.c
drivers/net/can/sja1000/sja1000_platform.c
drivers/net/can/softing/softing_main.c
drivers/net/can/sun4i_can.c
drivers/net/can/ti_hecc.c
drivers/net/can/usb/Kconfig
drivers/net/can/usb/Makefile
drivers/net/can/usb/f81604.c [new file with mode: 0644]
drivers/net/can/xilinx_can.c
drivers/net/dsa/hirschmann/hellcreek.c
drivers/net/dsa/lan9303-core.c
drivers/net/dsa/lan9303_i2c.c
drivers/net/dsa/microchip/ksz8795.c
drivers/net/dsa/microchip/ksz8863_smi.c
drivers/net/dsa/microchip/ksz9477.c
drivers/net/dsa/microchip/ksz9477_i2c.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/microchip/ksz_common.h
drivers/net/dsa/microchip/ksz_spi.c
drivers/net/dsa/microchip/lan937x_main.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global2.c
drivers/net/dsa/mv88e6xxx/port.c
drivers/net/dsa/mv88e6xxx/port.h
drivers/net/dsa/mv88e6xxx/serdes.c
drivers/net/dsa/mv88e6xxx/serdes.h
drivers/net/dsa/ocelot/felix_vsc9959.c
drivers/net/dsa/ocelot/seville_vsc9953.c
drivers/net/dsa/qca/ar9331.c
drivers/net/dsa/qca/qca8k-8xxx.c
drivers/net/dsa/qca/qca8k-common.c
drivers/net/dsa/qca/qca8k-leds.c
drivers/net/dsa/sja1105/sja1105_mdio.c
drivers/net/dsa/sja1105/sja1105_tas.c
drivers/net/dsa/xrs700x/xrs700x_i2c.c
drivers/net/ethernet/8390/8390.h
drivers/net/ethernet/8390/apne.c
drivers/net/ethernet/8390/axnet_cs.c
drivers/net/ethernet/8390/hydra.c
drivers/net/ethernet/8390/lib8390.c
drivers/net/ethernet/8390/mac8390.c
drivers/net/ethernet/8390/mcf8390.c
drivers/net/ethernet/8390/ne.c
drivers/net/ethernet/8390/ne2k-pci.c
drivers/net/ethernet/8390/pcnet_cs.c
drivers/net/ethernet/8390/smc-ultra.c
drivers/net/ethernet/8390/stnic.c
drivers/net/ethernet/8390/wd.c
drivers/net/ethernet/8390/zorro8390.c
drivers/net/ethernet/altera/Kconfig
drivers/net/ethernet/altera/altera_tse_main.c
drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/arc/emac.h
drivers/net/ethernet/arc/emac_arc.c
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/arc/emac_rockchip.c
drivers/net/ethernet/broadcom/bnx2.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
drivers/net/ethernet/cadence/macb.h
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/Kconfig
drivers/net/ethernet/cavium/liquidio/Makefile
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
drivers/net/ethernet/cavium/liquidio/lio_core.c
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/liquidio/octeon_device.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
drivers/net/ethernet/cavium/liquidio/octeon_nic.c
drivers/net/ethernet/cavium/liquidio/request_manager.c
drivers/net/ethernet/cavium/liquidio/response_manager.c
drivers/net/ethernet/chelsio/cxgb3/sge.c
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/engleder/tsnep_selftests.c
drivers/net/ethernet/engleder/tsnep_tc.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
drivers/net/ethernet/freescale/enetc/enetc.c
drivers/net/ethernet/freescale/enetc/enetc.h
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/enetc/enetc_qos.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/fungible/funeth/funeth_rx.c
drivers/net/ethernet/google/gve/gve_main.c
drivers/net/ethernet/google/gve/gve_tx_dqo.c
drivers/net/ethernet/i825xx/82596.c
drivers/net/ethernet/i825xx/lasi_82596.c
drivers/net/ethernet/i825xx/lib82596.c
drivers/net/ethernet/i825xx/sun3_82586.c
drivers/net/ethernet/i825xx/sun3_82586.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/ice/Makefile
drivers/net/ethernet/intel/ice/ice.h
drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
drivers/net/ethernet/intel/ice/ice_arfs.c
drivers/net/ethernet/intel/ice/ice_base.c
drivers/net/ethernet/intel/ice/ice_devlink.c
drivers/net/ethernet/intel/ice/ice_eswitch.c
drivers/net/ethernet/intel/ice/ice_eswitch.h
drivers/net/ethernet/intel/ice/ice_ethtool.c
drivers/net/ethernet/intel/ice/ice_ethtool.h [new file with mode: 0644]
drivers/net/ethernet/intel/ice/ice_idc.c
drivers/net/ethernet/intel/ice/ice_irq.c [new file with mode: 0644]
drivers/net/ethernet/intel/ice/ice_irq.h [new file with mode: 0644]
drivers/net/ethernet/intel/ice/ice_lag.c
drivers/net/ethernet/intel/ice/ice_lag.h
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_lib.h
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_protocol_type.h
drivers/net/ethernet/intel/ice/ice_ptp.c
drivers/net/ethernet/intel/ice/ice_repr.c
drivers/net/ethernet/intel/ice/ice_repr.h
drivers/net/ethernet/intel/ice/ice_sriov.c
drivers/net/ethernet/intel/ice/ice_switch.c
drivers/net/ethernet/intel/ice/ice_switch.h
drivers/net/ethernet/intel/ice/ice_tc_lib.c
drivers/net/ethernet/intel/ice/ice_tc_lib.h
drivers/net/ethernet/intel/ice/ice_vf_lib.c
drivers/net/ethernet/intel/ice/ice_vf_lib.h
drivers/net/ethernet/intel/ice/ice_virtchnl.c
drivers/net/ethernet/intel/ice/ice_vlan_mode.c
drivers/net/ethernet/intel/ice/ice_xsk.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/octeontx2/Kconfig
drivers/net/ethernet/marvell/octeontx2/af/common.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/nic/Makefile
drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
drivers/net/ethernet/marvell/octeontx2/nic/qos.c [new file with mode: 0644]
drivers/net/ethernet/marvell/octeontx2/nic/qos.h [new file with mode: 0644]
drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c [new file with mode: 0644]
drivers/net/ethernet/marvell/prestera/prestera_flower.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/fw.c
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
drivers/net/ethernet/mellanox/mlx5/core/rdma.c
drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/microchip/enc28j60.c
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/microchip/lan966x/Kconfig
drivers/net/ethernet/microchip/lan966x/Makefile
drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c [new file with mode: 0644]
drivers/net/ethernet/microchip/lan966x/lan966x_main.c
drivers/net/ethernet/microchip/lan966x/lan966x_main.h
drivers/net/ethernet/microchip/lan966x/lan966x_port.c
drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
drivers/net/ethernet/microchip/lan966x/lan966x_tc.c
drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c
drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c
drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c
drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
drivers/net/ethernet/microchip/vcap/vcap_ag_api.h
drivers/net/ethernet/microchip/vcap/vcap_api.c
drivers/net/ethernet/mscc/ocelot_flower.c
drivers/net/ethernet/netronome/nfp/nfp_devlink.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
drivers/net/ethernet/sfc/ef100_netdev.c
drivers/net/ethernet/sfc/efx_devlink.c
drivers/net/ethernet/sfc/mae.c
drivers/net/ethernet/sfc/mae.h
drivers/net/ethernet/sfc/tc.c
drivers/net/ethernet/sfc/tc.h
drivers/net/ethernet/stmicro/stmmac/Kconfig
drivers/net/ethernet/stmicro/stmmac/Makefile
drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c [deleted file]
drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h [deleted file]
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
drivers/net/ethernet/sun/cassini.c
drivers/net/ethernet/ti/am65-cpsw-qos.c
drivers/net/ethernet/wangxun/Kconfig
drivers/net/ethernet/wangxun/libwx/wx_hw.c
drivers/net/ethernet/wangxun/libwx/wx_hw.h
drivers/net/ethernet/wangxun/libwx/wx_lib.c
drivers/net/ethernet/wangxun/libwx/wx_lib.h
drivers/net/ethernet/wangxun/libwx/wx_type.h
drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
drivers/net/ethernet/wangxun/txgbe/Makefile
drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c
drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c [new file with mode: 0644]
drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h [new file with mode: 0644]
drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ipvlan/ipvlan_core.c
drivers/net/mdio/Kconfig
drivers/net/mdio/Makefile
drivers/net/mdio/mdio-regmap.c [new file with mode: 0644]
drivers/net/pcs/Kconfig
drivers/net/pcs/Makefile
drivers/net/pcs/pcs-altera-tse.c [deleted file]
drivers/net/pcs/pcs-lynx.c
drivers/net/pcs/pcs-xpcs.c
drivers/net/pcs/pcs-xpcs.h
drivers/net/phy/Kconfig
drivers/net/phy/bcm-phy-lib.c
drivers/net/phy/bcm-phy-lib.h
drivers/net/phy/broadcom.c
drivers/net/phy/dp83869.c
drivers/net/phy/micrel.c
drivers/net/phy/microchip_t1s.c
drivers/net/phy/mscc/mscc.h
drivers/net/phy/mscc/mscc_main.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/phy/realtek.c
drivers/net/phy/sfp-bus.c
drivers/net/phy/sfp.c
drivers/net/phy/sfp.h
drivers/net/ppp/Kconfig
drivers/net/ppp/pppoe.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vxlan/vxlan_core.c
drivers/net/wireless/marvell/mwifiex/11n.h
drivers/net/wireless/marvell/mwifiex/scan.c
drivers/net/wireless/marvell/mwifiex/wmm.h
drivers/net/wireless/mediatek/mt7601u/debugfs.c
drivers/net/wireless/microchip/wilc1000/hif.c
drivers/net/wireless/microchip/wilc1000/hif.h
drivers/net/wireless/microchip/wilc1000/wlan_cfg.h
drivers/net/wireless/microchip/wilc1000/wlan_if.h
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188f.c
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h
drivers/net/wireless/realtek/rtw88/debug.c
drivers/net/wireless/realtek/rtw88/mac80211.c
drivers/net/wireless/realtek/rtw89/Makefile
drivers/net/wireless/realtek/rtw89/acpi.c [new file with mode: 0644]
drivers/net/wireless/realtek/rtw89/acpi.h [new file with mode: 0644]
drivers/net/wireless/realtek/rtw89/core.c
drivers/net/wireless/realtek/rtw89/core.h
drivers/net/wireless/realtek/rtw89/debug.c
drivers/net/wireless/realtek/rtw89/fw.c
drivers/net/wireless/realtek/rtw89/fw.h
drivers/net/wireless/realtek/rtw89/mac.c
drivers/net/wireless/realtek/rtw89/mac.h
drivers/net/wireless/realtek/rtw89/mac80211.c
drivers/net/wireless/realtek/rtw89/pci.c
drivers/net/wireless/realtek/rtw89/pci.h
drivers/net/wireless/realtek/rtw89/phy.c
drivers/net/wireless/realtek/rtw89/phy.h
drivers/net/wireless/realtek/rtw89/reg.h
drivers/net/wireless/realtek/rtw89/regd.c
drivers/net/wireless/realtek/rtw89/rtw8851b.c [new file with mode: 0644]
drivers/net/wireless/realtek/rtw89/rtw8851b.h [new file with mode: 0644]
drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c [new file with mode: 0644]
drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.h [new file with mode: 0644]
drivers/net/wireless/realtek/rtw89/rtw8851be.c [new file with mode: 0644]
drivers/net/wireless/realtek/rtw89/rtw8852a.c
drivers/net/wireless/realtek/rtw89/rtw8852b.c
drivers/net/wireless/realtek/rtw89/rtw8852c.c
drivers/net/wireless/realtek/rtw89/ser.c
drivers/net/wireless/realtek/rtw89/txrx.h
drivers/net/wireless/realtek/rtw89/wow.c
drivers/net/wwan/iosm/iosm_ipc_imem.h
drivers/net/wwan/iosm/iosm_ipc_mux.h
drivers/net/wwan/iosm/iosm_ipc_wwan.c
drivers/net/xen-netback/netback.c
drivers/nfc/fdp/i2c.c
drivers/nfc/microread/i2c.c
drivers/nfc/nfcmrvl/i2c.c
drivers/nfc/nxp-nci/i2c.c
drivers/nfc/pn533/i2c.c
drivers/nfc/pn544/i2c.c
drivers/nfc/s3fwrn5/i2c.c
drivers/nfc/st-nci/i2c.c
drivers/nfc/st21nfca/i2c.c
drivers/s390/net/ism_drv.c
fs/netfs/iterator.c
fs/smb/client/smb2ops.c
fs/smb/client/smbdirect.c
include/crypto/if_alg.h
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/brcmphy.h
include/linux/btf.h
include/linux/can/length.h
include/linux/leds.h
include/linux/mdio.h
include/linux/mdio/mdio-regmap.h [new file with mode: 0644]
include/linux/netfs.h
include/linux/pcs-altera-tse.h [deleted file]
include/linux/pcs-lynx.h
include/linux/pcs/pcs-xpcs.h
include/linux/phy.h
include/linux/phylink.h
include/linux/ref_tracker.h
include/linux/sfp.h
include/linux/skbuff.h
include/linux/socket.h
include/linux/uio.h
include/net/bonding.h
include/net/devlink.h
include/net/dsa.h
include/net/flow.h
include/net/flow_dissector.h
include/net/gro.h
include/net/inet_common.h
include/net/ip.h
include/net/macsec.h
include/net/netfilter/nf_conntrack_expect.h
include/net/netfilter/nf_flow_table.h
include/net/netns/ipv4.h
include/net/pkt_cls.h
include/net/pkt_sched.h
include/net/route.h
include/net/tcp.h
include/net/tls.h
include/net/udp.h
include/net/vxlan.h
include/net/xsk_buff_pool.h
include/uapi/linux/bpf.h
include/uapi/linux/if_link.h
include/uapi/linux/mdio.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/pkt_sched.h
io_uring/net.c
kernel/bpf/bpf_lru_list.c
kernel/bpf/bpf_lru_list.h
kernel/bpf/btf.c
kernel/bpf/cgroup.c
kernel/bpf/helpers.c
kernel/bpf/inode.c
kernel/bpf/log.c
kernel/bpf/syscall.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
lib/net_utils.c
lib/ref_tracker.c
lib/scatterlist.c
lib/test_ref_tracker.c
net/Kconfig
net/bpf/test_run.c
net/bridge/br_device.c
net/bridge/br_forward.c
net/bridge/br_input.c
net/bridge/br_private.h
net/core/dev.c
net/core/filter.c
net/core/flow_dissector.c
net/core/gro.c
net/core/net_namespace.c
net/core/netdev-genl-gen.c
net/core/netdev-genl-gen.h
net/core/pktgen.c
net/core/skbuff.c
net/devlink/health.c
net/devlink/leftover.c
net/dsa/port.c
net/handshake/genl.c
net/handshake/genl.h
net/ipv4/af_inet.c
net/ipv4/fou_nl.c
net/ipv4/fou_nl.h
net/ipv4/inet_connection_sock.c
net/ipv4/ip_gre.c
net/ipv4/ip_output.c
net/ipv4/ipconfig.c
net/ipv4/ping.c
net/ipv4/raw.c
net/ipv4/syncookies.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bpf.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_offload.c
net/ipv4/tcp_timer.c
net/ipv4/udp.c
net/ipv6/addrconf.c
net/ipv6/exthdrs.c
net/ipv6/ip6_output.c
net/ipv6/seg6_iptunnel.c
net/ipv6/tcp_ipv6.c
net/ipv6/tcpv6_offload.c
net/kcm/kcmsock.c
net/mptcp/mib.c
net/mptcp/mib.h
net/mptcp/options.c
net/mptcp/pm.c
net/mptcp/protocol.c
net/netfilter/ipvs/ip_vs_xmit.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_flow_table_ip.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_flow_offload.c
net/netfilter/nft_lookup.c
net/netfilter/nft_set_pipapo.c
net/nfc/llcp_commands.c
net/openvswitch/meter.c
net/sched/cls_flower.c
net/sched/sch_htb.c
net/sched/sch_taprio.c
net/sctp/protocol.c
net/sctp/socket.c
net/sctp/stream_sched.c
net/socket.c
net/tipc/bearer.c
net/tipc/bearer.h
net/tipc/udp_media.c
net/tls/tls_device.c
net/tls/tls_main.c
net/unix/af_unix.c
net/xdp/xsk_buff_pool.c
net/xfrm/espintcp.c
net/xfrm/xfrm_ipcomp.c
samples/bpf/tcp_basertt_kern.c
scripts/pahole-flags.sh
tools/bpf/bpftool/Documentation/bpftool-map.rst
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/common.c
tools/bpf/bpftool/feature.c
tools/bpf/bpftool/iter.c
tools/bpf/bpftool/link.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/struct_ops.c
tools/include/uapi/linux/bpf.h
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/bpf_tracing.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf_dump.c
tools/lib/bpf/gen_loader.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_probes.c
tools/lib/bpf/libbpf_version.h
tools/lib/bpf/usdt.c
tools/net/ynl/Makefile [new file with mode: 0644]
tools/net/ynl/generated/Makefile [new file with mode: 0644]
tools/net/ynl/generated/fou-user.c [new file with mode: 0644]
tools/net/ynl/generated/fou-user.h [new file with mode: 0644]
tools/net/ynl/generated/handshake-user.c [new file with mode: 0644]
tools/net/ynl/generated/handshake-user.h [new file with mode: 0644]
tools/net/ynl/generated/netdev-user.c [new file with mode: 0644]
tools/net/ynl/generated/netdev-user.h [new file with mode: 0644]
tools/net/ynl/lib/Makefile [new file with mode: 0644]
tools/net/ynl/lib/nlspec.py
tools/net/ynl/lib/ynl.c [new file with mode: 0644]
tools/net/ynl/lib/ynl.h [new file with mode: 0644]
tools/net/ynl/lib/ynl.py
tools/net/ynl/samples/.gitignore [new file with mode: 0644]
tools/net/ynl/samples/Makefile [new file with mode: 0644]
tools/net/ynl/samples/netdev.c [new file with mode: 0644]
tools/net/ynl/ynl-gen-c.py
tools/net/ynl/ynl-regen.sh
tools/testing/selftests/bpf/DENYLIST.aarch64
tools/testing/selftests/bpf/DENYLIST.s390x
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_kfuncs.h
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h [new file with mode: 0644]
tools/testing/selftests/bpf/network_helpers.c
tools/testing/selftests/bpf/network_helpers.h
tools/testing/selftests/bpf/prog_tests/arg_parsing.c
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
tools/testing/selftests/bpf/prog_tests/dynptr.c
tools/testing/selftests/bpf/prog_tests/global_map_resize.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/module_attach.c
tools/testing/selftests/bpf/prog_tests/netcnt.c
tools/testing/selftests/bpf/prog_tests/sock_destroy.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/sockopt.c
tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/verifier.c
tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
tools/testing/selftests/bpf/progs/bpf_misc.h
tools/testing/selftests/bpf/progs/cb_refs.c
tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
tools/testing/selftests/bpf/progs/dynptr_fail.c
tools/testing/selftests/bpf/progs/dynptr_success.c
tools/testing/selftests/bpf/progs/iters.c
tools/testing/selftests/bpf/progs/jit_probe_mem.c
tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
tools/testing/selftests/bpf/progs/kfunc_call_fail.c
tools/testing/selftests/bpf/progs/kfunc_call_race.c
tools/testing/selftests/bpf/progs/kfunc_call_test.c
tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
tools/testing/selftests/bpf/progs/local_kptr_stash.c
tools/testing/selftests/bpf/progs/map_kptr.c
tools/testing/selftests/bpf/progs/map_kptr_fail.c
tools/testing/selftests/bpf/progs/sock_destroy_prog.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sockopt_inherit.c
tools/testing/selftests/bpf/progs/sockopt_multi.c
tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
tools/testing/selftests/bpf/progs/sockopt_sk.c
tools/testing/selftests/bpf/progs/test_global_func1.c
tools/testing/selftests/bpf/progs/test_global_map_resize.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sock_fields.c
tools/testing/selftests/bpf/progs/test_task_under_cgroup.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
tools/testing/selftests/bpf/progs/verifier_subprog_precision.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_progs.h
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/test_xsk.sh
tools/testing/selftests/bpf/testing_helpers.c
tools/testing/selftests/bpf/testing_helpers.h
tools/testing/selftests/bpf/verifier/precise.c
tools/testing/selftests/bpf/veristat.c
tools/testing/selftests/bpf/xdp_hw_metadata.c
tools/testing/selftests/bpf/xdp_metadata.h
tools/testing/selftests/bpf/xsk.h
tools/testing/selftests/bpf/xskxceiver.c
tools/testing/selftests/bpf/xskxceiver.h
tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/fcnal-test.sh
tools/testing/selftests/net/forwarding/Makefile
tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh [new file with mode: 0755]
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/nettest.c
tools/testing/selftests/net/test_vxlan_nolocalbypass.sh [new file with mode: 0755]
tools/testing/selftests/net/tls.c
tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json

index 466c560..4877563 100644 (file)
@@ -386,8 +386,8 @@ Default : 0  (for compatibility reasons)
 txrehash
 --------
 
-Controls default hash rethink behaviour on listening socket when SO_TXREHASH
-option is set to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
+Controls default hash rethink behaviour on socket when SO_TXREHASH option is set
+to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
 
 If set to 1 (default), hash rethink is performed on listening socket.
 If set to 0, hash rethink is not performed.
index 492980e..6644842 100644 (file)
@@ -163,13 +163,13 @@ BPF_MUL   0x20   dst \*= src
 BPF_DIV   0x30   dst = (src != 0) ? (dst / src) : 0
 BPF_OR    0x40   dst \|= src
 BPF_AND   0x50   dst &= src
-BPF_LSH   0x60   dst <<= src
-BPF_RSH   0x70   dst >>= src
+BPF_LSH   0x60   dst <<= (src & mask)
+BPF_RSH   0x70   dst >>= (src & mask)
 BPF_NEG   0x80   dst = ~src
 BPF_MOD   0x90   dst = (src != 0) ? (dst % src) : dst
 BPF_XOR   0xa0   dst ^= src
 BPF_MOV   0xb0   dst = src
-BPF_ARSH  0xc0   sign extending shift right
+BPF_ARSH  0xc0   sign extending dst >>= (src & mask)
 BPF_END   0xd0   byte swap operations (see `Byte swap instructions`_ below)
 ========  =====  ==========================================================
 
@@ -204,6 +204,9 @@ for ``BPF_ALU64``, 'imm' is first sign extended to 64 bits and the result
 interpreted as an unsigned 64-bit value. There are no instructions for
 signed division or modulo.
 
+Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31)
+for 32-bit operations.
+
 Byte swap instructions
 ~~~~~~~~~~~~~~~~~~~~~~
 
index ea25163..7a3d9de 100644 (file)
@@ -100,7 +100,7 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
 size parameter, and the value of the constant matters for program safety, __k
 suffix should be used.
 
-2.2.2 __uninit Annotation
+2.2.3 __uninit Annotation
 -------------------------
 
 This annotation is used to indicate that the argument will be treated as
@@ -117,6 +117,27 @@ Here, the dynptr will be treated as an uninitialized dynptr. Without this
 annotation, the verifier will reject the program if the dynptr passed in is
 not initialized.
 
+2.2.4 __opt Annotation
+-------------------------
+
+This annotation is used to indicate that the buffer associated with an __sz or __szk
+argument may be null. If the function is passed a nullptr in place of the buffer,
+the verifier will not check that length is appropriate for the buffer. The kfunc is
+responsible for checking if this buffer is null before using it.
+
+An example is given below::
+
+        __bpf_kfunc void *bpf_dynptr_slice(..., void *buffer__opt, u32 buffer__szk)
+        {
+        ...
+        }
+
+Here, the buffer may be null. If the buffer is not null, it is at least of size
+buffer__szk. Either way, the returned buffer is either NULL, or of size
+buffer__szk. Without this annotation, the verifier will reject the program if a
+null pointer is passed in with a nonzero size.
+
+
 .. _BPF_kfunc_nodef:
 
 2.3 Using an existing kernel function
index ca8957d..e4a777a 100644 (file)
@@ -48,7 +48,7 @@ the code with ``llvm-objdump -dr test.o``::
       14:       0f 10 00 00 00 00 00 00 r0 += r1
       15:       95 00 00 00 00 00 00 00 exit
 
-There are four relations in the above for four ``LD_imm64`` instructions.
+There are four relocations in the above for four ``LD_imm64`` instructions.
 The following ``llvm-readelf -r test.o`` shows the binary values of the four
 relocations::
 
@@ -79,14 +79,16 @@ The following is the symbol table with ``llvm-readelf -s test.o``::
 The 6th entry is global variable ``g1`` with value 0.
 
 Similarly, the second relocation is at ``.text`` offset ``0x18``, instruction 3,
-for global variable ``g2`` which has a symbol value 4, the offset
-from the start of ``.data`` section.
-
-The third and fourth relocations refers to static variables ``l1``
-and ``l2``. From ``.rel.text`` section above, it is not clear
-which symbols they really refers to as they both refers to
+has a type of ``R_BPF_64_64`` and refers to entry 7 in the symbol table.
+The second relocation resolves to global variable ``g2`` which has a symbol
+value 4. The symbol value represents the offset from the start of ``.data``
+section where the initial value of the global variable ``g2`` is stored.
+
+The third and fourth relocations refer to static variables ``l1``
+and ``l2``. From the ``.rel.text`` section above, it is not clear
+to which symbols they really refer as they both refer to
 symbol table entry 4, symbol ``sec``, which has ``STT_SECTION`` type
-and represents a section. So for static variable or function,
+and represents a section. So for static variable or function,
 the section offset is written to the original insn
 buffer, which is called ``A`` (addend). Looking at
 above insn ``7`` and ``11``, they have section offset ``8`` and ``12``.
index 8669426..d234395 100644 (file)
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0-only
 .. Copyright (C) 2022 Red Hat, Inc.
+.. Copyright (C) 2022-2023 Isovalent, Inc.
 
 ===============================================
 BPF_MAP_TYPE_HASH, with PERCPU and LRU Variants
@@ -29,7 +30,16 @@ will automatically evict the least recently used entries when the hash
 table reaches capacity. An LRU hash maintains an internal LRU list that
 is used to select elements for eviction. This internal LRU list is
 shared across CPUs but it is possible to request a per CPU LRU list with
-the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``.
+the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``.  The
+following table outlines the properties of LRU maps depending on the
+map type and the flags used to create the map.
+
+======================== ========================= ================================
+Flag                     ``BPF_MAP_TYPE_LRU_HASH`` ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
+======================== ========================= ================================
+**BPF_F_NO_COMMON_LRU**  Per-CPU LRU, global map   Per-CPU LRU, per-cpu map
+**!BPF_F_NO_COMMON_LRU** Global LRU, global map    Global LRU, per-cpu map
+======================== ========================= ================================
 
 Usage
 =====
@@ -206,3 +216,44 @@ Userspace walking the map elements from the map declared above:
                     cur_key = &next_key;
             }
     }
+
+Internals
+=========
+
+This section of the document is targeted at Linux developers and describes
+aspects of the map implementations that are not considered stable ABI. The
+following details are subject to change in future versions of the kernel.
+
+``BPF_MAP_TYPE_LRU_HASH`` and variants
+--------------------------------------
+
+Updating elements in LRU maps may trigger eviction behaviour when the capacity
+of the map is reached. There are various steps that the update algorithm
+attempts in order to enforce the LRU property which have increasing impacts on
+other CPUs involved in the following operation attempts:
+
+- Attempt to use CPU-local state to batch operations
+- Attempt to fetch free nodes from global lists
+- Attempt to pull any node from a global list and remove it from the hashmap
+- Attempt to pull any node from any CPU's list and remove it from the hashmap
+
+This algorithm is described visually in the following diagram. See the
+description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of
+the corresponding operations:
+
+.. kernel-figure::  map_lru_hash_update.dot
+   :alt:    Diagram outlining the LRU eviction steps taken during map update.
+
+   LRU hash eviction during map update for ``BPF_MAP_TYPE_LRU_HASH`` and
+   variants. See the dot file source for kernel function name code references.
+
+Map updates start from the oval in the top right "begin ``bpf_map_update()``"
+and progress through the graph towards the bottom where the result may be
+either a successful update or a failure with various error codes. The key in
+the top right provides indicators for which locks may be involved in specific
+operations. This is intended as a visual hint for reasoning about how map
+contention may impact update operations, though the map type and flags may
+impact the actual contention on those locks, based on the logic described in
+the table above. For instance, if the map is created with type
+``BPF_MAP_TYPE_LRU_PERCPU_HASH`` and flags ``BPF_F_NO_COMMON_LRU`` then all map
+properties would be per-cpu.
diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot
new file mode 100644 (file)
index 0000000..a0fee34
--- /dev/null
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2022-2023 Isovalent, Inc.
+digraph {
+  node [colorscheme=accent4,style=filled] # Apply colorscheme to all nodes
+  graph [splines=ortho, nodesep=1]
+
+  subgraph cluster_key {
+    label = "Key\n(locks held during operation)";
+    rankdir = TB;
+
+    remote_lock [shape=rectangle,fillcolor=4,label="remote CPU LRU lock"]
+    hash_lock [shape=rectangle,fillcolor=3,label="hashtab lock"]
+    lru_lock [shape=rectangle,fillcolor=2,label="LRU lock"]
+    local_lock [shape=rectangle,fillcolor=1,label="local CPU LRU lock"]
+    no_lock [shape=rectangle,label="no locks held"]
+  }
+
+  begin [shape=oval,label="begin\nbpf_map_update()"]
+
+  // Nodes below with an 'fn_' prefix are roughly labeled by the C function
+  // names that initiate the corresponding logic in kernel/bpf/bpf_lru_list.c.
+  // Number suffixes and errno suffixes handle subsections of the corresponding
+  // logic in the function as of the writing of this dot.
+
+  // cf. __local_list_pop_free() / bpf_percpu_lru_pop_free()
+  local_freelist_check [shape=diamond,fillcolor=1,
+    label="Local freelist\nnode available?"];
+  use_local_node [shape=rectangle,
+    label="Use node owned\nby this CPU"]
+
+  // cf. bpf_lru_pop_free()
+  common_lru_check [shape=diamond,
+    label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
+
+  fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
+    label="Flush local pending,
+    Rotate Global list, move
+    LOCAL_FREE_TARGET
+    from global -> local"]
+  // Also corresponds to:
+  // fn__local_list_flush()
+  // fn_bpf_lru_list_rotate()
+  fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
+    label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
+
+  fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
+    label="Shrink inactive list
+      up to remaining
+      LOCAL_FREE_TARGET
+      (global LRU -> local)"]
+  fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
+    label="> 0 entries in\nlocal free list?"]
+  fn___bpf_lru_list_shrink2 [shape=rectangle,fillcolor=2,
+    label="Steal one node from
+      inactive, or if empty,
+      from active global list"]
+  fn___bpf_lru_list_shrink3 [shape=rectangle,fillcolor=3,
+    label="Try to remove\nnode from hashtab"]
+
+  local_freelist_check2 [shape=diamond,label="Htab removal\nsuccessful?"]
+  common_lru_check2 [shape=diamond,
+    label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
+
+  subgraph cluster_remote_lock {
+    label = "Iterate through CPUs\n(start from current)";
+    style = dashed;
+    rankdir=LR;
+
+    local_freelist_check5 [shape=diamond,fillcolor=4,
+      label="Steal a node from\nper-cpu freelist?"]
+    local_freelist_check6 [shape=rectangle,fillcolor=4,
+      label="Steal a node from
+        (1) Unreferenced pending, or
+        (2) Any pending node"]
+    local_freelist_check7 [shape=rectangle,fillcolor=3,
+      label="Try to remove\nnode from hashtab"]
+    fn_htab_lru_map_update_elem [shape=diamond,
+      label="Stole node\nfrom remote\nCPU?"]
+    fn_htab_lru_map_update_elem2 [shape=diamond,label="Iterated\nall CPUs?"]
+    // Also corresponds to:
+    // use_local_node()
+    // fn__local_list_pop_pending()
+  }
+
+  fn_bpf_lru_list_pop_free_to_local2 [shape=rectangle,
+    label="Use node that was\nnot recently referenced"]
+  local_freelist_check4 [shape=rectangle,
+    label="Use node that was\nactively referenced\nin global list"]
+  fn_htab_lru_map_update_elem_ENOMEM [shape=oval,label="return -ENOMEM"]
+  fn_htab_lru_map_update_elem3 [shape=rectangle,
+    label="Use node that was\nactively referenced\nin (another?) CPU's cache"]
+  fn_htab_lru_map_update_elem4 [shape=rectangle,fillcolor=3,
+    label="Update hashmap\nwith new element"]
+  fn_htab_lru_map_update_elem5 [shape=oval,label="return 0"]
+  fn_htab_lru_map_update_elem_EBUSY [shape=oval,label="return -EBUSY"]
+  fn_htab_lru_map_update_elem_EEXIST [shape=oval,label="return -EEXIST"]
+  fn_htab_lru_map_update_elem_ENOENT [shape=oval,label="return -ENOENT"]
+
+  begin -> local_freelist_check
+  local_freelist_check -> use_local_node [xlabel="Y"]
+  local_freelist_check -> common_lru_check [xlabel="N"]
+  common_lru_check -> fn_bpf_lru_list_pop_free_to_local [xlabel="Y"]
+  common_lru_check -> fn___bpf_lru_list_shrink_inactive [xlabel="N"]
+  fn_bpf_lru_list_pop_free_to_local -> fn___bpf_lru_node_move_to_free
+  fn___bpf_lru_node_move_to_free ->
+    fn_bpf_lru_list_pop_free_to_local2 [xlabel="Y"]
+  fn___bpf_lru_node_move_to_free ->
+    fn___bpf_lru_list_shrink_inactive [xlabel="N"]
+  fn___bpf_lru_list_shrink_inactive -> fn___bpf_lru_list_shrink
+  fn___bpf_lru_list_shrink -> fn_bpf_lru_list_pop_free_to_local2 [xlabel = "Y"]
+  fn___bpf_lru_list_shrink -> fn___bpf_lru_list_shrink2 [xlabel="N"]
+  fn___bpf_lru_list_shrink2 -> fn___bpf_lru_list_shrink3
+  fn___bpf_lru_list_shrink3 -> local_freelist_check2
+  local_freelist_check2 -> local_freelist_check4 [xlabel = "Y"]
+  local_freelist_check2 -> common_lru_check2 [xlabel = "N"]
+  common_lru_check2 -> local_freelist_check5 [xlabel = "Y"]
+  common_lru_check2 -> fn_htab_lru_map_update_elem_ENOMEM [xlabel = "N"]
+  local_freelist_check5 -> fn_htab_lru_map_update_elem [xlabel = "Y"]
+  local_freelist_check5 -> local_freelist_check6 [xlabel = "N"]
+  local_freelist_check6 -> local_freelist_check7
+  local_freelist_check7 -> fn_htab_lru_map_update_elem
+
+  fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem3 [xlabel = "Y"]
+  fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem2  [xlabel = "N"]
+  fn_htab_lru_map_update_elem2 ->
+    fn_htab_lru_map_update_elem_ENOMEM [xlabel = "Y"]
+  fn_htab_lru_map_update_elem2 -> local_freelist_check5 [xlabel = "N"]
+  fn_htab_lru_map_update_elem3 -> fn_htab_lru_map_update_elem4
+
+  use_local_node -> fn_htab_lru_map_update_elem4
+  fn_bpf_lru_list_pop_free_to_local2 -> fn_htab_lru_map_update_elem4
+  local_freelist_check4 -> fn_htab_lru_map_update_elem4
+
+  fn_htab_lru_map_update_elem4 -> fn_htab_lru_map_update_elem5 [headlabel="Success"]
+  fn_htab_lru_map_update_elem4 ->
+    fn_htab_lru_map_update_elem_EBUSY [xlabel="Hashtab lock failed"]
+  fn_htab_lru_map_update_elem4 ->
+    fn_htab_lru_map_update_elem_EEXIST [xlabel="BPF_EXIST set and\nkey already exists"]
+  fn_htab_lru_map_update_elem4 ->
+    fn_htab_lru_map_update_elem_ENOENT [headlabel="BPF_NOEXIST set\nand no such entry"]
+
+  // Create invisible pad nodes to line up various nodes
+  pad0 [style=invis]
+  pad1 [style=invis]
+  pad2 [style=invis]
+  pad3 [style=invis]
+  pad4 [style=invis]
+
+  // Line up the key with the top of the graph
+  no_lock -> local_lock [style=invis]
+  local_lock -> lru_lock [style=invis]
+  lru_lock -> hash_lock [style=invis]
+  hash_lock -> remote_lock [style=invis]
+  remote_lock -> local_freelist_check5 [style=invis]
+  remote_lock -> fn___bpf_lru_list_shrink [style=invis]
+
+  // Line up return code nodes at the bottom of the graph
+  fn_htab_lru_map_update_elem -> pad0 [style=invis]
+  pad0 -> pad1 [style=invis]
+  pad1 -> pad2 [style=invis]
+  //pad2-> fn_htab_lru_map_update_elem_ENOMEM [style=invis]
+  fn_htab_lru_map_update_elem4 -> pad3 [style=invis]
+  pad3 -> fn_htab_lru_map_update_elem5  [style=invis]
+  pad3 -> fn_htab_lru_map_update_elem_EBUSY  [style=invis]
+  pad3 -> fn_htab_lru_map_update_elem_EEXIST  [style=invis]
+  pad3 -> fn_htab_lru_map_update_elem_ENOENT  [style=invis]
+
+  // Reduce diagram width by forcing some nodes to appear above others
+  local_freelist_check4 -> fn_htab_lru_map_update_elem3 [style=invis]
+  common_lru_check2 -> pad4 [style=invis]
+  pad4 -> local_freelist_check5 [style=invis]
+}
index 172f957..1226a94 100644 (file)
@@ -98,10 +98,65 @@ can access only the first ``PAGE_SIZE`` of that data. So it has to options:
   indicates that the kernel should use BPF's trimmed ``optval``.
 
 When the BPF program returns with the ``optlen`` greater than
-``PAGE_SIZE``, the userspace will receive ``EFAULT`` errno.
+``PAGE_SIZE``, the userspace will receive the original kernel
+buffers without any modifications that the BPF program might have
+applied.
 
 Example
 =======
 
+Recommended way to handle BPF programs is as follows:
+
+.. code-block:: c
+
+       SEC("cgroup/getsockopt")
+       int getsockopt(struct bpf_sockopt *ctx)
+       {
+               /* Custom socket option. */
+               if (ctx->level == MY_SOL && ctx->optname == MY_OPTNAME) {
+                       ctx->retval = 0;
+                       optval[0] = ...;
+                       ctx->optlen = 1;
+                       return 1;
+               }
+
+               /* Modify kernel's socket option. */
+               if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+                       ctx->retval = 0;
+                       optval[0] = ...;
+                       ctx->optlen = 1;
+                       return 1;
+               }
+
+               /* optval larger than PAGE_SIZE use kernel's buffer. */
+               if (ctx->optlen > PAGE_SIZE)
+                       ctx->optlen = 0;
+
+               return 1;
+       }
+
+       SEC("cgroup/setsockopt")
+       int setsockopt(struct bpf_sockopt *ctx)
+       {
+               /* Custom socket option. */
+               if (ctx->level == MY_SOL && ctx->optname == MY_OPTNAME) {
+                       /* do something */
+                       ctx->optlen = -1;
+                       return 1;
+               }
+
+               /* Modify kernel's socket option. */
+               if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+                       optval[0] = ...;
+                       return 1;
+               }
+
+               /* optval larger than PAGE_SIZE use kernel's buffer. */
+               if (ctx->optlen > PAGE_SIZE)
+                       ctx->optlen = 0;
+
+               return 1;
+       }
+
 See ``tools/testing/selftests/bpf/progs/sockopt_sk.c`` for an example
 of BPF program that handles socket options.
index 2363b41..3372613 100644 (file)
@@ -20,7 +20,7 @@ which is at a different MDIO base address in different switch families.
                          6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
                          6341, 6350, 6351, 6352
 - "marvell,mv88e6190"  : Switch has base address 0x00. Use with models:
-                         6190, 6190X, 6191, 6290, 6390, 6390X
+                         6163, 6190, 6190X, 6191, 6290, 6390, 6390X
 - "marvell,mv88e6250"  : Switch has base address 0x08 or 0x18. Use with model:
                          6220, 6250
 
index 9a64ed6..4d5f5cc 100644 (file)
@@ -12,10 +12,6 @@ description:
   cs_sck_delay of 500ns. Ensuring that this SPI timing requirement is observed
   depends on the SPI bus master driver.
 
-allOf:
-  - $ref: dsa.yaml#/$defs/ethernet-ports
-  - $ref: /schemas/spi/spi-peripheral-props.yaml#
-
 maintainers:
   - Vladimir Oltean <vladimir.oltean@nxp.com>
 
@@ -36,6 +32,9 @@ properties:
   reg:
     maxItems: 1
 
+  spi-cpha: true
+  spi-cpol: true
+
   # Optional container node for the 2 internal MDIO buses of the SJA1110
   # (one for the internal 100base-T1 PHYs and the other for the single
   # 100base-TX PHY). The "reg" property does not have physical significance.
@@ -109,6 +108,30 @@ $defs:
        1860, 1880, 1900, 1920, 1940, 1960, 1980, 2000, 2020, 2040, 2060, 2080,
        2100, 2120, 2140, 2160, 2180, 2200, 2220, 2240, 2260]
 
+allOf:
+  - $ref: dsa.yaml#/$defs/ethernet-ports
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+  - if:
+      properties:
+        compatible:
+          enum:
+            - nxp,sja1105e
+            - nxp,sja1105p
+            - nxp,sja1105q
+            - nxp,sja1105r
+            - nxp,sja1105s
+            - nxp,sja1105t
+    then:
+      properties:
+        spi-cpol: false
+      required:
+        - spi-cpha
+    else:
+      properties:
+        spi-cpha: false
+      required:
+        - spi-cpol
+
 unevaluatedProperties: false
 
 examples:
@@ -120,6 +143,7 @@ examples:
             ethernet-switch@1 {
                     reg = <0x1>;
                     compatible = "nxp,sja1105t";
+                    spi-cpha;
 
                     ethernet-ports {
                             #address-cells = <1>;
index 4f57453..c1241c8 100644 (file)
@@ -93,6 +93,12 @@ properties:
       the turn around line low at end of the control phase of the
       MDIO transaction.
 
+  clocks:
+    maxItems: 1
+    description:
+      External clock connected to the PHY. If not specified it is assumed
+      that the PHY uses a fixed crystal or an internal oscillator.
+
   enet-phy-lane-swap:
     $ref: /schemas/types.yaml#/definitions/flag
     description:
index b110abb..2d382fa 100644 (file)
@@ -16,7 +16,7 @@ maintainers:
 
 properties:
   $nodename:
-    pattern: "^ethernet-pse(@.*)?$"
+    pattern: "^ethernet-pse(@.*|-([0-9]|[1-9][0-9]+))?$"
 
   "#pse-cells":
     description:
index cd155ea..5db620e 100644 (file)
@@ -169,6 +169,87 @@ Setting the brightness to zero with brightness_set() callback function
 should completely turn off the LED and cancel the previously programmed
 hardware blinking function, if any.
 
+Hardware driven LEDs
+====================
+
+Some LEDs can be programmed to be driven by hardware. This is not
+limited to blink but also to turn off or on autonomously.
+To support this feature, a LED needs to implement various additional
+ops and needs to declare specific support for the supported triggers.
+
+With hw control we refer to the LED driven by hardware.
+
+LED driver must define the following value to support hw control:
+
+    - hw_control_trigger:
+               unique trigger name supported by the LED in hw control
+               mode.
+
+LED driver must implement the following API to support hw control:
+    - hw_control_is_supported:
+                check if the flags passed by the supported trigger can
+                be parsed and activate hw control on the LED.
+
+                Return 0 if the passed flags mask is supported and
+                can be set with hw_control_set().
+
+                If the passed flags mask is not supported -EOPNOTSUPP
+                must be returned, the LED trigger will use software
+                fallback in this case.
+
+                Return a negative error in case of any other error like
+                device not ready or timeouts.
+
+     - hw_control_set:
+                activate hw control. LED driver will use the provided
+                flags passed from the supported trigger, parse them to
+                a set of mode and setup the LED to be driven by hardware
+                following the requested modes.
+
+                Set LED_OFF via the brightness_set to deactivate hw control.
+
+                Return 0 on success, a negative error number on failing to
+                apply flags.
+
+    - hw_control_get:
+                get active modes from a LED already in hw control, parse
+                them and set in flags the current active flags for the
+                supported trigger.
+
+                Return 0 on success, a negative error number on failing
+                parsing the initial mode.
+                Error from this function is NOT FATAL as the device may
+                be in a not supported initial state by the attached LED
+                trigger.
+
+    - hw_control_get_device:
+                return the device associated with the LED driver in
+                hw control. A trigger might use this to match the
+                returned device from this function with a configured
+                device for the trigger as the source for blinking
+                events and correctly enable hw control.
+                (example a netdev trigger configured to blink for a
+                particular dev match the returned dev from get_device
+                to set hw control)
+
+                Returns a pointer to a struct device or NULL if nothing
+                is currently attached.
+
+LED driver can activate additional modes by default to work around the
+impossibility of supporting each different mode on the supported trigger.
+Examples are hardcoding the blink speed to a set interval, or enabling special
+features like bypassing blink if some requirements are not met.
+
+A trigger should first check if the hw control API is supported by the LED
+driver and check if the trigger is supported to verify if hw control is possible,
+use hw_control_is_supported to check if the flags are supported and only at
+the end use hw_control_set to activate hw control.
+
+A trigger can use hw_control_get to check if a LED is already in hw control
+and init their flags.
+
+When the LED is in hw control, no software blink is possible and doing so
+will effectively disable hw control.
 
 Known Issues
 ============
index b33541a..ac43504 100644 (file)
@@ -122,6 +122,14 @@ properties:
                 enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string ]
               len:
                 $ref: '#/$defs/len-or-define'
+              byte-order:
+                enum: [ little-endian, big-endian ]
+              doc:
+                description: Documentation for the struct member attribute.
+                type: string
+              enum:
+                description: Name of the enum type used for the attribute.
+                type: string
         # End genetlink-legacy
 
   attribute-sets:
diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
new file mode 100644 (file)
index 0000000..3b0624c
--- /dev/null
@@ -0,0 +1,831 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: ovs_flow
+version: 1
+protocol: genetlink-legacy
+
+doc:
+  OVS flow configuration over generic netlink.
+
+definitions:
+  -
+    name: ovs-header
+    type: struct
+    doc: |
+      Header for OVS Generic Netlink messages.
+    members:
+      -
+        name: dp-ifindex
+        type: u32
+        doc: |
+          ifindex of local port for datapath (0 to make a request not specific
+          to a datapath).
+  -
+    name: ovs-flow-stats
+    type: struct
+    members:
+      -
+        name: n-packets
+        type: u64
+        doc: Number of matched packets.
+      -
+        name: n-bytes
+        type: u64
+        doc: Number of matched bytes.
+  -
+    name: ovs-key-mpls
+    type: struct
+    members:
+      -
+        name: mpls-lse
+        type: u32
+        byte-order: big-endian
+  -
+    name: ovs-key-ipv4
+    type: struct
+    members:
+      -
+        name: ipv4-src
+        type: u32
+        byte-order: big-endian
+      -
+        name: ipv4-dst
+        type: u32
+        byte-order: big-endian
+      -
+        name: ipv4-proto
+        type: u8
+      -
+        name: ipv4-tos
+        type: u8
+      -
+        name: ipv4-ttl
+        type: u8
+      -
+        name: ipv4-frag
+        type: u8
+        enum: ovs-frag-type
+  -
+    name: ovs-frag-type
+    type: enum
+    entries:
+      -
+        name: none
+        doc: Packet is not a fragment.
+      -
+        name: first
+        doc: Packet is a fragment with offset 0.
+      -
+        name: later
+        doc: Packet is a fragment with nonzero offset.
+      -
+        name: any
+        value: 255
+  -
+    name: ovs-key-tcp
+    type: struct
+    members:
+      -
+        name: tcp-src
+        type: u16
+        byte-order: big-endian
+      -
+        name: tcp-dst
+        type: u16
+        byte-order: big-endian
+  -
+    name: ovs-key-udp
+    type: struct
+    members:
+      -
+        name: udp-src
+        type: u16
+        byte-order: big-endian
+      -
+        name: udp-dst
+        type: u16
+        byte-order: big-endian
+  -
+    name: ovs-key-sctp
+    type: struct
+    members:
+      -
+        name: sctp-src
+        type: u16
+        byte-order: big-endian
+      -
+        name: sctp-dst
+        type: u16
+        byte-order: big-endian
+  -
+    name: ovs-key-icmp
+    type: struct
+    members:
+      -
+        name: icmp-type
+        type: u8
+      -
+        name: icmp-code
+        type: u8
+  -
+    name: ovs-key-ct-tuple-ipv4
+    type: struct
+    members:
+      -
+        name: ipv4-src
+        type: u32
+        byte-order: big-endian
+      -
+        name: ipv4-dst
+        type: u32
+        byte-order: big-endian
+      -
+        name: src-port
+        type: u16
+        byte-order: big-endian
+      -
+        name: dst-port
+        type: u16
+        byte-order: big-endian
+      -
+        name: ipv4-proto
+        type: u8
+  -
+    name: ovs-action-push-vlan
+    type: struct
+    members:
+      -
+        name: vlan_tpid
+        type: u16
+        byte-order: big-endian
+        doc: Tag protocol identifier (TPID) to push.
+      -
+        name: vlan_tci
+        type: u16
+        byte-order: big-endian
+        doc: Tag control identifier (TCI) to push.
+  -
+    name: ovs-ufid-flags
+    type: flags
+    entries:
+      - omit-key
+      - omit-mask
+      - omit-actions
+  -
+    name: ovs-action-hash
+    type: struct
+    members:
+      -
+        name: hash-algorithm
+        type: u32
+        doc: Algorithm used to compute hash prior to recirculation.
+      -
+        name: hash-basis
+        type: u32
+        doc: Basis used for computing hash.
+  -
+    name: ovs-hash-alg
+    type: enum
+    doc: |
+      Data path hash algorithm for computing Datapath hash. The algorithm type only specifies
+      the fields in a flow that will be used as part of the hash. Each datapath is free to use its
+      own hash algorithm. The hash value will be opaque to the user space daemon.
+    entries:
+      - ovs-hash-alg-l4
+
+  -
+    name: ovs-action-push-mpls
+    type: struct
+    members:
+      -
+        name: lse
+        type: u32
+        byte-order: big-endian
+        doc: |
+          MPLS label stack entry to push
+      -
+        name: ethertype
+        type: u32
+        byte-order: big-endian
+        doc: |
+          Ethertype to set in the encapsulating ethernet frame.  The only values
+          ethertype should ever be given are ETH_P_MPLS_UC and ETH_P_MPLS_MC,
+          indicating MPLS unicast or multicast. Others are rejected.
+  -
+    name: ovs-action-add-mpls
+    type: struct
+    members:
+      -
+        name: lse
+        type: u32
+        byte-order: big-endian
+        doc: |
+          MPLS label stack entry to push
+      -
+        name: ethertype
+        type: u32
+        byte-order: big-endian
+        doc: |
+          Ethertype to set in the encapsulating ethernet frame.  The only values
+          ethertype should ever be given are ETH_P_MPLS_UC and ETH_P_MPLS_MC,
+          indicating MPLS unicast or multicast. Others are rejected.
+      -
+        name: tun-flags
+        type: u16
+        doc: |
+          MPLS tunnel attributes.
+  -
+    name: ct-state-flags
+    type: flags
+    entries:
+      -
+        name: new
+        doc: Beginning of a new connection.
+      -
+        name: established
+        doc: Part of an existing connection.
+      -
+        name: related
+        doc: Related to an existing connection.
+      -
+        name: reply-dir
+        doc: Flow is in the reply direction.
+      -
+        name: invalid
+        doc: Could not track the connection.
+      -
+        name: tracked
+        doc: Conntrack has occurred.
+      -
+        name: src-nat
+        doc: Packet's source address/port was mangled by NAT.
+      -
+        name: dst-nat
+        doc: Packet's destination address/port was mangled by NAT.
+
+attribute-sets:
+  -
+    name: flow-attrs
+    attributes:
+      -
+        name: key
+        type: nest
+        nested-attributes: key-attrs
+        doc: |
+          Nested attributes specifying the flow key. Always present in
+          notifications. Required for all requests (except dumps).
+      -
+        name: actions
+        type: nest
+        nested-attributes: action-attrs
+        doc: |
+          Nested attributes specifying the actions to take for packets that
+          match the key. Always present in notifications. Required for
+          OVS_FLOW_CMD_NEW requests, optional for OVS_FLOW_CMD_SET requests.  An
+          OVS_FLOW_CMD_SET without OVS_FLOW_ATTR_ACTIONS will not modify the
+          actions.  To clear the actions, an OVS_FLOW_ATTR_ACTIONS without any
+          nested attributes must be given.
+      -
+        name: stats
+        type: binary
+        struct: ovs-flow-stats
+        doc: |
+          Statistics for this flow. Present in notifications if the stats would
+          be nonzero. Ignored in requests.
+      -
+        name: tcp-flags
+        type: u8
+        doc: |
+          An 8-bit value giving the ORed value of all of the TCP flags seen on
+          packets in this flow. Only present in notifications for TCP flows, and
+          only if it would be nonzero. Ignored in requests.
+      -
+        name: used
+        type: u64
+        doc: |
+          A 64-bit integer giving the time, in milliseconds on the system
+          monotonic clock, at which a packet was last processed for this
+          flow. Only present in notifications if a packet has been processed for
+          this flow. Ignored in requests.
+      -
+        name: clear
+        type: flag
+        doc: |
+          If present in a OVS_FLOW_CMD_SET request, clears the last-used time,
+          accumulated TCP flags, and statistics for this flow.  Otherwise
+          ignored in requests. Never present in notifications.
+      -
+        name: mask
+        type: nest
+        nested-attributes: key-attrs
+        doc: |
+          Nested attributes specifying the mask bits for wildcarded flow
+          match. Mask bit value '1' specifies exact match with corresponding
+          flow key bit, while mask bit value '0' specifies a wildcarded
+          match. Omitting attribute is treated as wildcarding all corresponding
+          fields. Optional for all requests. If not present, all flow key bits
+          are exact match bits.
+      -
+        name: probe
+        type: binary
+        doc: |
+          Flow operation is a feature probe, error logging should be suppressed.
+      -
+        name: ufid
+        type: binary
+        doc: |
+          A value between 1-16 octets specifying a unique identifier for the
+          flow. Causes the flow to be indexed by this value rather than the
+          value of the OVS_FLOW_ATTR_KEY attribute. Optional for all
+          requests. Present in notifications if the flow was created with this
+          attribute.
+      -
+        name: ufid-flags
+        type: u32
+        enum: ovs-ufid-flags
+        doc: |
+          A 32-bit value of ORed flags that provide alternative semantics for
+          flow installation and retrieval. Optional for all requests.
+      -
+        name: pad
+        type: binary
+
+  -
+    name: key-attrs
+    attributes:
+      -
+        name: encap
+        type: nest
+        nested-attributes: key-attrs
+      -
+        name: priority
+        type: u32
+      -
+        name: in-port
+        type: u32
+      -
+        name: ethernet
+        type: binary
+        doc: struct ovs_key_ethernet
+      -
+        name: vlan
+        type: u16
+        byte-order: big-endian
+      -
+        name: ethertype
+        type: u16
+        byte-order: big-endian
+      -
+        name: ipv4
+        type: binary
+        struct: ovs-key-ipv4
+      -
+        name: ipv6
+        type: binary
+        doc: struct ovs_key_ipv6
+      -
+        name: tcp
+        type: binary
+        struct: ovs-key-tcp
+      -
+        name: udp
+        type: binary
+        struct: ovs-key-udp
+      -
+        name: icmp
+        type: binary
+        struct: ovs-key-icmp
+      -
+        name: icmpv6
+        type: binary
+        struct: ovs-key-icmp
+      -
+        name: arp
+        type: binary
+        doc: struct ovs_key_arp
+      -
+        name: nd
+        type: binary
+        doc: struct ovs_key_nd
+      -
+        name: skb-mark
+        type: u32
+      -
+        name: tunnel
+        type: nest
+        nested-attributes: tunnel-key-attrs
+      -
+        name: sctp
+        type: binary
+        struct: ovs-key-sctp
+      -
+        name: tcp-flags
+        type: u16
+        byte-order: big-endian
+      -
+        name: dp-hash
+        type: u32
+        doc: Value 0 indicates the hash is not computed by the datapath.
+      -
+        name: recirc-id
+        type: u32
+      -
+        name: mpls
+        type: binary
+        struct: ovs-key-mpls
+      -
+        name: ct-state
+        type: u32
+        enum: ct-state-flags
+        enum-as-flags: true
+      -
+        name: ct-zone
+        type: u16
+        doc: connection tracking zone
+      -
+        name: ct-mark
+        type: u32
+        doc: connection tracking mark
+      -
+        name: ct-labels
+        type: binary
+        doc: 16-octet connection tracking label
+      -
+        name: ct-orig-tuple-ipv4
+        type: binary
+        struct: ovs-key-ct-tuple-ipv4
+      -
+        name: ct-orig-tuple-ipv6
+        type: binary
+        doc: struct ovs_key_ct_tuple_ipv6
+      -
+        name: nsh
+        type: nest
+        nested-attributes: ovs-nsh-key-attrs
+      -
+        name: packet-type
+        type: u32
+        byte-order: big-endian
+        doc: Should not be sent to the kernel
+      -
+        name: nd-extensions
+        type: binary
+        doc: Should not be sent to the kernel
+      -
+        name: tunnel-info
+        type: binary
+        doc: struct ip_tunnel_info
+      -
+        name: ipv6-exthdrs
+        type: binary
+        doc: struct ovs_key_ipv6_exthdr
+  -
+    name: action-attrs
+    attributes:
+      -
+        name: output
+        type: u32
+        doc: ovs port number in datapath
+      -
+        name: userspace
+        type: nest
+        nested-attributes: userspace-attrs
+      -
+        name: set
+        type: nest
+        nested-attributes: key-attrs
+        doc: Replaces the contents of an existing header. The single nested attribute specifies a header to modify and its value.
+      -
+        name: push-vlan
+        type: binary
+        struct: ovs-action-push-vlan
+        doc: Push a new outermost 802.1Q or 802.1ad header onto the packet.
+      -
+        name: pop-vlan
+        type: flag
+        doc: Pop the outermost 802.1Q or 802.1ad header from the packet.
+      -
+        name: sample
+        type: nest
+        nested-attributes: sample-attrs
+        doc: |
+          Probabilistically executes actions, as specified in the nested attributes.
+      -
+        name: recirc
+        type: u32
+        doc: recirc id
+      -
+        name: hash
+        type: binary
+        struct: ovs-action-hash
+      -
+        name: push-mpls
+        type: binary
+        struct: ovs-action-push-mpls
+        doc: |
+          Push a new MPLS label stack entry onto the top of the packet's MPLS
+          label stack. Set the ethertype of the encapsulating frame to either
+          ETH_P_MPLS_UC or ETH_P_MPLS_MC to indicate the new packet contents.
+      -
+        name: pop-mpls
+        type: u16
+        byte-order: big-endian
+        doc: ethertype
+      -
+        name: set-masked
+        type: nest
+        nested-attributes: key-attrs
+        doc: |
+          Replaces the contents of an existing header. A nested attribute
+          specifies a header to modify, its value, and a mask. For every bit set
+          in the mask, the corresponding bit value is copied from the value to
+          the packet header field, rest of the bits are left unchanged. The
+          non-masked value bits must be passed in as zeroes. Masking is not
+          supported for the OVS_KEY_ATTR_TUNNEL attribute.
+      -
+        name: ct
+        type: nest
+        nested-attributes: ct-attrs
+        doc: |
+          Track the connection. Populate the conntrack-related entries
+          in the flow key.
+      -
+        name: trunc
+        type: u32
+        doc: struct ovs_action_trunc is a u32 max length
+      -
+        name: push-eth
+        type: binary
+        doc: struct ovs_action_push_eth
+      -
+        name: pop-eth
+        type: flag
+      -
+        name: ct-clear
+        type: flag
+      -
+        name: push-nsh
+        type: nest
+        nested-attributes: ovs-nsh-key-attrs
+        doc: |
+          Push NSH header to the packet.
+      -
+        name: pop-nsh
+        type: flag
+        doc: |
+          Pop the outermost NSH header off the packet.
+      -
+        name: meter
+        type: u32
+        doc: |
+          Run packet through a meter, which may drop the packet, or modify the
+          packet (e.g., change the DSCP field)
+      -
+        name: clone
+        type: nest
+        nested-attributes: action-attrs
+        doc: |
+          Make a copy of the packet and execute a list of actions without
+          affecting the original packet and key.
+      -
+        name: check-pkt-len
+        type: nest
+        nested-attributes: check-pkt-len-attrs
+        doc: |
+          Check the packet length and execute a set of actions if greater than
+          the specified packet length, else execute another set of actions.
+      -
+        name: add-mpls
+        type: binary
+        struct: ovs-action-add-mpls
+        doc: |
+          Push a new MPLS label stack entry at the start of the packet or at the
+          start of the l3 header depending on the value of l3 tunnel flag in the
+          tun_flags field of this OVS_ACTION_ATTR_ADD_MPLS argument.
+      -
+        name: dec-ttl
+        type: nest
+        nested-attributes: dec-ttl-attrs
+  -
+    name: tunnel-key-attrs
+    attributes:
+      -
+        name: id
+        type: u64
+        byte-order: big-endian
+        value: 0
+      -
+        name: ipv4-src
+        type: u32
+        byte-order: big-endian
+      -
+        name: ipv4-dst
+        type: u32
+        byte-order: big-endian
+      -
+        name: tos
+        type: u8
+      -
+        name: ttl
+        type: u8
+      -
+        name: dont-fragment
+        type: flag
+      -
+        name: csum
+        type: flag
+      -
+        name: oam
+        type: flag
+      -
+        name: geneve-opts
+        type: binary
+        sub-type: u32
+      -
+        name: tp-src
+        type: u16
+        byte-order: big-endian
+      -
+        name: tp-dst
+        type: u16
+        byte-order: big-endian
+      -
+        name: vxlan-opts
+        type: nest
+        nested-attributes: vxlan-ext-attrs
+      -
+        name: ipv6-src
+        type: binary
+        doc: |
+          struct in6_addr source IPv6 address
+      -
+        name: ipv6-dst
+        type: binary
+        doc: |
+          struct in6_addr destination IPv6 address
+      -
+        name: pad
+        type: binary
+      -
+        name: erspan-opts
+        type: binary
+        doc: |
+          struct erspan_metadata
+      -
+        name: ipv4-info-bridge
+        type: flag
+  -
+    name: check-pkt-len-attrs
+    attributes:
+      -
+        name: pkt-len
+        type: u16
+      -
+        name: actions-if-greater
+        type: nest
+        nested-attributes: action-attrs
+      -
+        name: actions-if-less-equal
+        type: nest
+        nested-attributes: action-attrs
+  -
+    name: sample-attrs
+    attributes:
+      -
+        name: probability
+        type: u32
+      -
+        name: actions
+        type: nest
+        nested-attributes: action-attrs
+  -
+    name: userspace-attrs
+    attributes:
+      -
+        name: pid
+        type: u32
+      -
+        name: userdata
+        type: binary
+      -
+        name: egress-tun-port
+        type: u32
+      -
+        name: actions
+        type: flag
+  -
+    name: ovs-nsh-key-attrs
+    attributes:
+      -
+        name: base
+        type: binary
+      -
+        name: md1
+        type: binary
+      -
+        name: md2
+        type: binary
+  -
+    name: ct-attrs
+    attributes:
+      -
+        name: commit
+        type: flag
+      -
+        name: zone
+        type: u16
+      -
+        name: mark
+        type: binary
+      -
+        name: labels
+        type: binary
+      -
+        name: helper
+        type: string
+      -
+        name: nat
+        type: nest
+        nested-attributes: nat-attrs
+      -
+        name: force-commit
+        type: flag
+      -
+        name: eventmask
+        type: u32
+      -
+        name: timeout
+        type: string
+  -
+    name: nat-attrs
+    attributes:
+      -
+        name: src
+        type: binary
+      -
+        name: dst
+        type: binary
+      -
+        name: ip-min
+        type: binary
+      -
+        name: ip-max
+        type: binary
+      -
+        name: proto-min
+        type: binary
+      -
+        name: proto-max
+        type: binary
+      -
+        name: persistent
+        type: binary
+      -
+        name: proto-hash
+        type: binary
+      -
+        name: proto-random
+        type: binary
+  -
+    name: dec-ttl-attrs
+    attributes:
+      -
+        name: action
+        type: nest
+        nested-attributes: action-attrs
+  -
+    name: vxlan-ext-attrs
+    attributes:
+      -
+        name: gbp
+        type: u32
+
+operations:
+  fixed-header: ovs-header
+  list:
+    -
+      name: flow-get
+      doc: Get / dump OVS flow configuration and state
+      value: 3
+      attribute-set: flow-attrs
+      do: &flow-get-op
+        request:
+          attributes:
+            - dp-ifindex
+            - key
+            - ufid
+            - ufid-flags
+        reply:
+          attributes:
+            - dp-ifindex
+            - key
+            - ufid
+            - mask
+            - stats
+            - actions
+      dump: *flow-get-op
+
+mcast-groups:
+  list:
+    -
+      name: ovs_flow
index 69695e5..e4d065c 100644 (file)
@@ -84,24 +84,6 @@ Once the VM shuts down, or otherwise releases the VF, the command will
 complete.
 
 
-Important notes for SR-IOV and Link Aggregation
------------------------------------------------
-Link Aggregation is mutually exclusive with SR-IOV.
-
-- If Link Aggregation is active, SR-IOV VFs cannot be created on the PF.
-- If SR-IOV is active, you cannot set up Link Aggregation on the interface.
-
-Bridging and MACVLAN are also affected by this. If you wish to use bridging or
-MACVLAN with SR-IOV, you must set up bridging or MACVLAN before enabling
-SR-IOV. If you are using bridging or MACVLAN in conjunction with SR-IOV, and
-you want to remove the interface from the bridge or MACVLAN, you must follow
-these steps:
-
-1. Destroy SR-IOV VFs if they exist
-2. Remove the interface from the bridge or MACVLAN
-3. Recreate SRIOV VFs as needed
-
-
 Additional Features and Configurations
 ======================================
 
index 5ba9015..bfd233c 100644 (file)
@@ -13,6 +13,7 @@ Contents
 - `Drivers`_
 - `Basic packet flow`_
 - `Devlink health reporters`_
+- `Quality of service`_
 
 Overview
 ========
@@ -287,3 +288,47 @@ For example::
         NIX_AF_ERR:
                NIX Error Interrupt Reg : 64
                Rx on unmapped PF_FUNC
+
+
+Quality of service
+==================
+
+
+Hardware algorithms used in scheduling
+--------------------------------------
+
+octeontx2 silicon and CN10K transmit interface consists of five transmit levels
+starting from SMQ/MDQ, TL4 to TL1. Each packet will traverse MDQ, TL4 to TL1
+levels. Each level contains an array of queues to support scheduling and shaping.
+The hardware uses the below algorithms depending on the priority of scheduler queues.
+Once the user creates tc classes with different priorities, the driver configures
+schedulers allocated to the class with specified priority along with rate-limiting
+configuration.
+
+1. Strict Priority
+
+      -  Once packets are submitted to MDQ, hardware picks all active MDQs having different priority
+         using strict priority.
+
+2. Round Robin
+
+      - Active MDQs having the same priority level are chosen using round robin.
+
+
+Setup HTB offload
+-----------------
+
+1. Enable HW TC offload on the interface::
+
+        # ethtool -K <interface> hw-tc-offload on
+
+2. Create htb root::
+
+        # tc qdisc add dev <interface> clsact
+        # tc qdisc replace dev <interface> root handle 1: htb offload
+
+3. Create tc classes with different priorities::
+
+        # tc class add dev <interface> parent 1: classid 1:1 htb rate 10Gbit prio 1
+
+        # tc class add dev <interface> parent 1: classid 1:2 htb rate 10Gbit prio 7
index 80b8f73..366e2a5 100644 (file)
@@ -881,9 +881,10 @@ tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
 tcp_syn_retries - INTEGER
        Number of times initial SYNs for an active TCP connection attempt
        will be retransmitted. Should not be higher than 127. Default value
-       is 6, which corresponds to 63seconds till the last retransmission
-       with the current initial RTO of 1second. With this the final timeout
-       for an active TCP connection attempt will happen after 127seconds.
+       is 6, which corresponds to 67seconds (with tcp_syn_linear_timeouts = 4)
+       till the last retransmission with the current initial RTO of 1second.
+       With this the final timeout for an active TCP connection attempt
+       will happen after 131seconds.
 
 tcp_timestamps - INTEGER
        Enable timestamps as defined in RFC1323.
@@ -946,6 +947,16 @@ tcp_pacing_ca_ratio - INTEGER
 
        Default: 120
 
+tcp_syn_linear_timeouts - INTEGER
+       The number of times for an active TCP connection to retransmit SYNs with
+       a linear backoff timeout before defaulting to an exponential backoff
+       timeout. This has no effect on SYNACK at the passive TCP side.
+
+       With an initial RTO of 1 and tcp_syn_linear_timeouts = 4 we would
+       expect SYN RTOs to be: 1, 1, 1, 1, 1, 2, 4, ... (4 linear timeouts,
+       and the first exponential backoff using 2^0 * initial_RTO).
+       Default: 4
+
 tcp_tso_win_divisor - INTEGER
        This allows control over what percentage of the congestion window
        can be consumed by a single TSO frame.
index a3b847e..bada896 100644 (file)
@@ -78,3 +78,82 @@ to see other examples.
 The code generation itself is performed by ``tools/net/ynl/ynl-gen-c.py``
 but it takes a few arguments so calling it directly for each file
 quickly becomes tedious.
+
+YNL lib
+=======
+
+``tools/net/ynl/lib/`` contains an implementation of a C library
+(based on libmnl) which integrates with code generated by
+``tools/net/ynl/ynl-gen-c.py`` to create easy to use netlink wrappers.
+
+YNL basics
+----------
+
+The YNL library consists of two parts - the generic code (functions
+prefixed by ``ynl_``) and per-family auto-generated code (prefixed
+with the name of the family).
+
+To create a YNL socket call ynl_sock_create() passing the family
+struct (family structs are exported by the auto-generated code).
+ynl_sock_destroy() closes the socket.
+
+YNL requests
+------------
+
+Steps for issuing YNL requests are best explained on an example.
+All the functions and types in this example come from the auto-generated
+code (for the netdev family in this case):
+
+.. code-block:: c
+
+   // 0. Request and response pointers
+   struct netdev_dev_get_req *req;
+   struct netdev_dev_get_rsp *d;
+
+   // 1. Allocate a request
+   req = netdev_dev_get_req_alloc();
+   // 2. Set request parameters (as needed)
+   netdev_dev_get_req_set_ifindex(req, ifindex);
+
+   // 3. Issues the request
+   d = netdev_dev_get(ys, req);
+   // 4. Free the request arguments
+   netdev_dev_get_req_free(req);
+   // 5. Error check (the return value from step 3)
+   if (!d) {
+       // 6. Print the YNL-generated error
+       fprintf(stderr, "YNL: %s\n", ys->err.msg);
+        return -1;
+   }
+
+   // ... do stuff with the response @d
+
+   // 7. Free response
+   netdev_dev_get_rsp_free(d);
+
+YNL dumps
+---------
+
+Performing dumps follows a similar pattern to requests.
+Dumps return a list of objects terminated by a special marker,
+or NULL on error. Use ``ynl_dump_foreach()`` to iterate over
+the result.
+
+YNL notifications
+-----------------
+
+YNL lib supports using the same socket for notifications and
+requests. In case notifications arrive during processing of a request
+they are queued internally and can be retrieved at a later time.
+
+To subscribe to notifications use ``ynl_subscribe()``.
+The notifications have to be read out from the socket,
+``ynl_socket_get_fd()`` returns the underlying socket fd which can
+be plugged into appropriate asynchronous IO API like ``poll``,
+or ``select``.
+
+Notifications can be retrieved using ``ynl_ntf_dequeue()`` and have
+to be freed using ``ynl_ntf_free()``. Since we don't know the notification
+type upfront the notifications are returned as ``struct ynl_ntf_base_type *``
+and user is expected to cast them to the appropriate full type based
+on the ``cmd`` member.
index a2f1d14..0971854 100644 (file)
@@ -909,13 +909,6 @@ L: netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/ethernet/altera/
 
-ALTERA TSE PCS
-M:     Maxime Chevallier <maxime.chevallier@bootlin.com>
-L:     netdev@vger.kernel.org
-S:     Supported
-F:     drivers/net/pcs/pcs-altera-tse.c
-F:     include/linux/pcs-altera-tse.h
-
 ALTERA UART/JTAG UART SERIAL DRIVERS
 M:     Tobias Klauser <tklauser@distanz.ch>
 L:     linux-serial@vger.kernel.org
@@ -7978,6 +7971,12 @@ S:       Maintained
 F:     drivers/hwmon/f75375s.c
 F:     include/linux/f75375s.h
 
+FINTEK F81604 USB to 2xCANBUS DEVICE DRIVER
+M:     Ji-Ze Hong (Peter Hong) <peter_hong@fintek.com.tw>
+L:     linux-can@vger.kernel.org
+S:     Maintained
+F:     drivers/net/can/usb/f81604.c
+
 FIREWIRE AUDIO DRIVERS and IEC 61883-1/6 PACKET STREAMING ENGINE
 M:     Clemens Ladisch <clemens@ladisch.de>
 M:     Takashi Sakamoto <o-takashi@sakamocchi.jp>
@@ -10343,9 +10342,8 @@ M:      Jesse Brandeburg <jesse.brandeburg@intel.com>
 M:     Tony Nguyen <anthony.l.nguyen@intel.com>
 L:     intel-wired-lan@lists.osuosl.org (moderated for non-subscribers)
 S:     Supported
-W:     http://www.intel.com/support/feedback.htm
-W:     http://e1000.sourceforge.net/
-Q:     http://patchwork.ozlabs.org/project/intel-wired-lan/list/
+W:     https://www.intel.com/content/www/us/en/support.html
+Q:     https://patchwork.ozlabs.org/project/intel-wired-lan/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git
 F:     Documentation/networking/device_drivers/ethernet/intel/
@@ -12847,6 +12845,13 @@ F:     Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
 F:     drivers/net/ieee802154/mcr20a.c
 F:     drivers/net/ieee802154/mcr20a.h
 
+MDIO REGMAP DRIVER
+M:     Maxime Chevallier <maxime.chevallier@bootlin.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     drivers/net/mdio/mdio-regmap.c
+F:     include/linux/mdio/mdio-regmap.h
+
 MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
 M:     William Breathitt Gray <william.gray@linaro.org>
 L:     linux-iio@vger.kernel.org
index b26da8e..145b540 100644 (file)
@@ -1731,21 +1731,21 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
        }
 }
 
-static void save_args(struct jit_ctx *ctx, int args_off, int nargs)
+static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
 {
        int i;
 
-       for (i = 0; i < nargs; i++) {
+       for (i = 0; i < nregs; i++) {
                emit(A64_STR64I(i, A64_SP, args_off), ctx);
                args_off += 8;
        }
 }
 
-static void restore_args(struct jit_ctx *ctx, int args_off, int nargs)
+static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
 {
        int i;
 
-       for (i = 0; i < nargs; i++) {
+       for (i = 0; i < nregs; i++) {
                emit(A64_LDR64I(i, A64_SP, args_off), ctx);
                args_off += 8;
        }
@@ -1764,7 +1764,7 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nargs)
  */
 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
                              struct bpf_tramp_links *tlinks, void *orig_call,
-                             int nargs, u32 flags)
+                             int nregs, u32 flags)
 {
        int i;
        int stack_size;
@@ -1772,7 +1772,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
        int regs_off;
        int retval_off;
        int args_off;
-       int nargs_off;
+       int nregs_off;
        int ip_off;
        int run_ctx_off;
        struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
@@ -1795,11 +1795,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
         * SP + retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
         *                                        BPF_TRAMP_F_RET_FENTRY_RET
         *
-        *                  [ argN              ]
+        *                  [ arg reg N         ]
         *                  [ ...               ]
-        * SP + args_off    [ arg1              ]
+        * SP + args_off    [ arg reg 1         ]
         *
-        * SP + nargs_off   [ args count        ]
+        * SP + nregs_off   [ arg regs count    ]
         *
         * SP + ip_off      [ traced function   ] BPF_TRAMP_F_IP_ARG flag
         *
@@ -1816,13 +1816,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
        if (flags & BPF_TRAMP_F_IP_ARG)
                stack_size += 8;
 
-       nargs_off = stack_size;
+       nregs_off = stack_size;
        /* room for args count */
        stack_size += 8;
 
        args_off = stack_size;
        /* room for args */
-       stack_size += nargs * 8;
+       stack_size += nregs * 8;
 
        /* room for return value */
        retval_off = stack_size;
@@ -1865,12 +1865,12 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
                emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
        }
 
-       /* save args count*/
-       emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx);
-       emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx);
+       /* save arg regs count*/
+       emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
+       emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
 
-       /* save args */
-       save_args(ctx, args_off, nargs);
+       /* save arg regs */
+       save_args(ctx, args_off, nregs);
 
        /* save callee saved registers */
        emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -1897,7 +1897,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
        }
 
        if (flags & BPF_TRAMP_F_CALL_ORIG) {
-               restore_args(ctx, args_off, nargs);
+               restore_args(ctx, args_off, nregs);
                /* call original func */
                emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
                emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
@@ -1926,7 +1926,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
        }
 
        if (flags & BPF_TRAMP_F_RESTORE_REGS)
-               restore_args(ctx, args_off, nargs);
+               restore_args(ctx, args_off, nregs);
 
        /* restore callee saved register x19 and x20 */
        emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -1967,24 +1967,25 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
                                void *orig_call)
 {
        int i, ret;
-       int nargs = m->nr_args;
+       int nregs = m->nr_args;
        int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
        struct jit_ctx ctx = {
                .image = NULL,
                .idx = 0,
        };
 
-       /* the first 8 arguments are passed by registers */
-       if (nargs > 8)
-               return -ENOTSUPP;
-
-       /* don't support struct argument */
+       /* extra registers needed for struct argument */
        for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
+               /* The arg_size is at most 16 bytes, enforced by the verifier. */
                if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
-                       return -ENOTSUPP;
+                       nregs += (m->arg_size[i] + 7) / 8 - 1;
        }
 
-       ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
+       /* the first 8 registers are used for arguments */
+       if (nregs > 8)
+               return -ENOTSUPP;
+
+       ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
        if (ret < 0)
                return ret;
 
@@ -1995,7 +1996,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
        ctx.idx = 0;
 
        jit_fill_hole(image, (unsigned int)(image_end - image));
-       ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
+       ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
 
        if (ret > 0 && validate_code(&ctx) < 0)
                ret = -EINVAL;
index 5f7252a..7d4b601 100644 (file)
@@ -531,50 +531,25 @@ static const struct net_proto_family alg_family = {
        .owner  =       THIS_MODULE,
 };
 
-int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
-{
-       size_t off;
-       ssize_t n;
-       int npages, i;
-
-       n = iov_iter_get_pages2(iter, sgl->pages, len, ALG_MAX_PAGES, &off);
-       if (n < 0)
-               return n;
-
-       npages = DIV_ROUND_UP(off + n, PAGE_SIZE);
-       if (WARN_ON(npages == 0))
-               return -EINVAL;
-       /* Add one extra for linking */
-       sg_init_table(sgl->sg, npages + 1);
-
-       for (i = 0, len = n; i < npages; i++) {
-               int plen = min_t(int, len, PAGE_SIZE - off);
-
-               sg_set_page(sgl->sg + i, sgl->pages[i], plen, off);
-
-               off = 0;
-               len -= plen;
-       }
-       sg_mark_end(sgl->sg + npages - 1);
-       sgl->npages = npages;
-
-       return n;
-}
-EXPORT_SYMBOL_GPL(af_alg_make_sg);
-
 static void af_alg_link_sg(struct af_alg_sgl *sgl_prev,
                           struct af_alg_sgl *sgl_new)
 {
-       sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
-       sg_chain(sgl_prev->sg, sgl_prev->npages + 1, sgl_new->sg);
+       sg_unmark_end(sgl_prev->sgt.sgl + sgl_prev->sgt.nents - 1);
+       sg_chain(sgl_prev->sgt.sgl, sgl_prev->sgt.nents + 1, sgl_new->sgt.sgl);
 }
 
 void af_alg_free_sg(struct af_alg_sgl *sgl)
 {
        int i;
 
-       for (i = 0; i < sgl->npages; i++)
-               put_page(sgl->pages[i]);
+       if (sgl->sgt.sgl) {
+               if (sgl->need_unpin)
+                       for (i = 0; i < sgl->sgt.nents; i++)
+                               unpin_user_page(sg_page(&sgl->sgt.sgl[i]));
+               if (sgl->sgt.sgl != sgl->sgl)
+                       kvfree(sgl->sgt.sgl);
+               sgl->sgt.sgl = NULL;
+       }
 }
 EXPORT_SYMBOL_GPL(af_alg_free_sg);
 
@@ -1015,7 +990,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
        while (size) {
                struct scatterlist *sg;
                size_t len = size;
-               size_t plen;
+               ssize_t plen;
 
                /* use the existing memory in an allocated page */
                if (ctx->merge) {
@@ -1060,35 +1035,58 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
                if (sgl->cur)
                        sg_unmark_end(sg + sgl->cur - 1);
 
-               do {
-                       struct page *pg;
-                       unsigned int i = sgl->cur;
-
-                       plen = min_t(size_t, len, PAGE_SIZE);
-
-                       pg = alloc_page(GFP_KERNEL);
-                       if (!pg) {
-                               err = -ENOMEM;
+               if (msg->msg_flags & MSG_SPLICE_PAGES) {
+                       struct sg_table sgtable = {
+                               .sgl            = sg,
+                               .nents          = sgl->cur,
+                               .orig_nents     = sgl->cur,
+                       };
+
+                       plen = extract_iter_to_sg(&msg->msg_iter, len, &sgtable,
+                                                 MAX_SGL_ENTS, 0);
+                       if (plen < 0) {
+                               err = plen;
                                goto unlock;
                        }
 
-                       sg_assign_page(sg + i, pg);
-
-                       err = memcpy_from_msg(page_address(sg_page(sg + i)),
-                                             msg, plen);
-                       if (err) {
-                               __free_page(sg_page(sg + i));
-                               sg_assign_page(sg + i, NULL);
-                               goto unlock;
-                       }
-
-                       sg[i].length = plen;
+                       for (; sgl->cur < sgtable.nents; sgl->cur++)
+                               get_page(sg_page(&sg[sgl->cur]));
                        len -= plen;
                        ctx->used += plen;
                        copied += plen;
                        size -= plen;
-                       sgl->cur++;
-               } while (len && sgl->cur < MAX_SGL_ENTS);
+               } else {
+                       do {
+                               struct page *pg;
+                               unsigned int i = sgl->cur;
+
+                               plen = min_t(size_t, len, PAGE_SIZE);
+
+                               pg = alloc_page(GFP_KERNEL);
+                               if (!pg) {
+                                       err = -ENOMEM;
+                                       goto unlock;
+                               }
+
+                               sg_assign_page(sg + i, pg);
+
+                               err = memcpy_from_msg(
+                                       page_address(sg_page(sg + i)),
+                                       msg, plen);
+                               if (err) {
+                                       __free_page(sg_page(sg + i));
+                                       sg_assign_page(sg + i, NULL);
+                                       goto unlock;
+                               }
+
+                               sg[i].length = plen;
+                               len -= plen;
+                               ctx->used += plen;
+                               copied += plen;
+                               size -= plen;
+                               sgl->cur++;
+                       } while (len && sgl->cur < MAX_SGL_ENTS);
+               }
 
                if (!size)
                        sg_mark_end(sg + sgl->cur - 1);
@@ -1121,53 +1119,17 @@ EXPORT_SYMBOL_GPL(af_alg_sendmsg);
 ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
                        int offset, size_t size, int flags)
 {
-       struct sock *sk = sock->sk;
-       struct alg_sock *ask = alg_sk(sk);
-       struct af_alg_ctx *ctx = ask->private;
-       struct af_alg_tsgl *sgl;
-       int err = -EINVAL;
+       struct bio_vec bvec;
+       struct msghdr msg = {
+               .msg_flags = flags | MSG_SPLICE_PAGES,
+       };
 
        if (flags & MSG_SENDPAGE_NOTLAST)
-               flags |= MSG_MORE;
-
-       lock_sock(sk);
-       if (!ctx->more && ctx->used)
-               goto unlock;
-
-       if (!size)
-               goto done;
-
-       if (!af_alg_writable(sk)) {
-               err = af_alg_wait_for_wmem(sk, flags);
-               if (err)
-                       goto unlock;
-       }
-
-       err = af_alg_alloc_tsgl(sk);
-       if (err)
-               goto unlock;
-
-       ctx->merge = 0;
-       sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
-
-       if (sgl->cur)
-               sg_unmark_end(sgl->sg + sgl->cur - 1);
-
-       sg_mark_end(sgl->sg + sgl->cur);
-
-       get_page(page);
-       sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-       sgl->cur++;
-       ctx->used += size;
+               msg.msg_flags |= MSG_MORE;
 
-done:
-       ctx->more = flags & MSG_MORE;
-
-unlock:
-       af_alg_data_wakeup(sk);
-       release_sock(sk);
-
-       return err ?: size;
+       bvec_set_page(&bvec, page, size, offset);
+       iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+       return sock_sendmsg(sock, &msg);
 }
 EXPORT_SYMBOL_GPL(af_alg_sendpage);
 
@@ -1288,8 +1250,8 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
 
        while (maxsize > len && msg_data_left(msg)) {
                struct af_alg_rsgl *rsgl;
+               ssize_t err;
                size_t seglen;
-               int err;
 
                /* limit the amount of readable buffers */
                if (!af_alg_readable(sk))
@@ -1306,16 +1268,23 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                                return -ENOMEM;
                }
 
-               rsgl->sgl.npages = 0;
+               rsgl->sgl.sgt.sgl = rsgl->sgl.sgl;
+               rsgl->sgl.sgt.nents = 0;
+               rsgl->sgl.sgt.orig_nents = 0;
                list_add_tail(&rsgl->list, &areq->rsgl_list);
 
-               /* make one iovec available as scatterlist */
-               err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
+               sg_init_table(rsgl->sgl.sgt.sgl, ALG_MAX_PAGES);
+               err = extract_iter_to_sg(&msg->msg_iter, seglen, &rsgl->sgl.sgt,
+                                        ALG_MAX_PAGES, 0);
                if (err < 0) {
                        rsgl->sg_num_bytes = 0;
                        return err;
                }
 
+               sg_mark_end(rsgl->sgl.sgt.sgl + rsgl->sgl.sgt.nents - 1);
+               rsgl->sgl.need_unpin =
+                       iov_iter_extract_will_pin(&msg->msg_iter);
+
                /* chain the new scatterlist with previous one */
                if (areq->last_rsgl)
                        af_alg_link_sg(&areq->last_rsgl->sgl, &rsgl->sgl);
index 42493b4..35bfa28 100644 (file)
@@ -9,8 +9,8 @@
  * The following concept of the memory management is used:
  *
  * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
- * filled by user space with the data submitted via sendpage/sendmsg. Filling
- * up the TX SGL does not cause a crypto operation -- the data will only be
+ * filled by user space with the data submitted via sendmsg. Filling up
+ * the TX SGL does not cause a crypto operation -- the data will only be
  * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
  * provide a buffer which is tracked with the RX SGL.
  *
@@ -113,19 +113,19 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
        }
 
        /*
-        * Data length provided by caller via sendmsg/sendpage that has not
-        * yet been processed.
+        * Data length provided by caller via sendmsg that has not yet been
+        * processed.
         */
        used = ctx->used;
 
        /*
-        * Make sure sufficient data is present -- note, the same check is
-        * also present in sendmsg/sendpage. The checks in sendpage/sendmsg
-        * shall provide an information to the data sender that something is
-        * wrong, but they are irrelevant to maintain the kernel integrity.
-        * We need this check here too in case user space decides to not honor
-        * the error message in sendmsg/sendpage and still call recvmsg. This
-        * check here protects the kernel integrity.
+        * Make sure sufficient data is present -- note, the same check is also
+        * present in sendmsg. The checks in sendmsg shall provide
+        * information to the data sender that something is wrong, but they are
+        * irrelevant to maintain the kernel integrity.  We need this check
+        * here too in case user space decides to not honor the error message
+        * in sendmsg and still call recvmsg. This check here protects the
+        * kernel integrity.
         */
        if (!aead_sufficient_data(sk))
                return -EINVAL;
@@ -210,7 +210,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
         */
 
        /* Use the RX SGL as source (and destination) for crypto op. */
-       rsgl_src = areq->first_rsgl.sgl.sg;
+       rsgl_src = areq->first_rsgl.sgl.sgt.sgl;
 
        if (ctx->enc) {
                /*
@@ -224,7 +224,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                 * RX SGL: AAD || PT || Tag
                 */
                err = crypto_aead_copy_sgl(null_tfm, tsgl_src,
-                                          areq->first_rsgl.sgl.sg, processed);
+                                          areq->first_rsgl.sgl.sgt.sgl,
+                                          processed);
                if (err)
                        goto free;
                af_alg_pull_tsgl(sk, processed, NULL, 0);
@@ -242,7 +243,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 
                 /* Copy AAD || CT to RX SGL buffer for in-place operation. */
                err = crypto_aead_copy_sgl(null_tfm, tsgl_src,
-                                          areq->first_rsgl.sgl.sg, outlen);
+                                          areq->first_rsgl.sgl.sgt.sgl,
+                                          outlen);
                if (err)
                        goto free;
 
@@ -267,10 +269,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                if (usedpages) {
                        /* RX SGL present */
                        struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl;
+                       struct scatterlist *sg = sgl_prev->sgt.sgl;
 
-                       sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
-                       sg_chain(sgl_prev->sg, sgl_prev->npages + 1,
-                                areq->tsgl);
+                       sg_unmark_end(sg + sgl_prev->sgt.nents - 1);
+                       sg_chain(sg, sgl_prev->sgt.nents + 1, areq->tsgl);
                } else
                        /* no RX SGL present (e.g. authentication only) */
                        rsgl_src = areq->tsgl;
@@ -278,7 +280,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 
        /* Initialize the crypto operation */
        aead_request_set_crypt(&areq->cra_u.aead_req, rsgl_src,
-                              areq->first_rsgl.sgl.sg, used, ctx->iv);
+                              areq->first_rsgl.sgl.sgt.sgl, used, ctx->iv);
        aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen);
        aead_request_set_tfm(&areq->cra_u.aead_req, tfm);
 
index 63af72e..1a2d80c 100644 (file)
@@ -63,70 +63,102 @@ static void hash_free_result(struct sock *sk, struct hash_ctx *ctx)
 static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
                        size_t ignored)
 {
-       int limit = ALG_MAX_PAGES * PAGE_SIZE;
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
        struct hash_ctx *ctx = ask->private;
-       long copied = 0;
+       ssize_t copied = 0;
+       size_t len, max_pages, npages;
+       bool continuing = ctx->more, need_init = false;
        int err;
 
-       if (limit > sk->sk_sndbuf)
-               limit = sk->sk_sndbuf;
+       max_pages = min_t(size_t, ALG_MAX_PAGES,
+                         DIV_ROUND_UP(sk->sk_sndbuf, PAGE_SIZE));
 
        lock_sock(sk);
-       if (!ctx->more) {
+       if (!continuing) {
                if ((msg->msg_flags & MSG_MORE))
                        hash_free_result(sk, ctx);
-
-               err = crypto_wait_req(crypto_ahash_init(&ctx->req), &ctx->wait);
-               if (err)
-                       goto unlock;
+               need_init = true;
        }
 
        ctx->more = false;
 
        while (msg_data_left(msg)) {
-               int len = msg_data_left(msg);
-
-               if (len > limit)
-                       len = limit;
-
-               len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len);
-               if (len < 0) {
-                       err = copied ? 0 : len;
-                       goto unlock;
+               ctx->sgl.sgt.sgl = ctx->sgl.sgl;
+               ctx->sgl.sgt.nents = 0;
+               ctx->sgl.sgt.orig_nents = 0;
+
+               err = -EIO;
+               npages = iov_iter_npages(&msg->msg_iter, max_pages);
+               if (npages == 0)
+                       goto unlock_free;
+
+               if (npages > ARRAY_SIZE(ctx->sgl.sgl)) {
+                       err = -ENOMEM;
+                       ctx->sgl.sgt.sgl =
+                               kvmalloc(array_size(npages,
+                                                   sizeof(*ctx->sgl.sgt.sgl)),
+                                        GFP_KERNEL);
+                       if (!ctx->sgl.sgt.sgl)
+                               goto unlock_free;
                }
+               sg_init_table(ctx->sgl.sgl, npages);
 
-               ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, len);
+               ctx->sgl.need_unpin = iov_iter_extract_will_pin(&msg->msg_iter);
 
-               err = crypto_wait_req(crypto_ahash_update(&ctx->req),
-                                     &ctx->wait);
-               af_alg_free_sg(&ctx->sgl);
-               if (err) {
-                       iov_iter_revert(&msg->msg_iter, len);
-                       goto unlock;
+               err = extract_iter_to_sg(&msg->msg_iter, LONG_MAX,
+                                        &ctx->sgl.sgt, npages, 0);
+               if (err < 0)
+                       goto unlock_free;
+               len = err;
+               sg_mark_end(ctx->sgl.sgt.sgl + ctx->sgl.sgt.nents - 1);
+
+               if (!msg_data_left(msg)) {
+                       err = hash_alloc_result(sk, ctx);
+                       if (err)
+                               goto unlock_free;
                }
 
-               copied += len;
-       }
+               ahash_request_set_crypt(&ctx->req, ctx->sgl.sgt.sgl,
+                                       ctx->result, len);
 
-       err = 0;
+               if (!msg_data_left(msg) && !continuing &&
+                   !(msg->msg_flags & MSG_MORE)) {
+                       err = crypto_ahash_digest(&ctx->req);
+               } else {
+                       if (need_init) {
+                               err = crypto_wait_req(
+                                       crypto_ahash_init(&ctx->req),
+                                       &ctx->wait);
+                               if (err)
+                                       goto unlock_free;
+                               need_init = false;
+                       }
+
+                       if (msg_data_left(msg) || (msg->msg_flags & MSG_MORE))
+                               err = crypto_ahash_update(&ctx->req);
+                       else
+                               err = crypto_ahash_finup(&ctx->req);
+                       continuing = true;
+               }
 
-       ctx->more = msg->msg_flags & MSG_MORE;
-       if (!ctx->more) {
-               err = hash_alloc_result(sk, ctx);
+               err = crypto_wait_req(err, &ctx->wait);
                if (err)
-                       goto unlock;
+                       goto unlock_free;
 
-               ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
-               err = crypto_wait_req(crypto_ahash_final(&ctx->req),
-                                     &ctx->wait);
+               copied += len;
+               af_alg_free_sg(&ctx->sgl);
        }
 
+       ctx->more = msg->msg_flags & MSG_MORE;
+       err = 0;
 unlock:
        release_sock(sk);
+       return copied ?: err;
 
-       return err ?: copied;
+unlock_free:
+       af_alg_free_sg(&ctx->sgl);
+       goto unlock;
 }
 
 static ssize_t hash_sendpage(struct socket *sock, struct page *page,
@@ -141,8 +173,8 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
                flags |= MSG_MORE;
 
        lock_sock(sk);
-       sg_init_table(ctx->sgl.sg, 1);
-       sg_set_page(ctx->sgl.sg, page, size, offset);
+       sg_init_table(ctx->sgl.sgl, 1);
+       sg_set_page(ctx->sgl.sgl, page, size, offset);
 
        if (!(flags & MSG_MORE)) {
                err = hash_alloc_result(sk, ctx);
@@ -151,7 +183,7 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
        } else if (!ctx->more)
                hash_free_result(sk, ctx);
 
-       ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
+       ahash_request_set_crypt(&ctx->req, ctx->sgl.sgl, ctx->result, size);
 
        if (!(flags & MSG_MORE)) {
                if (ctx->more)
index ee8890e..b1f321b 100644 (file)
@@ -9,10 +9,10 @@
  * The following concept of the memory management is used:
  *
  * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
- * filled by user space with the data submitted via sendpage/sendmsg. Filling
- * up the TX SGL does not cause a crypto operation -- the data will only be
- * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
- * provide a buffer which is tracked with the RX SGL.
+ * filled by user space with the data submitted via sendmsg. Filling up the TX
+ * SGL does not cause a crypto operation -- the data will only be tracked by
+ * the kernel. Upon receipt of one recvmsg call, the caller must provide a
+ * buffer which is tracked with the RX SGL.
  *
  * During the processing of the recvmsg operation, the cipher request is
  * allocated and prepared. As part of the recvmsg operation, the processed
@@ -105,7 +105,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
        /* Initialize the crypto operation */
        skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm);
        skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl,
-                                  areq->first_rsgl.sgl.sg, len, ctx->iv);
+                                  areq->first_rsgl.sgl.sgt.sgl, len, ctx->iv);
 
        if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
                /* AIO operation */
index 4b292e0..ffb16be 100644 (file)
@@ -312,7 +312,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
 }
 
 /*
- * 0copy TCP transmit interface: Use do_tcp_sendpages.
+ * 0copy TCP transmit interface: Use MSG_SPLICE_PAGES.
  *
  * Using sendpage to push page by page appears to be less efficient
  * than using sendmsg, even if data are copied.
@@ -323,20 +323,27 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
 static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
                             size_t size)
 {
+       struct bio_vec bvec;
+       struct msghdr msg = {
+               .msg_flags = (MSG_MORE | MSG_DONTWAIT | MSG_SENDPAGE_NOTLAST |
+                             MSG_SPLICE_PAGES),
+       };
        struct sock *sk = s->sk;
-       int i = 0, rv = 0, sent = 0,
-           flags = MSG_MORE | MSG_DONTWAIT | MSG_SENDPAGE_NOTLAST;
+       int i = 0, rv = 0, sent = 0;
 
        while (size) {
                size_t bytes = min_t(size_t, PAGE_SIZE - offset, size);
 
                if (size + offset <= PAGE_SIZE)
-                       flags = MSG_MORE | MSG_DONTWAIT;
+                       msg.msg_flags &= ~MSG_SENDPAGE_NOTLAST;
 
                tcp_rate_check_app_limited(sk);
+               bvec_set_page(&bvec, page[i], bytes, offset);
+               iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
 try_page_again:
                lock_sock(sk);
-               rv = do_tcp_sendpages(sk, page[i], offset, bytes, flags);
+               rv = tcp_sendmsg_locked(sk, &msg, size);
                release_sock(sk);
 
                if (rv > 0) {
index d5e774d..b0a6f27 100644 (file)
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/timer.h>
 #include "../leds.h"
 
+#define NETDEV_LED_DEFAULT_INTERVAL    50
+
 /*
  * Configurable sysfs attributes:
  *
@@ -37,7 +39,7 @@
  */
 
 struct led_netdev_data {
-       spinlock_t lock;
+       struct mutex lock;
 
        struct delayed_work work;
        struct notifier_block notifier;
@@ -50,16 +52,8 @@ struct led_netdev_data {
        unsigned int last_activity;
 
        unsigned long mode;
-#define NETDEV_LED_LINK        0
-#define NETDEV_LED_TX  1
-#define NETDEV_LED_RX  2
-#define NETDEV_LED_MODE_LINKUP 3
-};
-
-enum netdev_led_attr {
-       NETDEV_ATTR_LINK,
-       NETDEV_ATTR_TX,
-       NETDEV_ATTR_RX
+       bool carrier_link_up;
+       bool hw_control;
 };
 
 static void set_baseline_state(struct led_netdev_data *trigger_data)
@@ -67,16 +61,23 @@ static void set_baseline_state(struct led_netdev_data *trigger_data)
        int current_brightness;
        struct led_classdev *led_cdev = trigger_data->led_cdev;
 
+       /* Already validated, hw control is possible with the requested mode */
+       if (trigger_data->hw_control) {
+               led_cdev->hw_control_set(led_cdev, trigger_data->mode);
+
+               return;
+       }
+
        current_brightness = led_cdev->brightness;
        if (current_brightness)
                led_cdev->blink_brightness = current_brightness;
        if (!led_cdev->blink_brightness)
                led_cdev->blink_brightness = led_cdev->max_brightness;
 
-       if (!test_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode))
+       if (!trigger_data->carrier_link_up) {
                led_set_brightness(led_cdev, LED_OFF);
-       else {
-               if (test_bit(NETDEV_LED_LINK, &trigger_data->mode))
+       } else {
+               if (test_bit(TRIGGER_NETDEV_LINK, &trigger_data->mode))
                        led_set_brightness(led_cdev,
                                           led_cdev->blink_brightness);
                else
@@ -85,44 +86,107 @@ static void set_baseline_state(struct led_netdev_data *trigger_data)
                /* If we are looking for RX/TX start periodically
                 * checking stats
                 */
-               if (test_bit(NETDEV_LED_TX, &trigger_data->mode) ||
-                   test_bit(NETDEV_LED_RX, &trigger_data->mode))
+               if (test_bit(TRIGGER_NETDEV_TX, &trigger_data->mode) ||
+                   test_bit(TRIGGER_NETDEV_RX, &trigger_data->mode))
                        schedule_delayed_work(&trigger_data->work, 0);
        }
 }
 
+static bool supports_hw_control(struct led_classdev *led_cdev)
+{
+       if (!led_cdev->hw_control_get || !led_cdev->hw_control_set ||
+           !led_cdev->hw_control_is_supported)
+               return false;
+
+       return !strcmp(led_cdev->hw_control_trigger, led_cdev->trigger->name);
+}
+
+/*
+ * Validate the configured netdev is the same as the one associated with
+ * the LED driver in hw control.
+ */
+static bool validate_net_dev(struct led_classdev *led_cdev,
+                            struct net_device *net_dev)
+{
+       struct device *dev = led_cdev->hw_control_get_device(led_cdev);
+       struct net_device *ndev;
+
+       if (!dev)
+               return false;
+
+       ndev = to_net_dev(dev);
+
+       return ndev == net_dev;
+}
+
+static bool can_hw_control(struct led_netdev_data *trigger_data)
+{
+       unsigned long default_interval = msecs_to_jiffies(NETDEV_LED_DEFAULT_INTERVAL);
+       unsigned int interval = atomic_read(&trigger_data->interval);
+       struct led_classdev *led_cdev = trigger_data->led_cdev;
+       int ret;
+
+       if (!supports_hw_control(led_cdev))
+               return false;
+
+       /*
+        * Interval must be set to the default
+        * value. Any different value is rejected if in hw
+        * control.
+        */
+       if (interval != default_interval)
+               return false;
+
+       /*
+        * net_dev must be set with hw control, otherwise no
+        * blinking can be happening and there is nothing to
+        * offloaded. Additionally, for hw control to be
+        * valid, the configured netdev must be the same as
+        * netdev associated to the LED.
+        */
+       if (!validate_net_dev(led_cdev, trigger_data->net_dev))
+               return false;
+
+       /* Check if the requested mode is supported */
+       ret = led_cdev->hw_control_is_supported(led_cdev, trigger_data->mode);
+       /* Fall back to software blinking if not supported */
+       if (ret == -EOPNOTSUPP)
+               return false;
+       if (ret) {
+               dev_warn(led_cdev->dev,
+                        "Current mode check failed with error %d\n", ret);
+               return false;
+       }
+
+       return true;
+}
+
 static ssize_t device_name_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
        struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
        ssize_t len;
 
-       spin_lock_bh(&trigger_data->lock);
+       mutex_lock(&trigger_data->lock);
        len = sprintf(buf, "%s\n", trigger_data->device_name);
-       spin_unlock_bh(&trigger_data->lock);
+       mutex_unlock(&trigger_data->lock);
 
        return len;
 }
 
-static ssize_t device_name_store(struct device *dev,
-                                struct device_attribute *attr, const char *buf,
-                                size_t size)
+static int set_device_name(struct led_netdev_data *trigger_data,
+                          const char *name, size_t size)
 {
-       struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
-
-       if (size >= IFNAMSIZ)
-               return -EINVAL;
-
        cancel_delayed_work_sync(&trigger_data->work);
 
-       spin_lock_bh(&trigger_data->lock);
+       mutex_lock(&trigger_data->lock);
 
        if (trigger_data->net_dev) {
                dev_put(trigger_data->net_dev);
                trigger_data->net_dev = NULL;
        }
 
-       memcpy(trigger_data->device_name, buf, size);
+       memcpy(trigger_data->device_name, name, size);
        trigger_data->device_name[size] = 0;
        if (size > 0 && trigger_data->device_name[size - 1] == '\n')
                trigger_data->device_name[size - 1] = 0;
@@ -131,36 +195,48 @@ static ssize_t device_name_store(struct device *dev,
                trigger_data->net_dev =
                    dev_get_by_name(&init_net, trigger_data->device_name);
 
-       clear_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
+       trigger_data->carrier_link_up = false;
        if (trigger_data->net_dev != NULL)
-               if (netif_carrier_ok(trigger_data->net_dev))
-                       set_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
+               trigger_data->carrier_link_up = netif_carrier_ok(trigger_data->net_dev);
 
        trigger_data->last_activity = 0;
 
        set_baseline_state(trigger_data);
-       spin_unlock_bh(&trigger_data->lock);
+       mutex_unlock(&trigger_data->lock);
 
+       return 0;
+}
+
+static ssize_t device_name_store(struct device *dev,
+                                struct device_attribute *attr, const char *buf,
+                                size_t size)
+{
+       struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
+       int ret;
+
+       if (size >= IFNAMSIZ)
+               return -EINVAL;
+
+       ret = set_device_name(trigger_data, buf, size);
+
+       if (ret < 0)
+               return ret;
        return size;
 }
 
 static DEVICE_ATTR_RW(device_name);
 
 static ssize_t netdev_led_attr_show(struct device *dev, char *buf,
-       enum netdev_led_attr attr)
+                                   enum led_trigger_netdev_modes attr)
 {
        struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
        int bit;
 
        switch (attr) {
-       case NETDEV_ATTR_LINK:
-               bit = NETDEV_LED_LINK;
-               break;
-       case NETDEV_ATTR_TX:
-               bit = NETDEV_LED_TX;
-               break;
-       case NETDEV_ATTR_RX:
-               bit = NETDEV_LED_RX;
+       case TRIGGER_NETDEV_LINK:
+       case TRIGGER_NETDEV_TX:
+       case TRIGGER_NETDEV_RX:
+               bit = attr;
                break;
        default:
                return -EINVAL;
@@ -170,7 +246,7 @@ static ssize_t netdev_led_attr_show(struct device *dev, char *buf,
 }
 
 static ssize_t netdev_led_attr_store(struct device *dev, const char *buf,
-       size_t size, enum netdev_led_attr attr)
+                                    size_t size, enum led_trigger_netdev_modes attr)
 {
        struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
        unsigned long state;
@@ -182,14 +258,10 @@ static ssize_t netdev_led_attr_store(struct device *dev, const char *buf,
                return ret;
 
        switch (attr) {
-       case NETDEV_ATTR_LINK:
-               bit = NETDEV_LED_LINK;
-               break;
-       case NETDEV_ATTR_TX:
-               bit = NETDEV_LED_TX;
-               break;
-       case NETDEV_ATTR_RX:
-               bit = NETDEV_LED_RX;
+       case TRIGGER_NETDEV_LINK:
+       case TRIGGER_NETDEV_TX:
+       case TRIGGER_NETDEV_RX:
+               bit = attr;
                break;
        default:
                return -EINVAL;
@@ -202,52 +274,29 @@ static ssize_t netdev_led_attr_store(struct device *dev, const char *buf,
        else
                clear_bit(bit, &trigger_data->mode);
 
+       trigger_data->hw_control = can_hw_control(trigger_data);
+
        set_baseline_state(trigger_data);
 
        return size;
 }
 
-static ssize_t link_show(struct device *dev,
-       struct device_attribute *attr, char *buf)
-{
-       return netdev_led_attr_show(dev, buf, NETDEV_ATTR_LINK);
-}
-
-static ssize_t link_store(struct device *dev,
-       struct device_attribute *attr, const char *buf, size_t size)
-{
-       return netdev_led_attr_store(dev, buf, size, NETDEV_ATTR_LINK);
-}
-
-static DEVICE_ATTR_RW(link);
-
-static ssize_t tx_show(struct device *dev,
-       struct device_attribute *attr, char *buf)
-{
-       return netdev_led_attr_show(dev, buf, NETDEV_ATTR_TX);
-}
-
-static ssize_t tx_store(struct device *dev,
-       struct device_attribute *attr, const char *buf, size_t size)
-{
-       return netdev_led_attr_store(dev, buf, size, NETDEV_ATTR_TX);
-}
-
-static DEVICE_ATTR_RW(tx);
-
-static ssize_t rx_show(struct device *dev,
-       struct device_attribute *attr, char *buf)
-{
-       return netdev_led_attr_show(dev, buf, NETDEV_ATTR_RX);
-}
-
-static ssize_t rx_store(struct device *dev,
-       struct device_attribute *attr, const char *buf, size_t size)
-{
-       return netdev_led_attr_store(dev, buf, size, NETDEV_ATTR_RX);
-}
-
-static DEVICE_ATTR_RW(rx);
+#define DEFINE_NETDEV_TRIGGER(trigger_name, trigger) \
+       static ssize_t trigger_name##_show(struct device *dev, \
+               struct device_attribute *attr, char *buf) \
+       { \
+               return netdev_led_attr_show(dev, buf, trigger); \
+       } \
+       static ssize_t trigger_name##_store(struct device *dev, \
+               struct device_attribute *attr, const char *buf, size_t size) \
+       { \
+               return netdev_led_attr_store(dev, buf, size, trigger); \
+       } \
+       static DEVICE_ATTR_RW(trigger_name)
+
+DEFINE_NETDEV_TRIGGER(link, TRIGGER_NETDEV_LINK);
+DEFINE_NETDEV_TRIGGER(tx, TRIGGER_NETDEV_TX);
+DEFINE_NETDEV_TRIGGER(rx, TRIGGER_NETDEV_RX);
 
 static ssize_t interval_show(struct device *dev,
                             struct device_attribute *attr, char *buf)
@@ -266,6 +315,9 @@ static ssize_t interval_store(struct device *dev,
        unsigned long value;
        int ret;
 
+       if (trigger_data->hw_control)
+               return -EINVAL;
+
        ret = kstrtoul(buf, 0, &value);
        if (ret)
                return ret;
@@ -313,11 +365,13 @@ static int netdev_trig_notify(struct notifier_block *nb,
 
        cancel_delayed_work_sync(&trigger_data->work);
 
-       spin_lock_bh(&trigger_data->lock);
+       mutex_lock(&trigger_data->lock);
 
-       clear_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
+       trigger_data->carrier_link_up = false;
        switch (evt) {
        case NETDEV_CHANGENAME:
+               trigger_data->carrier_link_up = netif_carrier_ok(dev);
+               fallthrough;
        case NETDEV_REGISTER:
                if (trigger_data->net_dev)
                        dev_put(trigger_data->net_dev);
@@ -330,14 +384,13 @@ static int netdev_trig_notify(struct notifier_block *nb,
                break;
        case NETDEV_UP:
        case NETDEV_CHANGE:
-               if (netif_carrier_ok(dev))
-                       set_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
+               trigger_data->carrier_link_up = netif_carrier_ok(dev);
                break;
        }
 
        set_baseline_state(trigger_data);
 
-       spin_unlock_bh(&trigger_data->lock);
+       mutex_unlock(&trigger_data->lock);
 
        return NOTIFY_DONE;
 }
@@ -360,21 +413,21 @@ static void netdev_trig_work(struct work_struct *work)
        }
 
        /* If we are not looking for RX/TX then return  */
-       if (!test_bit(NETDEV_LED_TX, &trigger_data->mode) &&
-           !test_bit(NETDEV_LED_RX, &trigger_data->mode))
+       if (!test_bit(TRIGGER_NETDEV_TX, &trigger_data->mode) &&
+           !test_bit(TRIGGER_NETDEV_RX, &trigger_data->mode))
                return;
 
        dev_stats = dev_get_stats(trigger_data->net_dev, &temp);
        new_activity =
-           (test_bit(NETDEV_LED_TX, &trigger_data->mode) ?
+           (test_bit(TRIGGER_NETDEV_TX, &trigger_data->mode) ?
                dev_stats->tx_packets : 0) +
-           (test_bit(NETDEV_LED_RX, &trigger_data->mode) ?
+           (test_bit(TRIGGER_NETDEV_RX, &trigger_data->mode) ?
                dev_stats->rx_packets : 0);
 
        if (trigger_data->last_activity != new_activity) {
                led_stop_software_blink(trigger_data->led_cdev);
 
-               invert = test_bit(NETDEV_LED_LINK, &trigger_data->mode);
+               invert = test_bit(TRIGGER_NETDEV_LINK, &trigger_data->mode);
                interval = jiffies_to_msecs(
                                atomic_read(&trigger_data->interval));
                /* base state is ON (link present) */
@@ -392,13 +445,15 @@ static void netdev_trig_work(struct work_struct *work)
 static int netdev_trig_activate(struct led_classdev *led_cdev)
 {
        struct led_netdev_data *trigger_data;
+       unsigned long mode;
+       struct device *dev;
        int rc;
 
        trigger_data = kzalloc(sizeof(struct led_netdev_data), GFP_KERNEL);
        if (!trigger_data)
                return -ENOMEM;
 
-       spin_lock_init(&trigger_data->lock);
+       mutex_init(&trigger_data->lock);
 
        trigger_data->notifier.notifier_call = netdev_trig_notify;
        trigger_data->notifier.priority = 10;
@@ -410,9 +465,24 @@ static int netdev_trig_activate(struct led_classdev *led_cdev)
        trigger_data->device_name[0] = 0;
 
        trigger_data->mode = 0;
-       atomic_set(&trigger_data->interval, msecs_to_jiffies(50));
+       atomic_set(&trigger_data->interval, msecs_to_jiffies(NETDEV_LED_DEFAULT_INTERVAL));
        trigger_data->last_activity = 0;
 
+       /* Check if hw control is active by default on the LED.
+        * Init already enabled mode in hw control.
+        */
+       if (supports_hw_control(led_cdev) &&
+           !led_cdev->hw_control_get(led_cdev, &mode)) {
+               dev = led_cdev->hw_control_get_device(led_cdev);
+               if (dev) {
+                       const char *name = dev_name(dev);
+
+                       set_device_name(trigger_data, name, strlen(name));
+                       trigger_data->hw_control = true;
+                       trigger_data->mode = mode;
+               }
+       }
+
        led_set_trigger_data(led_cdev, trigger_data);
 
        rc = register_netdevice_notifier(&trigger_data->notifier);
index d0a1ed2..368c6f5 100644 (file)
@@ -403,7 +403,6 @@ config TUN_VNET_CROSS_LE
 config VETH
        tristate "Virtual ethernet pair device"
        select PAGE_POOL
-       select PAGE_POOL_STATS
        help
          This device is a local ethernet tunnel. Devices are created in pairs.
          When one end receives the packet it appears on its pair and vice
index edbaa14..007cec2 100644 (file)
@@ -1,8 +1,9 @@
+// SPDX-License-Identifier: GPL-1.0+
 /*
  * originally based on the dummy device.
  *
  * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
- * Licensed under the GPL. Based on dummy.c, and eql.c devices.
+ * Based on dummy.c, and eql.c devices.
  *
  * bonding.c: an Ethernet Bonding driver
  *
@@ -2871,6 +2872,8 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
        return ret;
 }
 
+#define BOND_VLAN_PROTO_NONE cpu_to_be16(0xffff)
+
 static bool bond_handle_vlan(struct slave *slave, struct bond_vlan_tag *tags,
                             struct sk_buff *skb)
 {
@@ -2878,13 +2881,13 @@ static bool bond_handle_vlan(struct slave *slave, struct bond_vlan_tag *tags,
        struct net_device *slave_dev = slave->dev;
        struct bond_vlan_tag *outer_tag = tags;
 
-       if (!tags || tags->vlan_proto == VLAN_N_VID)
+       if (!tags || tags->vlan_proto == BOND_VLAN_PROTO_NONE)
                return true;
 
        tags++;
 
        /* Go through all the tags backwards and add them to the packet */
-       while (tags->vlan_proto != VLAN_N_VID) {
+       while (tags->vlan_proto != BOND_VLAN_PROTO_NONE) {
                if (!tags->vlan_id) {
                        tags++;
                        continue;
@@ -2960,7 +2963,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
                tags = kcalloc(level + 1, sizeof(*tags), GFP_ATOMIC);
                if (!tags)
                        return ERR_PTR(-ENOMEM);
-               tags[level].vlan_proto = VLAN_N_VID;
+               tags[level].vlan_proto = BOND_VLAN_PROTO_NONE;
                return tags;
        }
 
index 48cdf3a..fef6288 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-1.0+ */
 /*
  * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'.
  *
@@ -7,9 +8,6 @@
  * BUT, I'm the one who modified it for ethernet, so:
  * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov
  *
- *     This software may be used and distributed according to the terms
- *     of the GNU Public License, incorporated herein by reference.
- *
  */
 
 #ifndef _BONDING_PRIV_H
index 199cb20..4621266 100644 (file)
@@ -1346,7 +1346,7 @@ static int at91_can_probe(struct platform_device *pdev)
        return err;
 }
 
-static int at91_can_remove(struct platform_device *pdev)
+static void at91_can_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
        struct at91_priv *priv = netdev_priv(dev);
@@ -1362,8 +1362,6 @@ static int at91_can_remove(struct platform_device *pdev)
        clk_put(priv->clk);
 
        free_candev(dev);
-
-       return 0;
 }
 
 static const struct platform_device_id at91_can_id_table[] = {
@@ -1381,7 +1379,7 @@ MODULE_DEVICE_TABLE(platform, at91_can_id_table);
 
 static struct platform_driver at91_can_driver = {
        .probe = at91_can_probe,
-       .remove = at91_can_remove,
+       .remove_new = at91_can_remove,
        .driver = {
                .name = KBUILD_MODNAME,
                .of_match_table = of_match_ptr(at91_can_dt_ids),
index 027a8a1..39de716 100644 (file)
@@ -966,22 +966,16 @@ static int bxcan_probe(struct platform_device *pdev)
        }
 
        rx_irq = platform_get_irq_byname(pdev, "rx0");
-       if (rx_irq < 0) {
-               dev_err(dev, "failed to get rx0 irq\n");
+       if (rx_irq < 0)
                return rx_irq;
-       }
 
        tx_irq = platform_get_irq_byname(pdev, "tx");
-       if (tx_irq < 0) {
-               dev_err(dev, "failed to get tx irq\n");
+       if (tx_irq < 0)
                return tx_irq;
-       }
 
        sce_irq = platform_get_irq_byname(pdev, "sce");
-       if (sce_irq < 0) {
-               dev_err(dev, "failed to get sce irq\n");
+       if (sce_irq < 0)
                return sce_irq;
-       }
 
        ndev = alloc_candev(sizeof(struct bxcan_priv), BXCAN_TX_MB_NUM);
        if (!ndev) {
@@ -1039,7 +1033,7 @@ out_free_candev:
        return err;
 }
 
-static int bxcan_remove(struct platform_device *pdev)
+static void bxcan_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct bxcan_priv *priv = netdev_priv(ndev);
@@ -1048,7 +1042,6 @@ static int bxcan_remove(struct platform_device *pdev)
        clk_disable_unprepare(priv->clk);
        can_rx_offload_del(&priv->offload);
        free_candev(ndev);
-       return 0;
 }
 
 static int __maybe_unused bxcan_suspend(struct device *dev)
@@ -1100,7 +1093,7 @@ static struct platform_driver bxcan_driver = {
                .of_match_table = bxcan_of_match,
        },
        .probe = bxcan_probe,
-       .remove = bxcan_remove,
+       .remove_new = bxcan_remove,
 };
 
 module_platform_driver(bxcan_driver);
index 03ccb7c..925930b 100644 (file)
@@ -410,7 +410,7 @@ exit:
        return ret;
 }
 
-static int c_can_plat_remove(struct platform_device *pdev)
+static void c_can_plat_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
        struct c_can_priv *priv = netdev_priv(dev);
@@ -418,8 +418,6 @@ static int c_can_plat_remove(struct platform_device *pdev)
        unregister_c_can_dev(dev);
        pm_runtime_disable(priv->device);
        free_c_can_dev(dev);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM
@@ -487,7 +485,7 @@ static struct platform_driver c_can_plat_driver = {
                .of_match_table = c_can_of_table,
        },
        .probe = c_can_plat_probe,
-       .remove = c_can_plat_remove,
+       .remove_new = c_can_plat_remove,
        .suspend = c_can_suspend,
        .resume = c_can_resume,
        .id_table = c_can_id_table,
index 8f6dccd..2200944 100644 (file)
@@ -285,7 +285,7 @@ exit:
        return err;
 }
 
-static int cc770_isa_remove(struct platform_device *pdev)
+static void cc770_isa_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
        struct cc770_priv *priv = netdev_priv(dev);
@@ -303,13 +303,11 @@ static int cc770_isa_remove(struct platform_device *pdev)
                        release_region(port[idx], CC770_IOSIZE);
        }
        free_cc770dev(dev);
-
-       return 0;
 }
 
 static struct platform_driver cc770_isa_driver = {
        .probe = cc770_isa_probe,
-       .remove = cc770_isa_remove,
+       .remove_new = cc770_isa_remove,
        .driver = {
                .name = KBUILD_MODNAME,
        },
index 8dcc32e..13bcfba 100644 (file)
@@ -230,7 +230,7 @@ exit_release_mem:
        return err;
 }
 
-static int cc770_platform_remove(struct platform_device *pdev)
+static void cc770_platform_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
        struct cc770_priv *priv = netdev_priv(dev);
@@ -242,8 +242,6 @@ static int cc770_platform_remove(struct platform_device *pdev)
 
        mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        release_mem_region(mem->start, resource_size(mem));
-
-       return 0;
 }
 
 static const struct of_device_id cc770_platform_table[] = {
@@ -259,7 +257,7 @@ static struct platform_driver cc770_platform_driver = {
                .of_match_table = cc770_platform_table,
        },
        .probe = cc770_platform_probe,
-       .remove = cc770_platform_remove,
+       .remove_new = cc770_platform_remove,
 };
 
 module_platform_driver(cc770_platform_driver);
index a17561d..55bb10b 100644 (file)
@@ -86,7 +86,7 @@ err:
  * This function frees all the resources allocated to the device.
  * Return: 0 always
  */
-static int ctucan_platform_remove(struct platform_device *pdev)
+static void ctucan_platform_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct ctucan_priv *priv = netdev_priv(ndev);
@@ -97,8 +97,6 @@ static int ctucan_platform_remove(struct platform_device *pdev)
        pm_runtime_disable(&pdev->dev);
        netif_napi_del(&priv->napi);
        free_candev(ndev);
-
-       return 0;
 }
 
 static SIMPLE_DEV_PM_OPS(ctucan_platform_pm_ops, ctucan_suspend, ctucan_resume);
@@ -113,7 +111,7 @@ MODULE_DEVICE_TABLE(of, ctucan_of_match);
 
 static struct platform_driver ctucanfd_driver = {
        .probe  = ctucan_platform_probe,
-       .remove = ctucan_platform_remove,
+       .remove_new = ctucan_platform_remove,
        .driver = {
                .name = DRV_NAME,
                .pm = &ctucan_platform_pm_ops,
index 6d638c9..ff0fc18 100644 (file)
@@ -2218,7 +2218,7 @@ static int flexcan_probe(struct platform_device *pdev)
        return err;
 }
 
-static int flexcan_remove(struct platform_device *pdev)
+static void flexcan_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
 
@@ -2227,8 +2227,6 @@ static int flexcan_remove(struct platform_device *pdev)
        unregister_flexcandev(dev);
        pm_runtime_disable(&pdev->dev);
        free_candev(dev);
-
-       return 0;
 }
 
 static int __maybe_unused flexcan_suspend(struct device *device)
@@ -2379,7 +2377,7 @@ static struct platform_driver flexcan_driver = {
                .of_match_table = flexcan_of_match,
        },
        .probe = flexcan_probe,
-       .remove = flexcan_remove,
+       .remove_new = flexcan_remove,
        .id_table = flexcan_id_table,
 };
 
index 4bedcc3..3174efd 100644 (file)
@@ -1696,7 +1696,7 @@ exit_error:
        return err;
 }
 
-static int grcan_remove(struct platform_device *ofdev)
+static void grcan_remove(struct platform_device *ofdev)
 {
        struct net_device *dev = platform_get_drvdata(ofdev);
        struct grcan_priv *priv = netdev_priv(dev);
@@ -1706,8 +1706,6 @@ static int grcan_remove(struct platform_device *ofdev)
        irq_dispose_mapping(dev->irq);
        netif_napi_del(&priv->napi);
        free_candev(dev);
-
-       return 0;
 }
 
 static const struct of_device_id grcan_match[] = {
@@ -1726,7 +1724,7 @@ static struct platform_driver grcan_driver = {
                .of_match_table = grcan_match,
        },
        .probe = grcan_probe,
-       .remove = grcan_remove,
+       .remove_new = grcan_remove,
 };
 
 module_platform_driver(grcan_driver);
index 07eaf72..1d6642c 100644 (file)
@@ -1013,15 +1013,13 @@ err_reg:
        return ret;
 }
 
-static int ifi_canfd_plat_remove(struct platform_device *pdev)
+static void ifi_canfd_plat_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
 
        unregister_candev(ndev);
        platform_set_drvdata(pdev, NULL);
        free_candev(ndev);
-
-       return 0;
 }
 
 static const struct of_device_id ifi_canfd_of_table[] = {
@@ -1036,7 +1034,7 @@ static struct platform_driver ifi_canfd_plat_driver = {
                .of_match_table = ifi_canfd_of_table,
        },
        .probe  = ifi_canfd_plat_probe,
-       .remove = ifi_canfd_plat_remove,
+       .remove_new = ifi_canfd_plat_remove,
 };
 
 module_platform_driver(ifi_canfd_plat_driver);
index 0732a50..d048ea5 100644 (file)
@@ -2023,7 +2023,7 @@ out_return:
        return ret;
 }
 
-static int ican3_remove(struct platform_device *pdev)
+static void ican3_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct ican3_dev *mod = netdev_priv(ndev);
@@ -2042,8 +2042,6 @@ static int ican3_remove(struct platform_device *pdev)
        iounmap(mod->dpm);
 
        free_candev(ndev);
-
-       return 0;
 }
 
 static struct platform_driver ican3_driver = {
@@ -2051,7 +2049,7 @@ static struct platform_driver ican3_driver = {
                .name   = DRV_NAME,
        },
        .probe          = ican3_probe,
-       .remove         = ican3_remove,
+       .remove_new     = ican3_remove,
 };
 
 module_platform_driver(ican3_driver);
index 9c1dcf8..94dc826 100644 (file)
@@ -164,7 +164,7 @@ static __maybe_unused int m_can_resume(struct device *dev)
        return m_can_class_resume(dev);
 }
 
-static int m_can_plat_remove(struct platform_device *pdev)
+static void m_can_plat_remove(struct platform_device *pdev)
 {
        struct m_can_plat_priv *priv = platform_get_drvdata(pdev);
        struct m_can_classdev *mcan_class = &priv->cdev;
@@ -172,8 +172,6 @@ static int m_can_plat_remove(struct platform_device *pdev)
        m_can_class_unregister(mcan_class);
 
        m_can_class_free_dev(mcan_class->net);
-
-       return 0;
 }
 
 static int __maybe_unused m_can_runtime_suspend(struct device *dev)
@@ -223,7 +221,7 @@ static struct platform_driver m_can_plat_driver = {
                .pm     = &m_can_pmops,
        },
        .probe = m_can_plat_probe,
-       .remove = m_can_plat_remove,
+       .remove_new = m_can_plat_remove,
 };
 
 module_platform_driver(m_can_plat_driver);
index b0ed798..4837df6 100644 (file)
@@ -349,7 +349,7 @@ exit_unmap_mem:
        return err;
 }
 
-static int mpc5xxx_can_remove(struct platform_device *ofdev)
+static void mpc5xxx_can_remove(struct platform_device *ofdev)
 {
        const struct of_device_id *match;
        const struct mpc5xxx_can_data *data;
@@ -365,8 +365,6 @@ static int mpc5xxx_can_remove(struct platform_device *ofdev)
        iounmap(priv->reg_base);
        irq_dispose_mapping(dev->irq);
        free_candev(dev);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM
@@ -437,7 +435,7 @@ static struct platform_driver mpc5xxx_can_driver = {
                .of_match_table = mpc5xxx_can_table,
        },
        .probe = mpc5xxx_can_probe,
-       .remove = mpc5xxx_can_remove,
+       .remove_new = mpc5xxx_can_remove,
 #ifdef CONFIG_PM
        .suspend = mpc5xxx_can_suspend,
        .resume = mpc5xxx_can_resume,
index cc43c9c..f5aa5db 100644 (file)
@@ -824,7 +824,7 @@ fail:
        return err;
 }
 
-static int rcar_can_remove(struct platform_device *pdev)
+static void rcar_can_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct rcar_can_priv *priv = netdev_priv(ndev);
@@ -832,7 +832,6 @@ static int rcar_can_remove(struct platform_device *pdev)
        unregister_candev(ndev);
        netif_napi_del(&priv->napi);
        free_candev(ndev);
-       return 0;
 }
 
 static int __maybe_unused rcar_can_suspend(struct device *dev)
@@ -908,7 +907,7 @@ static struct platform_driver rcar_can_driver = {
                .pm = &rcar_can_pm_ops,
        },
        .probe = rcar_can_probe,
-       .remove = rcar_can_remove,
+       .remove_new = rcar_can_remove,
 };
 
 module_platform_driver(rcar_can_driver);
index 963c42f..e4d7489 100644 (file)
@@ -2078,7 +2078,7 @@ fail_dev:
        return err;
 }
 
-static int rcar_canfd_remove(struct platform_device *pdev)
+static void rcar_canfd_remove(struct platform_device *pdev)
 {
        struct rcar_canfd_global *gpriv = platform_get_drvdata(pdev);
        u32 ch;
@@ -2096,8 +2096,6 @@ static int rcar_canfd_remove(struct platform_device *pdev)
        clk_disable_unprepare(gpriv->clkp);
        reset_control_assert(gpriv->rstc1);
        reset_control_assert(gpriv->rstc2);
-
-       return 0;
 }
 
 static int __maybe_unused rcar_canfd_suspend(struct device *dev)
@@ -2130,7 +2128,7 @@ static struct platform_driver rcar_canfd_driver = {
                .pm = &rcar_canfd_pm_ops,
        },
        .probe = rcar_canfd_probe,
-       .remove = rcar_canfd_remove,
+       .remove_new = rcar_canfd_remove,
 };
 
 module_platform_driver(rcar_canfd_driver);
index db3e767..fca5a9a 100644 (file)
@@ -223,7 +223,7 @@ exit:
        return err;
 }
 
-static int sja1000_isa_remove(struct platform_device *pdev)
+static void sja1000_isa_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
        struct sja1000_priv *priv = netdev_priv(dev);
@@ -241,13 +241,11 @@ static int sja1000_isa_remove(struct platform_device *pdev)
                        release_region(port[idx], SJA1000_IOSIZE);
        }
        free_sja1000dev(dev);
-
-       return 0;
 }
 
 static struct platform_driver sja1000_isa_driver = {
        .probe = sja1000_isa_probe,
-       .remove = sja1000_isa_remove,
+       .remove_new = sja1000_isa_remove,
        .driver = {
                .name = DRV_NAME,
        },
index 6779d53..b4889b5 100644 (file)
@@ -317,19 +317,17 @@ static int sp_probe(struct platform_device *pdev)
        return err;
 }
 
-static int sp_remove(struct platform_device *pdev)
+static void sp_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
 
        unregister_sja1000dev(dev);
        free_sja1000dev(dev);
-
-       return 0;
 }
 
 static struct platform_driver sp_driver = {
        .probe = sp_probe,
-       .remove = sp_remove,
+       .remove_new = sp_remove,
        .driver = {
                .name = DRV_NAME,
                .of_match_table = sp_of_table,
index c72f505..bd25137 100644 (file)
@@ -729,7 +729,7 @@ static const struct attribute_group softing_pdev_group = {
 /*
  * platform driver
  */
-static int softing_pdev_remove(struct platform_device *pdev)
+static void softing_pdev_remove(struct platform_device *pdev)
 {
        struct softing *card = platform_get_drvdata(pdev);
        int j;
@@ -747,7 +747,6 @@ static int softing_pdev_remove(struct platform_device *pdev)
 
        iounmap(card->dpram);
        kfree(card);
-       return 0;
 }
 
 static int softing_pdev_probe(struct platform_device *pdev)
@@ -855,7 +854,7 @@ static struct platform_driver softing_driver = {
                .name = KBUILD_MODNAME,
        },
        .probe = softing_pdev_probe,
-       .remove = softing_pdev_remove,
+       .remove_new = softing_pdev_remove,
 };
 
 module_platform_driver(softing_driver);
index 2b78f91..0827830 100644 (file)
@@ -791,14 +791,12 @@ static const struct of_device_id sun4ican_of_match[] = {
 
 MODULE_DEVICE_TABLE(of, sun4ican_of_match);
 
-static int sun4ican_remove(struct platform_device *pdev)
+static void sun4ican_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
 
        unregister_netdev(dev);
        free_candev(dev);
-
-       return 0;
 }
 
 static int sun4ican_probe(struct platform_device *pdev)
@@ -901,7 +899,7 @@ static struct platform_driver sun4i_can_driver = {
                .of_match_table = sun4ican_of_match,
        },
        .probe = sun4ican_probe,
-       .remove = sun4ican_remove,
+       .remove_new = sun4ican_remove,
 };
 
 module_platform_driver(sun4i_can_driver);
index 27700f7..9bab0b4 100644 (file)
@@ -963,7 +963,7 @@ probe_exit_candev:
        return err;
 }
 
-static int ti_hecc_remove(struct platform_device *pdev)
+static void ti_hecc_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct ti_hecc_priv *priv = netdev_priv(ndev);
@@ -973,8 +973,6 @@ static int ti_hecc_remove(struct platform_device *pdev)
        clk_put(priv->clk);
        can_rx_offload_del(&priv->offload);
        free_candev(ndev);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1028,7 +1026,7 @@ static struct platform_driver ti_hecc_driver = {
                .of_match_table = ti_hecc_dt_ids,
        },
        .probe = ti_hecc_probe,
-       .remove = ti_hecc_remove,
+       .remove_new = ti_hecc_remove,
        .suspend = ti_hecc_suspend,
        .resume = ti_hecc_resume,
 };
index 445504a..58fcd2b 100644 (file)
@@ -38,6 +38,18 @@ config CAN_ETAS_ES58X
          To compile this driver as a module, choose M here: the module
          will be called etas_es58x.
 
+config CAN_F81604
+        tristate "Fintek F81604 USB to 2CAN interface"
+        help
+          This driver supports the Fintek F81604 USB to 2CAN interface.
+          The device can support CAN2.0A/B protocol and also support
+          2 output pins to control external terminator (optional).
+
+          To compile this driver as a module, choose M here: the module will
+          be called f81604.
+
+          (see also https://www.fintek.com.tw).
+
 config CAN_GS_USB
        tristate "Geschwister Schneider UG and candleLight compatible interfaces"
        help
index 1ea16be..8b11088 100644 (file)
@@ -7,6 +7,7 @@ obj-$(CONFIG_CAN_8DEV_USB) += usb_8dev.o
 obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o
 obj-$(CONFIG_CAN_ESD_USB) += esd_usb.o
 obj-$(CONFIG_CAN_ETAS_ES58X) += etas_es58x/
+obj-$(CONFIG_CAN_F81604) += f81604.o
 obj-$(CONFIG_CAN_GS_USB) += gs_usb.o
 obj-$(CONFIG_CAN_KVASER_USB) += kvaser_usb/
 obj-$(CONFIG_CAN_MCBA_USB) += mcba_usb.o
diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c
new file mode 100644 (file)
index 0000000..ec8cef7
--- /dev/null
@@ -0,0 +1,1201 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Fintek F81604 USB-to-2CAN controller driver.
+ *
+ * Copyright (C) 2023 Ji-Ze Hong (Peter Hong) <peter_hong@fintek.com.tw>
+ */
+#include <linux/bitfield.h>
+#include <linux/netdevice.h>
+#include <linux/units.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/platform/sja1000.h>
+
+#include <asm-generic/unaligned.h>
+
+/* vendor and product id */
+#define F81604_VENDOR_ID 0x2c42
+#define F81604_PRODUCT_ID 0x1709
+#define F81604_CAN_CLOCK (12 * MEGA)
+#define F81604_MAX_DEV 2
+#define F81604_SET_DEVICE_RETRY 10
+
+#define F81604_USB_TIMEOUT 2000
+#define F81604_SET_GET_REGISTER 0xA0
+#define F81604_PORT_OFFSET 0x1000
+#define F81604_MAX_RX_URBS 4
+
+#define F81604_CMD_DATA 0x00
+
+#define F81604_DLC_LEN_MASK GENMASK(3, 0)
+#define F81604_DLC_EFF_BIT BIT(7)
+#define F81604_DLC_RTR_BIT BIT(6)
+
+#define F81604_SFF_SHIFT 5
+#define F81604_EFF_SHIFT 3
+
+#define F81604_BRP_MASK GENMASK(5, 0)
+#define F81604_SJW_MASK GENMASK(7, 6)
+
+#define F81604_SEG1_MASK GENMASK(3, 0)
+#define F81604_SEG2_MASK GENMASK(6, 4)
+
+#define F81604_CLEAR_ALC 0
+#define F81604_CLEAR_ECC 1
+#define F81604_CLEAR_OVERRUN 2
+
+/* device setting */
+#define F81604_CTRL_MODE_REG 0x80
+#define F81604_TX_ONESHOT (0x03 << 3)
+#define F81604_TX_NORMAL (0x01 << 3)
+#define F81604_RX_AUTO_RELEASE_BUF BIT(1)
+#define F81604_INT_WHEN_CHANGE BIT(0)
+
+#define F81604_TERMINATOR_REG 0x105
+#define F81604_CAN0_TERM BIT(2)
+#define F81604_CAN1_TERM BIT(3)
+
+#define F81604_TERMINATION_DISABLED CAN_TERMINATION_DISABLED
+#define F81604_TERMINATION_ENABLED 120
+
+/* SJA1000 registers - manual section 6.4 (Pelican Mode) */
+#define F81604_SJA1000_MOD 0x00
+#define F81604_SJA1000_CMR 0x01
+#define F81604_SJA1000_IR 0x03
+#define F81604_SJA1000_IER 0x04
+#define F81604_SJA1000_ALC 0x0B
+#define F81604_SJA1000_ECC 0x0C
+#define F81604_SJA1000_RXERR 0x0E
+#define F81604_SJA1000_TXERR 0x0F
+#define F81604_SJA1000_ACCC0 0x10
+#define F81604_SJA1000_ACCM0 0x14
+#define F81604_MAX_FILTER_CNT 4
+
+/* Common registers - manual section 6.5 */
+#define F81604_SJA1000_BTR0 0x06
+#define F81604_SJA1000_BTR1 0x07
+#define F81604_SJA1000_BTR1_SAMPLE_TRIPLE BIT(7)
+#define F81604_SJA1000_OCR 0x08
+#define F81604_SJA1000_CDR 0x1F
+
+/* mode register */
+#define F81604_SJA1000_MOD_RM 0x01
+#define F81604_SJA1000_MOD_LOM 0x02
+#define F81604_SJA1000_MOD_STM 0x04
+
+/* commands */
+#define F81604_SJA1000_CMD_CDO 0x08
+
+/* interrupt sources */
+#define F81604_SJA1000_IRQ_BEI 0x80
+#define F81604_SJA1000_IRQ_ALI 0x40
+#define F81604_SJA1000_IRQ_EPI 0x20
+#define F81604_SJA1000_IRQ_DOI 0x08
+#define F81604_SJA1000_IRQ_EI 0x04
+#define F81604_SJA1000_IRQ_TI 0x02
+#define F81604_SJA1000_IRQ_RI 0x01
+#define F81604_SJA1000_IRQ_ALL 0xFF
+#define F81604_SJA1000_IRQ_OFF 0x00
+
+/* status register content */
+#define F81604_SJA1000_SR_BS 0x80
+#define F81604_SJA1000_SR_ES 0x40
+#define F81604_SJA1000_SR_TCS 0x08
+
+/* ECC register */
+#define F81604_SJA1000_ECC_SEG 0x1F
+#define F81604_SJA1000_ECC_DIR 0x20
+#define F81604_SJA1000_ECC_BIT 0x00
+#define F81604_SJA1000_ECC_FORM 0x40
+#define F81604_SJA1000_ECC_STUFF 0x80
+#define F81604_SJA1000_ECC_MASK 0xc0
+
+/* ALC register */
+#define F81604_SJA1000_ALC_MASK 0x1f
+
+/* table of devices that work with this driver */
+static const struct usb_device_id f81604_table[] = {
+       { USB_DEVICE(F81604_VENDOR_ID, F81604_PRODUCT_ID) },
+       {} /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, f81604_table);
+
+static const struct ethtool_ops f81604_ethtool_ops = {
+       .get_ts_info = ethtool_op_get_ts_info,
+};
+
+static const u16 f81604_termination[] = { F81604_TERMINATION_DISABLED,
+                                         F81604_TERMINATION_ENABLED };
+
+struct f81604_priv {
+       struct net_device *netdev[F81604_MAX_DEV];
+};
+
+struct f81604_port_priv {
+       struct can_priv can;
+       struct net_device *netdev;
+       struct sk_buff *echo_skb;
+
+       unsigned long clear_flags;
+       struct work_struct clear_reg_work;
+
+       struct usb_device *dev;
+       struct usb_interface *intf;
+
+       struct usb_anchor urbs_anchor;
+};
+
+/* Interrupt endpoint data format:
+ *     Byte 0: Status register.
+ *     Byte 1: Interrupt register.
+ *     Byte 2: Interrupt enable register.
+ *     Byte 3: Arbitration lost capture(ALC) register.
+ *     Byte 4: Error code capture(ECC) register.
+ *     Byte 5: Error warning limit register.
+ *     Byte 6: RX error counter register.
+ *     Byte 7: TX error counter register.
+ *     Byte 8: Reserved.
+ */
+struct f81604_int_data {
+       u8 sr;
+       u8 isrc;
+       u8 ier;
+       u8 alc;
+       u8 ecc;
+       u8 ewlr;
+       u8 rxerr;
+       u8 txerr;
+       u8 val;
+} __packed __aligned(4);
+
+struct f81604_sff {
+       __be16 id;
+       u8 data[CAN_MAX_DLEN];
+} __packed __aligned(2);
+
+struct f81604_eff {
+       __be32 id;
+       u8 data[CAN_MAX_DLEN];
+} __packed __aligned(2);
+
+struct f81604_can_frame {
+       u8 cmd;
+
+       /* According for F81604 DLC define:
+        *      bit 3~0: data length (0~8)
+        *      bit6: is RTR flag.
+        *      bit7: is EFF frame.
+        */
+       u8 dlc;
+
+       union {
+               struct f81604_sff sff;
+               struct f81604_eff eff;
+       };
+} __packed __aligned(2);
+
+static const u8 bulk_in_addr[F81604_MAX_DEV] = { 2, 4 };
+static const u8 bulk_out_addr[F81604_MAX_DEV] = { 1, 3 };
+static const u8 int_in_addr[F81604_MAX_DEV] = { 1, 3 };
+
+static int f81604_write(struct usb_device *dev, u16 reg, u8 data)
+{
+       int ret;
+
+       ret = usb_control_msg_send(dev, 0, F81604_SET_GET_REGISTER,
+                                  USB_TYPE_VENDOR | USB_DIR_OUT, 0, reg,
+                                  &data, sizeof(data), F81604_USB_TIMEOUT,
+                                  GFP_KERNEL);
+       if (ret)
+               dev_err(&dev->dev, "%s: reg: %x data: %x failed: %pe\n",
+                       __func__, reg, data, ERR_PTR(ret));
+
+       return ret;
+}
+
+static int f81604_read(struct usb_device *dev, u16 reg, u8 *data)
+{
+       int ret;
+
+       ret = usb_control_msg_recv(dev, 0, F81604_SET_GET_REGISTER,
+                                  USB_TYPE_VENDOR | USB_DIR_IN, 0, reg, data,
+                                  sizeof(*data), F81604_USB_TIMEOUT,
+                                  GFP_KERNEL);
+
+       if (ret < 0)
+               dev_err(&dev->dev, "%s: reg: %x failed: %pe\n", __func__, reg,
+                       ERR_PTR(ret));
+
+       return ret;
+}
+
+static int f81604_update_bits(struct usb_device *dev, u16 reg, u8 mask,
+                             u8 data)
+{
+       int ret;
+       u8 tmp;
+
+       ret = f81604_read(dev, reg, &tmp);
+       if (ret)
+               return ret;
+
+       tmp &= ~mask;
+       tmp |= (mask & data);
+
+       return f81604_write(dev, reg, tmp);
+}
+
+static int f81604_sja1000_write(struct f81604_port_priv *priv, u16 reg,
+                               u8 data)
+{
+       int port = priv->netdev->dev_port;
+       int real_reg;
+
+       real_reg = reg + F81604_PORT_OFFSET * port + F81604_PORT_OFFSET;
+       return f81604_write(priv->dev, real_reg, data);
+}
+
+static int f81604_sja1000_read(struct f81604_port_priv *priv, u16 reg,
+                              u8 *data)
+{
+       int port = priv->netdev->dev_port;
+       int real_reg;
+
+       real_reg = reg + F81604_PORT_OFFSET * port + F81604_PORT_OFFSET;
+       return f81604_read(priv->dev, real_reg, data);
+}
+
+static int f81604_set_reset_mode(struct f81604_port_priv *priv)
+{
+       int ret, i;
+       u8 tmp;
+
+       /* disable interrupts */
+       ret = f81604_sja1000_write(priv, F81604_SJA1000_IER,
+                                  F81604_SJA1000_IRQ_OFF);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < F81604_SET_DEVICE_RETRY; i++) {
+               ret = f81604_sja1000_read(priv, F81604_SJA1000_MOD, &tmp);
+               if (ret)
+                       return ret;
+
+               /* check reset bit */
+               if (tmp & F81604_SJA1000_MOD_RM) {
+                       priv->can.state = CAN_STATE_STOPPED;
+                       return 0;
+               }
+
+               /* reset chip */
+               ret = f81604_sja1000_write(priv, F81604_SJA1000_MOD,
+                                          F81604_SJA1000_MOD_RM);
+               if (ret)
+                       return ret;
+       }
+
+       return -EPERM;
+}
+
+static int f81604_set_normal_mode(struct f81604_port_priv *priv)
+{
+       u8 tmp, ier = 0;
+       u8 mod_reg = 0;
+       int ret, i;
+
+       for (i = 0; i < F81604_SET_DEVICE_RETRY; i++) {
+               ret = f81604_sja1000_read(priv, F81604_SJA1000_MOD, &tmp);
+               if (ret)
+                       return ret;
+
+               /* check reset bit */
+               if ((tmp & F81604_SJA1000_MOD_RM) == 0) {
+                       priv->can.state = CAN_STATE_ERROR_ACTIVE;
+                       /* enable interrupts, RI handled by bulk-in */
+                       ier = F81604_SJA1000_IRQ_ALL & ~F81604_SJA1000_IRQ_RI;
+                       if (!(priv->can.ctrlmode &
+                             CAN_CTRLMODE_BERR_REPORTING))
+                               ier &= ~F81604_SJA1000_IRQ_BEI;
+
+                       return f81604_sja1000_write(priv, F81604_SJA1000_IER,
+                                                   ier);
+               }
+
+               /* set chip to normal mode */
+               if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
+                       mod_reg |= F81604_SJA1000_MOD_LOM;
+               if (priv->can.ctrlmode & CAN_CTRLMODE_PRESUME_ACK)
+                       mod_reg |= F81604_SJA1000_MOD_STM;
+
+               ret = f81604_sja1000_write(priv, F81604_SJA1000_MOD, mod_reg);
+               if (ret)
+                       return ret;
+       }
+
+       return -EPERM;
+}
+
+static int f81604_chipset_init(struct f81604_port_priv *priv)
+{
+       int i, ret;
+
+       /* set clock divider and output control register */
+       ret = f81604_sja1000_write(priv, F81604_SJA1000_CDR,
+                                  CDR_CBP | CDR_PELICAN);
+       if (ret)
+               return ret;
+
+       /* set acceptance filter (accept all) */
+       for (i = 0; i < F81604_MAX_FILTER_CNT; ++i) {
+               ret = f81604_sja1000_write(priv, F81604_SJA1000_ACCC0 + i, 0);
+               if (ret)
+                       return ret;
+       }
+
+       for (i = 0; i < F81604_MAX_FILTER_CNT; ++i) {
+               ret = f81604_sja1000_write(priv, F81604_SJA1000_ACCM0 + i,
+                                          0xFF);
+               if (ret)
+                       return ret;
+       }
+
+       return f81604_sja1000_write(priv, F81604_SJA1000_OCR,
+                                   OCR_TX0_PUSHPULL | OCR_TX1_PUSHPULL |
+                                           OCR_MODE_NORMAL);
+}
+
+static void f81604_process_rx_packet(struct net_device *netdev,
+                                    struct f81604_can_frame *frame)
+{
+       struct net_device_stats *stats = &netdev->stats;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+
+       if (frame->cmd != F81604_CMD_DATA)
+               return;
+
+       skb = alloc_can_skb(netdev, &cf);
+       if (!skb) {
+               stats->rx_dropped++;
+               return;
+       }
+
+       cf->len = can_cc_dlc2len(frame->dlc & F81604_DLC_LEN_MASK);
+
+       if (frame->dlc & F81604_DLC_EFF_BIT) {
+               cf->can_id = get_unaligned_be32(&frame->eff.id) >>
+                            F81604_EFF_SHIFT;
+               cf->can_id |= CAN_EFF_FLAG;
+
+               if (!(frame->dlc & F81604_DLC_RTR_BIT))
+                       memcpy(cf->data, frame->eff.data, cf->len);
+       } else {
+               cf->can_id = get_unaligned_be16(&frame->sff.id) >>
+                            F81604_SFF_SHIFT;
+
+               if (!(frame->dlc & F81604_DLC_RTR_BIT))
+                       memcpy(cf->data, frame->sff.data, cf->len);
+       }
+
+       if (frame->dlc & F81604_DLC_RTR_BIT)
+               cf->can_id |= CAN_RTR_FLAG;
+       else
+               stats->rx_bytes += cf->len;
+
+       stats->rx_packets++;
+       netif_rx(skb);
+}
+
+static void f81604_read_bulk_callback(struct urb *urb)
+{
+       struct f81604_can_frame *frame = urb->transfer_buffer;
+       struct net_device *netdev = urb->context;
+       int ret;
+
+       if (!netif_device_present(netdev))
+               return;
+
+       if (urb->status)
+               netdev_info(netdev, "%s: URB aborted %pe\n", __func__,
+                           ERR_PTR(urb->status));
+
+       switch (urb->status) {
+       case 0: /* success */
+               break;
+
+       case -ENOENT:
+       case -EPIPE:
+       case -EPROTO:
+       case -ESHUTDOWN:
+               return;
+
+       default:
+               goto resubmit_urb;
+       }
+
+       if (urb->actual_length != sizeof(*frame)) {
+               netdev_warn(netdev, "URB length %u not equal to %zu\n",
+                           urb->actual_length, sizeof(*frame));
+               goto resubmit_urb;
+       }
+
+       f81604_process_rx_packet(netdev, frame);
+
+resubmit_urb:
+       ret = usb_submit_urb(urb, GFP_ATOMIC);
+       if (ret == -ENODEV)
+               netif_device_detach(netdev);
+       else if (ret)
+               netdev_err(netdev,
+                          "%s: failed to resubmit read bulk urb: %pe\n",
+                          __func__, ERR_PTR(ret));
+}
+
+static void f81604_handle_tx(struct f81604_port_priv *priv,
+                            struct f81604_int_data *data)
+{
+       struct net_device *netdev = priv->netdev;
+       struct net_device_stats *stats = &netdev->stats;
+
+       /* transmission buffer released */
+       if (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT &&
+           !(data->sr & F81604_SJA1000_SR_TCS)) {
+               stats->tx_errors++;
+               can_free_echo_skb(netdev, 0, NULL);
+       } else {
+               /* transmission complete */
+               stats->tx_bytes += can_get_echo_skb(netdev, 0, NULL);
+               stats->tx_packets++;
+       }
+
+       netif_wake_queue(netdev);
+}
+
+static void f81604_handle_can_bus_errors(struct f81604_port_priv *priv,
+                                        struct f81604_int_data *data)
+{
+       enum can_state can_state = priv->can.state;
+       struct net_device *netdev = priv->netdev;
+       struct net_device_stats *stats = &netdev->stats;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+
+       /* Note: ALC/ECC will not auto clear by read here, must be cleared by
+        * read register (via clear_reg_work).
+        */
+
+       skb = alloc_can_err_skb(netdev, &cf);
+       if (skb) {
+               cf->can_id |= CAN_ERR_CNT;
+               cf->data[6] = data->txerr;
+               cf->data[7] = data->rxerr;
+       }
+
+       if (data->isrc & F81604_SJA1000_IRQ_DOI) {
+               /* data overrun interrupt */
+               netdev_dbg(netdev, "data overrun interrupt\n");
+
+               if (skb) {
+                       cf->can_id |= CAN_ERR_CRTL;
+                       cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+               }
+
+               stats->rx_over_errors++;
+               stats->rx_errors++;
+
+               set_bit(F81604_CLEAR_OVERRUN, &priv->clear_flags);
+       }
+
+       if (data->isrc & F81604_SJA1000_IRQ_EI) {
+               /* error warning interrupt */
+               netdev_dbg(netdev, "error warning interrupt\n");
+
+               if (data->sr & F81604_SJA1000_SR_BS)
+                       can_state = CAN_STATE_BUS_OFF;
+               else if (data->sr & F81604_SJA1000_SR_ES)
+                       can_state = CAN_STATE_ERROR_WARNING;
+               else
+                       can_state = CAN_STATE_ERROR_ACTIVE;
+       }
+
+       if (data->isrc & F81604_SJA1000_IRQ_BEI) {
+               /* bus error interrupt */
+               netdev_dbg(netdev, "bus error interrupt\n");
+
+               priv->can.can_stats.bus_error++;
+               stats->rx_errors++;
+
+               if (skb) {
+                       cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
+
+                       /* set error type */
+                       switch (data->ecc & F81604_SJA1000_ECC_MASK) {
+                       case F81604_SJA1000_ECC_BIT:
+                               cf->data[2] |= CAN_ERR_PROT_BIT;
+                               break;
+                       case F81604_SJA1000_ECC_FORM:
+                               cf->data[2] |= CAN_ERR_PROT_FORM;
+                               break;
+                       case F81604_SJA1000_ECC_STUFF:
+                               cf->data[2] |= CAN_ERR_PROT_STUFF;
+                               break;
+                       default:
+                               break;
+                       }
+
+                       /* set error location */
+                       cf->data[3] = data->ecc & F81604_SJA1000_ECC_SEG;
+
+                       /* Error occurred during transmission? */
+                       if ((data->ecc & F81604_SJA1000_ECC_DIR) == 0)
+                               cf->data[2] |= CAN_ERR_PROT_TX;
+               }
+
+               set_bit(F81604_CLEAR_ECC, &priv->clear_flags);
+       }
+
+       if (data->isrc & F81604_SJA1000_IRQ_EPI) {
+               if (can_state == CAN_STATE_ERROR_PASSIVE)
+                       can_state = CAN_STATE_ERROR_WARNING;
+               else
+                       can_state = CAN_STATE_ERROR_PASSIVE;
+
+               /* error passive interrupt */
+               netdev_dbg(netdev, "error passive interrupt: %d\n", can_state);
+       }
+
+       if (data->isrc & F81604_SJA1000_IRQ_ALI) {
+               /* arbitration lost interrupt */
+               netdev_dbg(netdev, "arbitration lost interrupt\n");
+
+               priv->can.can_stats.arbitration_lost++;
+
+               if (skb) {
+                       cf->can_id |= CAN_ERR_LOSTARB;
+                       cf->data[0] = data->alc & F81604_SJA1000_ALC_MASK;
+               }
+
+               set_bit(F81604_CLEAR_ALC, &priv->clear_flags);
+       }
+
+       if (can_state != priv->can.state) {
+               enum can_state tx_state, rx_state;
+
+               tx_state = data->txerr >= data->rxerr ? can_state : 0;
+               rx_state = data->txerr <= data->rxerr ? can_state : 0;
+
+               can_change_state(netdev, cf, tx_state, rx_state);
+
+               if (can_state == CAN_STATE_BUS_OFF)
+                       can_bus_off(netdev);
+       }
+
+       if (priv->clear_flags)
+               schedule_work(&priv->clear_reg_work);
+
+       if (skb)
+               netif_rx(skb);
+}
+
+static void f81604_read_int_callback(struct urb *urb)
+{
+       struct f81604_int_data *data = urb->transfer_buffer;
+       struct net_device *netdev = urb->context;
+       struct f81604_port_priv *priv;
+       int ret;
+
+       priv = netdev_priv(netdev);
+
+       if (!netif_device_present(netdev))
+               return;
+
+       if (urb->status)
+               netdev_info(netdev, "%s: Int URB aborted: %pe\n", __func__,
+                           ERR_PTR(urb->status));
+
+       switch (urb->status) {
+       case 0: /* success */
+               break;
+
+       case -ENOENT:
+       case -EPIPE:
+       case -EPROTO:
+       case -ESHUTDOWN:
+               return;
+
+       default:
+               goto resubmit_urb;
+       }
+
+       /* handle Errors */
+       if (data->isrc & (F81604_SJA1000_IRQ_DOI | F81604_SJA1000_IRQ_EI |
+                         F81604_SJA1000_IRQ_BEI | F81604_SJA1000_IRQ_EPI |
+                         F81604_SJA1000_IRQ_ALI))
+               f81604_handle_can_bus_errors(priv, data);
+
+       /* handle TX */
+       if (priv->can.state != CAN_STATE_BUS_OFF &&
+           (data->isrc & F81604_SJA1000_IRQ_TI))
+               f81604_handle_tx(priv, data);
+
+resubmit_urb:
+       ret = usb_submit_urb(urb, GFP_ATOMIC);
+       if (ret == -ENODEV)
+               netif_device_detach(netdev);
+       else if (ret)
+               netdev_err(netdev, "%s: failed to resubmit int urb: %pe\n",
+                          __func__, ERR_PTR(ret));
+}
+
+static void f81604_unregister_urbs(struct f81604_port_priv *priv)
+{
+       usb_kill_anchored_urbs(&priv->urbs_anchor);
+}
+
+static int f81604_register_urbs(struct f81604_port_priv *priv)
+{
+       struct net_device *netdev = priv->netdev;
+       struct f81604_int_data *int_data;
+       int id = netdev->dev_port;
+       struct urb *int_urb;
+       int rx_urb_cnt;
+       int ret;
+
+       for (rx_urb_cnt = 0; rx_urb_cnt < F81604_MAX_RX_URBS; ++rx_urb_cnt) {
+               struct f81604_can_frame *frame;
+               struct urb *rx_urb;
+
+               rx_urb = usb_alloc_urb(0, GFP_KERNEL);
+               if (!rx_urb) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               frame = kmalloc(sizeof(*frame), GFP_KERNEL);
+               if (!frame) {
+                       usb_free_urb(rx_urb);
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               usb_fill_bulk_urb(rx_urb, priv->dev,
+                                 usb_rcvbulkpipe(priv->dev, bulk_in_addr[id]),
+                                 frame, sizeof(*frame),
+                                 f81604_read_bulk_callback, netdev);
+
+               rx_urb->transfer_flags |= URB_FREE_BUFFER;
+               usb_anchor_urb(rx_urb, &priv->urbs_anchor);
+
+               ret = usb_submit_urb(rx_urb, GFP_KERNEL);
+               if (ret) {
+                       usb_unanchor_urb(rx_urb);
+                       usb_free_urb(rx_urb);
+                       break;
+               }
+
+               /* Drop reference, USB core will take care of freeing it */
+               usb_free_urb(rx_urb);
+       }
+
+       if (rx_urb_cnt == 0) {
+               netdev_warn(netdev, "%s: submit rx urb failed: %pe\n",
+                           __func__, ERR_PTR(ret));
+
+               goto error;
+       }
+
+       int_urb = usb_alloc_urb(0, GFP_KERNEL);
+       if (!int_urb) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       int_data = kmalloc(sizeof(*int_data), GFP_KERNEL);
+       if (!int_data) {
+               usb_free_urb(int_urb);
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       usb_fill_int_urb(int_urb, priv->dev,
+                        usb_rcvintpipe(priv->dev, int_in_addr[id]), int_data,
+                        sizeof(*int_data), f81604_read_int_callback, netdev,
+                        1);
+
+       int_urb->transfer_flags |= URB_FREE_BUFFER;
+       usb_anchor_urb(int_urb, &priv->urbs_anchor);
+
+       ret = usb_submit_urb(int_urb, GFP_KERNEL);
+       if (ret) {
+               usb_unanchor_urb(int_urb);
+               usb_free_urb(int_urb);
+
+               netdev_warn(netdev, "%s: submit int urb failed: %pe\n",
+                           __func__, ERR_PTR(ret));
+               goto error;
+       }
+
+       /* Drop reference, USB core will take care of freeing it */
+       usb_free_urb(int_urb);
+
+       return 0;
+
+error:
+       f81604_unregister_urbs(priv);
+       return ret;
+}
+
+static int f81604_start(struct net_device *netdev)
+{
+       struct f81604_port_priv *priv = netdev_priv(netdev);
+       int ret;
+       u8 mode;
+       u8 tmp;
+
+       mode = F81604_RX_AUTO_RELEASE_BUF | F81604_INT_WHEN_CHANGE;
+
+       /* Set TR/AT mode */
+       if (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+               mode |= F81604_TX_ONESHOT;
+       else
+               mode |= F81604_TX_NORMAL;
+
+       ret = f81604_sja1000_write(priv, F81604_CTRL_MODE_REG, mode);
+       if (ret)
+               return ret;
+
+       /* set reset mode */
+       ret = f81604_set_reset_mode(priv);
+       if (ret)
+               return ret;
+
+       ret = f81604_chipset_init(priv);
+       if (ret)
+               return ret;
+
+       /* Clear error counters and error code capture */
+       ret = f81604_sja1000_write(priv, F81604_SJA1000_TXERR, 0);
+       if (ret)
+               return ret;
+
+       ret = f81604_sja1000_write(priv, F81604_SJA1000_RXERR, 0);
+       if (ret)
+               return ret;
+
+       /* Read clear for ECC/ALC/IR register */
+       ret = f81604_sja1000_read(priv, F81604_SJA1000_ECC, &tmp);
+       if (ret)
+               return ret;
+
+       ret = f81604_sja1000_read(priv, F81604_SJA1000_ALC, &tmp);
+       if (ret)
+               return ret;
+
+       ret = f81604_sja1000_read(priv, F81604_SJA1000_IR, &tmp);
+       if (ret)
+               return ret;
+
+       ret = f81604_register_urbs(priv);
+       if (ret)
+               return ret;
+
+       ret = f81604_set_normal_mode(priv);
+       if (ret) {
+               f81604_unregister_urbs(priv);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int f81604_set_bittiming(struct net_device *dev)
+{
+       struct f81604_port_priv *priv = netdev_priv(dev);
+       struct can_bittiming *bt = &priv->can.bittiming;
+       u8 btr0, btr1;
+       int ret;
+
+       btr0 = FIELD_PREP(F81604_BRP_MASK, bt->brp - 1) |
+              FIELD_PREP(F81604_SJW_MASK, bt->sjw - 1);
+
+       btr1 = FIELD_PREP(F81604_SEG1_MASK,
+                         bt->prop_seg + bt->phase_seg1 - 1) |
+              FIELD_PREP(F81604_SEG2_MASK, bt->phase_seg2 - 1);
+
+       if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+               btr1 |= F81604_SJA1000_BTR1_SAMPLE_TRIPLE;
+
+       ret = f81604_sja1000_write(priv, F81604_SJA1000_BTR0, btr0);
+       if (ret) {
+               netdev_warn(dev, "%s: Set BTR0 failed: %pe\n", __func__,
+                           ERR_PTR(ret));
+               return ret;
+       }
+
+       ret = f81604_sja1000_write(priv, F81604_SJA1000_BTR1, btr1);
+       if (ret) {
+               netdev_warn(dev, "%s: Set BTR1 failed: %pe\n", __func__,
+                           ERR_PTR(ret));
+               return ret;
+       }
+
+       return 0;
+}
+
+static int f81604_set_mode(struct net_device *netdev, enum can_mode mode)
+{
+       int ret;
+
+       switch (mode) {
+       case CAN_MODE_START:
+               ret = f81604_start(netdev);
+               if (!ret && netif_queue_stopped(netdev))
+                       netif_wake_queue(netdev);
+               break;
+
+       default:
+               ret = -EOPNOTSUPP;
+       }
+
+       return ret;
+}
+
+static void f81604_write_bulk_callback(struct urb *urb)
+{
+       struct net_device *netdev = urb->context;
+
+       if (!netif_device_present(netdev))
+               return;
+
+       if (urb->status)
+               netdev_info(netdev, "%s: Tx URB error: %pe\n", __func__,
+                           ERR_PTR(urb->status));
+}
+
+static void f81604_clear_reg_work(struct work_struct *work)
+{
+       struct f81604_port_priv *priv;
+       u8 tmp;
+
+       priv = container_of(work, struct f81604_port_priv, clear_reg_work);
+
+       /* dummy read for clear Arbitration lost capture(ALC) register. */
+       if (test_and_clear_bit(F81604_CLEAR_ALC, &priv->clear_flags))
+               f81604_sja1000_read(priv, F81604_SJA1000_ALC, &tmp);
+
+       /* dummy read for clear Error code capture(ECC) register. */
+       if (test_and_clear_bit(F81604_CLEAR_ECC, &priv->clear_flags))
+               f81604_sja1000_read(priv, F81604_SJA1000_ECC, &tmp);
+
+       /* dummy write for clear data overrun flag. */
+       if (test_and_clear_bit(F81604_CLEAR_OVERRUN, &priv->clear_flags))
+               f81604_sja1000_write(priv, F81604_SJA1000_CMR,
+                                    F81604_SJA1000_CMD_CDO);
+}
+
+static netdev_tx_t f81604_start_xmit(struct sk_buff *skb,
+                                    struct net_device *netdev)
+{
+       struct can_frame *cf = (struct can_frame *)skb->data;
+       struct f81604_port_priv *priv = netdev_priv(netdev);
+       struct net_device_stats *stats = &netdev->stats;
+       struct f81604_can_frame *frame;
+       struct urb *write_urb;
+       int ret;
+
+       if (can_dev_dropped_skb(netdev, skb))
+               return NETDEV_TX_OK;
+
+       netif_stop_queue(netdev);
+
+       write_urb = usb_alloc_urb(0, GFP_ATOMIC);
+       if (!write_urb)
+               goto nomem_urb;
+
+       frame = kzalloc(sizeof(*frame), GFP_ATOMIC);
+       if (!frame)
+               goto nomem_buf;
+
+       usb_fill_bulk_urb(write_urb, priv->dev,
+                         usb_sndbulkpipe(priv->dev,
+                                         bulk_out_addr[netdev->dev_port]),
+                         frame, sizeof(*frame), f81604_write_bulk_callback,
+                         priv->netdev);
+
+       write_urb->transfer_flags |= URB_FREE_BUFFER;
+
+       frame->cmd = F81604_CMD_DATA;
+       frame->dlc = cf->len;
+
+       if (cf->can_id & CAN_RTR_FLAG)
+               frame->dlc |= F81604_DLC_RTR_BIT;
+
+       if (cf->can_id & CAN_EFF_FLAG) {
+               u32 id = (cf->can_id & CAN_EFF_MASK) << F81604_EFF_SHIFT;
+
+               put_unaligned_be32(id, &frame->eff.id);
+
+               frame->dlc |= F81604_DLC_EFF_BIT;
+
+               if (!(cf->can_id & CAN_RTR_FLAG))
+                       memcpy(&frame->eff.data, cf->data, cf->len);
+       } else {
+               u32 id = (cf->can_id & CAN_SFF_MASK) << F81604_SFF_SHIFT;
+
+               put_unaligned_be16(id, &frame->sff.id);
+
+               if (!(cf->can_id & CAN_RTR_FLAG))
+                       memcpy(&frame->sff.data, cf->data, cf->len);
+       }
+
+       can_put_echo_skb(skb, netdev, 0, 0);
+
+       ret = usb_submit_urb(write_urb, GFP_ATOMIC);
+       if (ret) {
+               netdev_err(netdev, "%s: failed to resubmit tx bulk urb: %pe\n",
+                          __func__, ERR_PTR(ret));
+
+               can_free_echo_skb(netdev, 0, NULL);
+               stats->tx_dropped++;
+               stats->tx_errors++;
+
+               if (ret == -ENODEV)
+                       netif_device_detach(netdev);
+               else
+                       netif_wake_queue(netdev);
+       }
+
+       /* let usb core take care of this urb */
+       usb_free_urb(write_urb);
+
+       return NETDEV_TX_OK;
+
+nomem_buf:
+       usb_free_urb(write_urb);
+
+nomem_urb:
+       dev_kfree_skb(skb);
+       stats->tx_dropped++;
+       stats->tx_errors++;
+       netif_wake_queue(netdev);
+
+       return NETDEV_TX_OK;
+}
+
+static int f81604_get_berr_counter(const struct net_device *netdev,
+                                  struct can_berr_counter *bec)
+{
+       struct f81604_port_priv *priv = netdev_priv(netdev);
+       u8 txerr, rxerr;
+       int ret;
+
+       ret = f81604_sja1000_read(priv, F81604_SJA1000_TXERR, &txerr);
+       if (ret)
+               return ret;
+
+       ret = f81604_sja1000_read(priv, F81604_SJA1000_RXERR, &rxerr);
+       if (ret)
+               return ret;
+
+       bec->txerr = txerr;
+       bec->rxerr = rxerr;
+
+       return 0;
+}
+
+/* Open USB device */
+static int f81604_open(struct net_device *netdev)
+{
+       int ret;
+
+       ret = open_candev(netdev);
+       if (ret)
+               return ret;
+
+       ret = f81604_start(netdev);
+       if (ret) {
+               if (ret == -ENODEV)
+                       netif_device_detach(netdev);
+
+               close_candev(netdev);
+               return ret;
+       }
+
+       netif_start_queue(netdev);
+       return 0;
+}
+
+/* Close USB device */
+static int f81604_close(struct net_device *netdev)
+{
+       struct f81604_port_priv *priv = netdev_priv(netdev);
+
+       f81604_set_reset_mode(priv);
+
+       netif_stop_queue(netdev);
+       cancel_work_sync(&priv->clear_reg_work);
+       close_candev(netdev);
+
+       f81604_unregister_urbs(priv);
+
+       return 0;
+}
+
+static const struct net_device_ops f81604_netdev_ops = {
+       .ndo_open = f81604_open,
+       .ndo_stop = f81604_close,
+       .ndo_start_xmit = f81604_start_xmit,
+       .ndo_change_mtu = can_change_mtu,
+};
+
+static const struct can_bittiming_const f81604_bittiming_const = {
+       .name = KBUILD_MODNAME,
+       .tseg1_min = 1,
+       .tseg1_max = 16,
+       .tseg2_min = 1,
+       .tseg2_max = 8,
+       .sjw_max = 4,
+       .brp_min = 1,
+       .brp_max = 64,
+       .brp_inc = 1,
+};
+
+/* Called by the usb core when driver is unloaded or device is removed */
+static void f81604_disconnect(struct usb_interface *intf)
+{
+       struct f81604_priv *priv = usb_get_intfdata(intf);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(priv->netdev); ++i) {
+               if (!priv->netdev[i])
+                       continue;
+
+               unregister_netdev(priv->netdev[i]);
+               free_candev(priv->netdev[i]);
+       }
+}
+
+static int __f81604_set_termination(struct usb_device *dev, int idx, u16 term)
+{
+       u8 mask, data = 0;
+
+       if (idx == 0)
+               mask = F81604_CAN0_TERM;
+       else
+               mask = F81604_CAN1_TERM;
+
+       if (term)
+               data = mask;
+
+       return f81604_update_bits(dev, F81604_TERMINATOR_REG, mask, data);
+}
+
+static int f81604_set_termination(struct net_device *netdev, u16 term)
+{
+       struct f81604_port_priv *port_priv = netdev_priv(netdev);
+
+       ASSERT_RTNL();
+
+       return __f81604_set_termination(port_priv->dev, netdev->dev_port,
+                                       term);
+}
+
+static int f81604_probe(struct usb_interface *intf,
+                       const struct usb_device_id *id)
+{
+       struct usb_device *dev = interface_to_usbdev(intf);
+       struct net_device *netdev;
+       struct f81604_priv *priv;
+       int i, ret;
+
+       priv = devm_kzalloc(&intf->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       usb_set_intfdata(intf, priv);
+
+       for (i = 0; i < ARRAY_SIZE(priv->netdev); ++i) {
+               ret = __f81604_set_termination(dev, i, 0);
+               if (ret) {
+                       dev_err(&intf->dev,
+                               "Setting termination of CH#%d failed: %pe\n",
+                               i, ERR_PTR(ret));
+                       return ret;
+               }
+       }
+
+       for (i = 0; i < ARRAY_SIZE(priv->netdev); ++i) {
+               struct f81604_port_priv *port_priv;
+
+               netdev = alloc_candev(sizeof(*port_priv), 1);
+               if (!netdev) {
+                       dev_err(&intf->dev, "Couldn't alloc candev: %d\n", i);
+                       ret = -ENOMEM;
+
+                       goto failure_cleanup;
+               }
+
+               port_priv = netdev_priv(netdev);
+
+               INIT_WORK(&port_priv->clear_reg_work, f81604_clear_reg_work);
+               init_usb_anchor(&port_priv->urbs_anchor);
+
+               port_priv->intf = intf;
+               port_priv->dev = dev;
+               port_priv->netdev = netdev;
+               port_priv->can.clock.freq = F81604_CAN_CLOCK;
+
+               port_priv->can.termination_const = f81604_termination;
+               port_priv->can.termination_const_cnt =
+                       ARRAY_SIZE(f81604_termination);
+               port_priv->can.bittiming_const = &f81604_bittiming_const;
+               port_priv->can.do_set_bittiming = f81604_set_bittiming;
+               port_priv->can.do_set_mode = f81604_set_mode;
+               port_priv->can.do_set_termination = f81604_set_termination;
+               port_priv->can.do_get_berr_counter = f81604_get_berr_counter;
+               port_priv->can.ctrlmode_supported =
+                       CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_3_SAMPLES |
+                       CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_BERR_REPORTING |
+                       CAN_CTRLMODE_PRESUME_ACK;
+
+               netdev->ethtool_ops = &f81604_ethtool_ops;
+               netdev->netdev_ops = &f81604_netdev_ops;
+               netdev->flags |= IFF_ECHO;
+               netdev->dev_port = i;
+
+               SET_NETDEV_DEV(netdev, &intf->dev);
+
+               ret = register_candev(netdev);
+               if (ret) {
+                       netdev_err(netdev, "register CAN device failed: %pe\n",
+                                  ERR_PTR(ret));
+                       free_candev(netdev);
+
+                       goto failure_cleanup;
+               }
+
+               priv->netdev[i] = netdev;
+       }
+
+       return 0;
+
+failure_cleanup:
+       f81604_disconnect(intf);
+       return ret;
+}
+
+static struct usb_driver f81604_driver = {
+       .name = KBUILD_MODNAME,
+       .probe = f81604_probe,
+       .disconnect = f81604_disconnect,
+       .id_table = f81604_table,
+};
+
+module_usb_driver(f81604_driver);
+
+MODULE_AUTHOR("Ji-Ze Hong (Peter Hong) <peter_hong@fintek.com.tw>");
+MODULE_DESCRIPTION("Fintek F81604 USB to 2xCANBUS");
+MODULE_LICENSE("GPL");
index 43c812e..797c69a 100644 (file)
@@ -1898,20 +1898,18 @@ err:
  * This function frees all the resources allocated to the device.
  * Return: 0 always
  */
-static int xcan_remove(struct platform_device *pdev)
+static void xcan_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
 
        unregister_candev(ndev);
        pm_runtime_disable(&pdev->dev);
        free_candev(ndev);
-
-       return 0;
 }
 
 static struct platform_driver xcan_driver = {
        .probe = xcan_probe,
-       .remove = xcan_remove,
+       .remove_new = xcan_remove,
        .driver = {
                .name = DRIVER_NAME,
                .pm = &xcan_dev_pm_ops,
index 595a548..af50001 100644 (file)
@@ -1885,13 +1885,17 @@ static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port,
        case TC_SETUP_QDISC_TAPRIO: {
                struct tc_taprio_qopt_offload *taprio = type_data;
 
-               if (!hellcreek_validate_schedule(hellcreek, taprio))
-                       return -EOPNOTSUPP;
+               switch (taprio->cmd) {
+               case TAPRIO_CMD_REPLACE:
+                       if (!hellcreek_validate_schedule(hellcreek, taprio))
+                               return -EOPNOTSUPP;
 
-               if (taprio->enable)
                        return hellcreek_port_set_schedule(ds, port, taprio);
-
-               return hellcreek_port_del_schedule(ds, port);
+               case TAPRIO_CMD_DESTROY:
+                       return hellcreek_port_del_schedule(ds, port);
+               default:
+                       return -EOPNOTSUPP;
+               }
        }
        default:
                return -EOPNOTSUPP;
index c0215a8..ff76444 100644 (file)
@@ -1458,7 +1458,6 @@ int lan9303_remove(struct lan9303 *chip)
 
        /* assert reset to the whole device to prevent it from doing anything */
        gpiod_set_value_cansleep(chip->reset_gpio, 1);
-       gpiod_unexport(chip->reset_gpio);
 
        return 0;
 }
index e884482..bbbec32 100644 (file)
@@ -105,7 +105,7 @@ static struct i2c_driver lan9303_i2c_driver = {
                .name = "LAN9303_I2C",
                .of_match_table = lan9303_i2c_of_match,
        },
-       .probe_new = lan9303_i2c_probe,
+       .probe = lan9303_i2c_probe,
        .remove = lan9303_i2c_remove,
        .shutdown = lan9303_i2c_shutdown,
        .id_table = lan9303_i2c_id,
index f56fca1..84d5025 100644 (file)
 
 static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
 {
-       regmap_update_bits(dev->regmap[0], addr, bits, set ? bits : 0);
+       regmap_update_bits(ksz_regmap_8(dev), addr, bits, set ? bits : 0);
 }
 
 static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
                         bool set)
 {
-       regmap_update_bits(dev->regmap[0], PORT_CTRL_ADDR(port, offset),
+       regmap_update_bits(ksz_regmap_8(dev), PORT_CTRL_ADDR(port, offset),
                           bits, set ? bits : 0);
 }
 
@@ -941,7 +941,6 @@ void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
 {
        u8 learn[DSA_MAX_PORTS];
        int first, index, cnt;
-       struct ksz_port *p;
        const u16 *regs;
 
        regs = dev->info->regs;
@@ -955,9 +954,6 @@ void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
                cnt = dev->info->port_cnt;
        }
        for (index = first; index < cnt; index++) {
-               p = &dev->ports[index];
-               if (!p->on)
-                       continue;
                ksz_pread8(dev, index, regs[P_STP_CTRL], &learn[index]);
                if (!(learn[index] & PORT_LEARN_DISABLE))
                        ksz_pwrite8(dev, index, regs[P_STP_CTRL],
@@ -965,9 +961,6 @@ void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
        }
        ksz_cfg(dev, S_FLUSH_TABLE_CTRL, SW_FLUSH_DYN_MAC_TABLE, true);
        for (index = first; index < cnt; index++) {
-               p = &dev->ports[index];
-               if (!p->on)
-                       continue;
                if (!(learn[index] & PORT_LEARN_DISABLE))
                        ksz_pwrite8(dev, index, regs[P_STP_CTRL], learn[index]);
        }
@@ -1338,25 +1331,14 @@ void ksz8_config_cpu_port(struct dsa_switch *ds)
 
        ksz_cfg(dev, regs[S_TAIL_TAG_CTRL], masks[SW_TAIL_TAG_ENABLE], true);
 
-       p = &dev->ports[dev->cpu_port];
-       p->on = 1;
-
        ksz8_port_setup(dev, dev->cpu_port, true);
 
        for (i = 0; i < dev->phy_port_cnt; i++) {
-               p = &dev->ports[i];
-
                ksz_port_stp_state_set(ds, i, BR_STATE_DISABLED);
-
-               /* Last port may be disabled. */
-               if (i == dev->phy_port_cnt)
-                       break;
-               p->on = 1;
        }
        for (i = 0; i < dev->phy_port_cnt; i++) {
                p = &dev->ports[i];
-               if (!p->on)
-                       continue;
+
                if (!ksz_is_ksz88x3(dev)) {
                        ksz_pread8(dev, i, regs[P_REMOTE_STATUS], &remote);
                        if (remote & KSZ8_PORT_FIBER_MODE)
@@ -1425,14 +1407,14 @@ int ksz8_setup(struct dsa_switch *ds)
        ksz_cfg(dev, S_LINK_AGING_CTRL, SW_LINK_AUTO_AGING, true);
 
        /* Enable aggressive back off algorithm in half duplex mode. */
-       regmap_update_bits(dev->regmap[0], REG_SW_CTRL_1,
+       regmap_update_bits(ksz_regmap_8(dev), REG_SW_CTRL_1,
                           SW_AGGR_BACKOFF, SW_AGGR_BACKOFF);
 
        /*
         * Make sure unicast VLAN boundary is set as default and
         * enable no excessive collision drop.
         */
-       regmap_update_bits(dev->regmap[0], REG_SW_CTRL_2,
+       regmap_update_bits(ksz_regmap_8(dev), REG_SW_CTRL_2,
                           UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP,
                           UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP);
 
index 3698112..fd6e2e6 100644 (file)
@@ -104,6 +104,7 @@ static const struct regmap_config ksz8863_regmap_config[] = {
                .cache_type = REGCACHE_NONE,
                .lock = ksz_regmap_lock,
                .unlock = ksz_regmap_unlock,
+               .max_register = U8_MAX,
        },
        {
                .name = "#16",
@@ -113,6 +114,7 @@ static const struct regmap_config ksz8863_regmap_config[] = {
                .cache_type = REGCACHE_NONE,
                .lock = ksz_regmap_lock,
                .unlock = ksz_regmap_unlock,
+               .max_register = U8_MAX,
        },
        {
                .name = "#32",
@@ -122,11 +124,14 @@ static const struct regmap_config ksz8863_regmap_config[] = {
                .cache_type = REGCACHE_NONE,
                .lock = ksz_regmap_lock,
                .unlock = ksz_regmap_unlock,
+               .max_register = U8_MAX,
        }
 };
 
 static int ksz8863_smi_probe(struct mdio_device *mdiodev)
 {
+       struct device *ddev = &mdiodev->dev;
+       const struct ksz_chip_data *chip;
        struct regmap_config rc;
        struct ksz_device *dev;
        int ret;
@@ -136,9 +141,15 @@ static int ksz8863_smi_probe(struct mdio_device *mdiodev)
        if (!dev)
                return -ENOMEM;
 
-       for (i = 0; i < ARRAY_SIZE(ksz8863_regmap_config); i++) {
+       chip = device_get_match_data(ddev);
+       if (!chip)
+               return -EINVAL;
+
+       for (i = 0; i < __KSZ_NUM_REGMAPS; i++) {
                rc = ksz8863_regmap_config[i];
                rc.lock_arg = &dev->regmap_mutex;
+               rc.wr_table = chip->wr_table;
+               rc.rd_table = chip->rd_table;
                dev->regmap[i] = devm_regmap_init(&mdiodev->dev,
                                                  &regmap_smi[i], dev,
                                                  &rc);
index bf13d47..fc5157a 100644 (file)
 
 static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
 {
-       regmap_update_bits(dev->regmap[0], addr, bits, set ? bits : 0);
+       regmap_update_bits(ksz_regmap_8(dev), addr, bits, set ? bits : 0);
 }
 
 static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
                         bool set)
 {
-       regmap_update_bits(dev->regmap[0], PORT_CTRL_ADDR(port, offset),
+       regmap_update_bits(ksz_regmap_8(dev), PORT_CTRL_ADDR(port, offset),
                           bits, set ? bits : 0);
 }
 
 static void ksz9477_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set)
 {
-       regmap_update_bits(dev->regmap[2], addr, bits, set ? bits : 0);
+       regmap_update_bits(ksz_regmap_32(dev), addr, bits, set ? bits : 0);
 }
 
 static void ksz9477_port_cfg32(struct ksz_device *dev, int port, int offset,
                               u32 bits, bool set)
 {
-       regmap_update_bits(dev->regmap[2], PORT_CTRL_ADDR(port, offset),
+       regmap_update_bits(ksz_regmap_32(dev), PORT_CTRL_ADDR(port, offset),
                           bits, set ? bits : 0);
 }
 
@@ -52,7 +52,7 @@ int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu)
 
        frame_size = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
 
-       return regmap_update_bits(dev->regmap[1], REG_SW_MTU__2,
+       return regmap_update_bits(ksz_regmap_16(dev), REG_SW_MTU__2,
                                  REG_SW_MTU_MASK, frame_size);
 }
 
@@ -60,7 +60,7 @@ static int ksz9477_wait_vlan_ctrl_ready(struct ksz_device *dev)
 {
        unsigned int val;
 
-       return regmap_read_poll_timeout(dev->regmap[0], REG_SW_VLAN_CTRL,
+       return regmap_read_poll_timeout(ksz_regmap_8(dev), REG_SW_VLAN_CTRL,
                                        val, !(val & VLAN_START), 10, 1000);
 }
 
@@ -147,7 +147,7 @@ static int ksz9477_wait_alu_ready(struct ksz_device *dev)
 {
        unsigned int val;
 
-       return regmap_read_poll_timeout(dev->regmap[2], REG_SW_ALU_CTRL__4,
+       return regmap_read_poll_timeout(ksz_regmap_32(dev), REG_SW_ALU_CTRL__4,
                                        val, !(val & ALU_START), 10, 1000);
 }
 
@@ -155,7 +155,7 @@ static int ksz9477_wait_alu_sta_ready(struct ksz_device *dev)
 {
        unsigned int val;
 
-       return regmap_read_poll_timeout(dev->regmap[2],
+       return regmap_read_poll_timeout(ksz_regmap_32(dev),
                                        REG_SW_ALU_STAT_CTRL__4,
                                        val, !(val & ALU_STAT_START),
                                        10, 1000);
@@ -170,7 +170,7 @@ int ksz9477_reset_switch(struct ksz_device *dev)
        ksz_cfg(dev, REG_SW_OPERATION, SW_RESET, true);
 
        /* turn off SPI DO Edge select */
-       regmap_update_bits(dev->regmap[0], REG_SW_GLOBAL_SERIAL_CTRL_0,
+       regmap_update_bits(ksz_regmap_8(dev), REG_SW_GLOBAL_SERIAL_CTRL_0,
                           SPI_AUTO_EDGE_DETECTION, 0);
 
        /* default configuration */
@@ -213,7 +213,7 @@ void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt)
        data |= (addr << MIB_COUNTER_INDEX_S);
        ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, data);
 
-       ret = regmap_read_poll_timeout(dev->regmap[2],
+       ret = regmap_read_poll_timeout(ksz_regmap_32(dev),
                        PORT_CTRL_ADDR(port, REG_PORT_MIB_CTRL_STAT__4),
                        val, !(val & MIB_COUNTER_READ), 10, 1000);
        /* failed to read MIB. get out of loop */
@@ -346,7 +346,7 @@ void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
        const u16 *regs = dev->info->regs;
        u8 data;
 
-       regmap_update_bits(dev->regmap[0], REG_SW_LUE_CTRL_2,
+       regmap_update_bits(ksz_regmap_8(dev), REG_SW_LUE_CTRL_2,
                           SW_FLUSH_OPTION_M << SW_FLUSH_OPTION_S,
                           SW_FLUSH_OPTION_DYN_MAC << SW_FLUSH_OPTION_S);
 
@@ -889,62 +889,6 @@ static phy_interface_t ksz9477_get_interface(struct ksz_device *dev, int port)
        return interface;
 }
 
-static void ksz9477_port_mmd_write(struct ksz_device *dev, int port,
-                                  u8 dev_addr, u16 reg_addr, u16 val)
-{
-       ksz_pwrite16(dev, port, REG_PORT_PHY_MMD_SETUP,
-                    MMD_SETUP(PORT_MMD_OP_INDEX, dev_addr));
-       ksz_pwrite16(dev, port, REG_PORT_PHY_MMD_INDEX_DATA, reg_addr);
-       ksz_pwrite16(dev, port, REG_PORT_PHY_MMD_SETUP,
-                    MMD_SETUP(PORT_MMD_OP_DATA_NO_INCR, dev_addr));
-       ksz_pwrite16(dev, port, REG_PORT_PHY_MMD_INDEX_DATA, val);
-}
-
-static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port)
-{
-       /* Apply PHY settings to address errata listed in
-        * KSZ9477, KSZ9897, KSZ9896, KSZ9567, KSZ8565
-        * Silicon Errata and Data Sheet Clarification documents:
-        *
-        * Register settings are needed to improve PHY receive performance
-        */
-       ksz9477_port_mmd_write(dev, port, 0x01, 0x6f, 0xdd0b);
-       ksz9477_port_mmd_write(dev, port, 0x01, 0x8f, 0x6032);
-       ksz9477_port_mmd_write(dev, port, 0x01, 0x9d, 0x248c);
-       ksz9477_port_mmd_write(dev, port, 0x01, 0x75, 0x0060);
-       ksz9477_port_mmd_write(dev, port, 0x01, 0xd3, 0x7777);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x06, 0x3008);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x08, 0x2001);
-
-       /* Transmit waveform amplitude can be improved
-        * (1000BASE-T, 100BASE-TX, 10BASE-Te)
-        */
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x04, 0x00d0);
-
-       /* Energy Efficient Ethernet (EEE) feature select must
-        * be manually disabled (except on KSZ8565 which is 100Mbit)
-        */
-       if (dev->info->gbit_capable[port])
-               ksz9477_port_mmd_write(dev, port, 0x07, 0x3c, 0x0000);
-
-       /* Register settings are required to meet data sheet
-        * supply current specifications
-        */
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x13, 0x6eff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x14, 0xe6ff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x15, 0x6eff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x16, 0xe6ff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x17, 0x00ff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x18, 0x43ff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x19, 0xc3ff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x1a, 0x6fff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x1b, 0x07ff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x1c, 0x0fff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x1d, 0xe7ff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x1e, 0xefff);
-       ksz9477_port_mmd_write(dev, port, 0x1c, 0x20, 0xeeee);
-}
-
 void ksz9477_get_caps(struct ksz_device *dev, int port,
                      struct phylink_config *config)
 {
@@ -1029,20 +973,10 @@ void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
        /* enable 802.1p priority */
        ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_PRIO_ENABLE, true);
 
-       if (dev->info->internal_phy[port]) {
-               /* do not force flow control */
-               ksz_port_cfg(dev, port, REG_PORT_CTRL_0,
-                            PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL,
-                            false);
-
-               if (dev->info->phy_errata_9477)
-                       ksz9477_phy_errata_setup(dev, port);
-       } else {
-               /* force flow control */
-               ksz_port_cfg(dev, port, REG_PORT_CTRL_0,
-                            PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL,
-                            true);
-       }
+       /* force flow control for non-PHY ports only */
+       ksz_port_cfg(dev, port, REG_PORT_CTRL_0,
+                    PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL,
+                    !dev->info->internal_phy[port]);
 
        if (cpu_port)
                member = dsa_user_ports(ds);
@@ -1165,7 +1099,7 @@ int ksz9477_setup(struct dsa_switch *ds)
        ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_JUMBO_PACKET, true);
 
        /* Now we can configure default MTU value */
-       ret = regmap_update_bits(dev->regmap[1], REG_SW_MTU__2, REG_SW_MTU_MASK,
+       ret = regmap_update_bits(ksz_regmap_16(dev), REG_SW_MTU__2, REG_SW_MTU_MASK,
                                 VLAN_ETH_FRAME_LEN + ETH_FCS_LEN);
        if (ret)
                return ret;
index 97a3172..2710afa 100644 (file)
@@ -24,7 +24,7 @@ static int ksz9477_i2c_probe(struct i2c_client *i2c)
        if (!dev)
                return -ENOMEM;
 
-       for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
+       for (i = 0; i < __KSZ_NUM_REGMAPS; i++) {
                rc = ksz9477_regmap_config[i];
                rc.lock_arg = &dev->regmap_mutex;
                dev->regmap[i] = devm_regmap_init_i2c(i2c, &rc);
@@ -119,7 +119,7 @@ static struct i2c_driver ksz9477_i2c_driver = {
                .name   = "ksz9477-switch",
                .of_match_table = ksz9477_dt_ids,
        },
-       .probe_new = ksz9477_i2c_probe,
+       .probe = ksz9477_i2c_probe,
        .remove = ksz9477_i2c_remove,
        .shutdown = ksz9477_i2c_shutdown,
        .id_table = ksz9477_i2c_id,
index a4428be..813b91a 100644 (file)
@@ -1075,6 +1075,45 @@ static const struct regmap_access_table ksz9896_register_set = {
        .n_yes_ranges = ARRAY_SIZE(ksz9896_valid_regs),
 };
 
+static const struct regmap_range ksz8873_valid_regs[] = {
+       regmap_reg_range(0x00, 0x01),
+       /* global control register */
+       regmap_reg_range(0x02, 0x0f),
+
+       /* port registers */
+       regmap_reg_range(0x10, 0x1d),
+       regmap_reg_range(0x1e, 0x1f),
+       regmap_reg_range(0x20, 0x2d),
+       regmap_reg_range(0x2e, 0x2f),
+       regmap_reg_range(0x30, 0x39),
+       regmap_reg_range(0x3f, 0x3f),
+
+       /* advanced control registers */
+       regmap_reg_range(0x60, 0x6f),
+       regmap_reg_range(0x70, 0x75),
+       regmap_reg_range(0x76, 0x78),
+       regmap_reg_range(0x79, 0x7a),
+       regmap_reg_range(0x7b, 0x83),
+       regmap_reg_range(0x8e, 0x99),
+       regmap_reg_range(0x9a, 0xa5),
+       regmap_reg_range(0xa6, 0xa6),
+       regmap_reg_range(0xa7, 0xaa),
+       regmap_reg_range(0xab, 0xae),
+       regmap_reg_range(0xaf, 0xba),
+       regmap_reg_range(0xbb, 0xbc),
+       regmap_reg_range(0xbd, 0xbd),
+       regmap_reg_range(0xc0, 0xc0),
+       regmap_reg_range(0xc2, 0xc2),
+       regmap_reg_range(0xc3, 0xc3),
+       regmap_reg_range(0xc4, 0xc4),
+       regmap_reg_range(0xc6, 0xc6),
+};
+
+static const struct regmap_access_table ksz8873_register_set = {
+       .yes_ranges = ksz8873_valid_regs,
+       .n_yes_ranges = ARRAY_SIZE(ksz8873_valid_regs),
+};
+
 const struct ksz_chip_data ksz_switch_chips[] = {
        [KSZ8563] = {
                .chip_id = KSZ8563_CHIP_ID,
@@ -1214,6 +1253,8 @@ const struct ksz_chip_data ksz_switch_chips[] = {
                .supports_mii = {false, false, true},
                .supports_rmii = {false, false, true},
                .internal_phy = {true, true, false},
+               .wr_table = &ksz8873_register_set,
+               .rd_table = &ksz8873_register_set,
        },
 
        [KSZ9477] = {
@@ -1229,7 +1270,6 @@ const struct ksz_chip_data ksz_switch_chips[] = {
                .tc_cbs_supported = true,
                .tc_ets_supported = true,
                .ops = &ksz9477_dev_ops,
-               .phy_errata_9477 = true,
                .mib_names = ksz9477_mib_names,
                .mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
                .reg_mib_cnt = MIB_COUNTER_NUM,
@@ -1262,7 +1302,6 @@ const struct ksz_chip_data ksz_switch_chips[] = {
                .port_nirqs = 2,
                .num_tx_queues = 4,
                .ops = &ksz9477_dev_ops,
-               .phy_errata_9477 = true,
                .mib_names = ksz9477_mib_names,
                .mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
                .reg_mib_cnt = MIB_COUNTER_NUM,
@@ -1295,7 +1334,6 @@ const struct ksz_chip_data ksz_switch_chips[] = {
                .port_nirqs = 2,
                .num_tx_queues = 4,
                .ops = &ksz9477_dev_ops,
-               .phy_errata_9477 = true,
                .mib_names = ksz9477_mib_names,
                .mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
                .reg_mib_cnt = MIB_COUNTER_NUM,
@@ -1382,7 +1420,6 @@ const struct ksz_chip_data ksz_switch_chips[] = {
                .tc_cbs_supported = true,
                .tc_ets_supported = true,
                .ops = &ksz9477_dev_ops,
-               .phy_errata_9477 = true,
                .mib_names = ksz9477_mib_names,
                .mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
                .reg_mib_cnt = MIB_COUNTER_NUM,
@@ -2095,7 +2132,7 @@ static int ksz_setup(struct dsa_switch *ds)
        }
 
        /* set broadcast storm protection 10% rate */
-       regmap_update_bits(dev->regmap[1], regs[S_BROADCAST_CTRL],
+       regmap_update_bits(ksz_regmap_16(dev), regs[S_BROADCAST_CTRL],
                           BROADCAST_STORM_RATE,
                           (BROADCAST_STORM_VALUE *
                           BROADCAST_STORM_PROT_RATE) / 100);
@@ -2106,7 +2143,7 @@ static int ksz_setup(struct dsa_switch *ds)
 
        ds->num_tx_queues = dev->info->num_tx_queues;
 
-       regmap_update_bits(dev->regmap[0], regs[S_MULTICAST_CTRL],
+       regmap_update_bits(ksz_regmap_8(dev), regs[S_MULTICAST_CTRL],
                           MULTICAST_STORM_DISABLE, MULTICAST_STORM_DISABLE);
 
        ksz_init_mib_timer(dev);
@@ -2156,7 +2193,7 @@ static int ksz_setup(struct dsa_switch *ds)
        }
 
        /* start switch */
-       regmap_update_bits(dev->regmap[0], regs[S_START_CTRL],
+       regmap_update_bits(ksz_regmap_8(dev), regs[S_START_CTRL],
                           SW_START, SW_START);
 
        return 0;
index 8abecaf..a66b568 100644 (file)
 struct ksz_device;
 struct ksz_port;
 
+enum ksz_regmap_width {
+       KSZ_REGMAP_8,
+       KSZ_REGMAP_16,
+       KSZ_REGMAP_32,
+       __KSZ_NUM_REGMAPS,
+};
+
 struct vlan_table {
        u32 table[3];
 };
@@ -53,7 +60,6 @@ struct ksz_chip_data {
        bool tc_cbs_supported;
        bool tc_ets_supported;
        const struct ksz_dev_ops *ops;
-       bool phy_errata_9477;
        bool ksz87xx_eee_link_erratum;
        const struct ksz_mib_names *mib_names;
        int mib_cnt;
@@ -101,7 +107,6 @@ struct ksz_port {
        int stp_state;
        struct phy_device phydev;
 
-       u32 on:1;                       /* port is not disabled by hardware */
        u32 fiber:1;                    /* port is fiber */
        u32 force:1;
        u32 read:1;                     /* read MIB counters in background */
@@ -137,7 +142,7 @@ struct ksz_device {
        const struct ksz_dev_ops *dev_ops;
 
        struct device *dev;
-       struct regmap *regmap[3];
+       struct regmap *regmap[__KSZ_NUM_REGMAPS];
 
        void *priv;
        int irq;
@@ -377,11 +382,25 @@ phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit);
 extern const struct ksz_chip_data ksz_switch_chips[];
 
 /* Common register access functions */
+static inline struct regmap *ksz_regmap_8(struct ksz_device *dev)
+{
+       return dev->regmap[KSZ_REGMAP_8];
+}
+
+static inline struct regmap *ksz_regmap_16(struct ksz_device *dev)
+{
+       return dev->regmap[KSZ_REGMAP_16];
+}
+
+static inline struct regmap *ksz_regmap_32(struct ksz_device *dev)
+{
+       return dev->regmap[KSZ_REGMAP_32];
+}
 
 static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val)
 {
        unsigned int value;
-       int ret = regmap_read(dev->regmap[0], reg, &value);
+       int ret = regmap_read(ksz_regmap_8(dev), reg, &value);
 
        if (ret)
                dev_err(dev->dev, "can't read 8bit reg: 0x%x %pe\n", reg,
@@ -394,7 +413,7 @@ static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val)
 static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val)
 {
        unsigned int value;
-       int ret = regmap_read(dev->regmap[1], reg, &value);
+       int ret = regmap_read(ksz_regmap_16(dev), reg, &value);
 
        if (ret)
                dev_err(dev->dev, "can't read 16bit reg: 0x%x %pe\n", reg,
@@ -407,7 +426,7 @@ static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val)
 static inline int ksz_read32(struct ksz_device *dev, u32 reg, u32 *val)
 {
        unsigned int value;
-       int ret = regmap_read(dev->regmap[2], reg, &value);
+       int ret = regmap_read(ksz_regmap_32(dev), reg, &value);
 
        if (ret)
                dev_err(dev->dev, "can't read 32bit reg: 0x%x %pe\n", reg,
@@ -422,7 +441,7 @@ static inline int ksz_read64(struct ksz_device *dev, u32 reg, u64 *val)
        u32 value[2];
        int ret;
 
-       ret = regmap_bulk_read(dev->regmap[2], reg, value, 2);
+       ret = regmap_bulk_read(ksz_regmap_32(dev), reg, value, 2);
        if (ret)
                dev_err(dev->dev, "can't read 64bit reg: 0x%x %pe\n", reg,
                        ERR_PTR(ret));
@@ -436,7 +455,7 @@ static inline int ksz_write8(struct ksz_device *dev, u32 reg, u8 value)
 {
        int ret;
 
-       ret = regmap_write(dev->regmap[0], reg, value);
+       ret = regmap_write(ksz_regmap_8(dev), reg, value);
        if (ret)
                dev_err(dev->dev, "can't write 8bit reg: 0x%x %pe\n", reg,
                        ERR_PTR(ret));
@@ -448,7 +467,7 @@ static inline int ksz_write16(struct ksz_device *dev, u32 reg, u16 value)
 {
        int ret;
 
-       ret = regmap_write(dev->regmap[1], reg, value);
+       ret = regmap_write(ksz_regmap_16(dev), reg, value);
        if (ret)
                dev_err(dev->dev, "can't write 16bit reg: 0x%x %pe\n", reg,
                        ERR_PTR(ret));
@@ -460,7 +479,7 @@ static inline int ksz_write32(struct ksz_device *dev, u32 reg, u32 value)
 {
        int ret;
 
-       ret = regmap_write(dev->regmap[2], reg, value);
+       ret = regmap_write(ksz_regmap_32(dev), reg, value);
        if (ret)
                dev_err(dev->dev, "can't write 32bit reg: 0x%x %pe\n", reg,
                        ERR_PTR(ret));
@@ -473,7 +492,7 @@ static inline int ksz_rmw16(struct ksz_device *dev, u32 reg, u16 mask,
 {
        int ret;
 
-       ret = regmap_update_bits(dev->regmap[1], reg, mask, value);
+       ret = regmap_update_bits(ksz_regmap_16(dev), reg, mask, value);
        if (ret)
                dev_err(dev->dev, "can't rmw 16bit reg 0x%x: %pe\n", reg,
                        ERR_PTR(ret));
@@ -486,7 +505,7 @@ static inline int ksz_rmw32(struct ksz_device *dev, u32 reg, u32 mask,
 {
        int ret;
 
-       ret = regmap_update_bits(dev->regmap[2], reg, mask, value);
+       ret = regmap_update_bits(ksz_regmap_32(dev), reg, mask, value);
        if (ret)
                dev_err(dev->dev, "can't rmw 32bit reg 0x%x: %pe\n", reg,
                        ERR_PTR(ret));
@@ -503,12 +522,19 @@ static inline int ksz_write64(struct ksz_device *dev, u32 reg, u64 value)
        val[0] = swab32(value & 0xffffffffULL);
        val[1] = swab32(value >> 32ULL);
 
-       return regmap_bulk_write(dev->regmap[2], reg, val, 2);
+       return regmap_bulk_write(ksz_regmap_32(dev), reg, val, 2);
 }
 
 static inline int ksz_rmw8(struct ksz_device *dev, int offset, u8 mask, u8 val)
 {
-       return regmap_update_bits(dev->regmap[0], offset, mask, val);
+       int ret;
+
+       ret = regmap_update_bits(ksz_regmap_8(dev), offset, mask, val);
+       if (ret)
+               dev_err(dev->dev, "can't rmw 8bit reg 0x%x: %pe\n", offset,
+                       ERR_PTR(ret));
+
+       return ret;
 }
 
 static inline int ksz_pread8(struct ksz_device *dev, int port, int offset,
@@ -549,12 +575,20 @@ static inline int ksz_pwrite32(struct ksz_device *dev, int port, int offset,
                           data);
 }
 
-static inline void ksz_prmw8(struct ksz_device *dev, int port, int offset,
-                            u8 mask, u8 val)
+static inline int ksz_prmw8(struct ksz_device *dev, int port, int offset,
+                           u8 mask, u8 val)
 {
-       regmap_update_bits(dev->regmap[0],
-                          dev->dev_ops->get_port_addr(port, offset),
-                          mask, val);
+       int ret;
+
+       ret = regmap_update_bits(ksz_regmap_8(dev),
+                                dev->dev_ops->get_port_addr(port, offset),
+                                mask, val);
+       if (ret)
+               dev_err(dev->dev, "can't rmw 8bit reg 0x%x: %pe\n",
+                       dev->dev_ops->get_port_addr(port, offset),
+                       ERR_PTR(ret));
+
+       return ret;
 }
 
 static inline void ksz_regmap_lock(void *__mtx)
@@ -709,9 +743,9 @@ static inline int is_lan937x(struct ksz_device *dev)
 
 #define KSZ_REGMAP_TABLE(ksz, swp, regbits, regpad, regalign)          \
        static const struct regmap_config ksz##_regmap_config[] = {     \
-               KSZ_REGMAP_ENTRY(8, swp, (regbits), (regpad), (regalign)), \
-               KSZ_REGMAP_ENTRY(16, swp, (regbits), (regpad), (regalign)), \
-               KSZ_REGMAP_ENTRY(32, swp, (regbits), (regpad), (regalign)), \
+               [KSZ_REGMAP_8] = KSZ_REGMAP_ENTRY(8, swp, (regbits), (regpad), (regalign)), \
+               [KSZ_REGMAP_16] = KSZ_REGMAP_ENTRY(16, swp, (regbits), (regpad), (regalign)), \
+               [KSZ_REGMAP_32] = KSZ_REGMAP_ENTRY(32, swp, (regbits), (regpad), (regalign)), \
        }
 
 #endif
index 96c52e8..2793384 100644 (file)
@@ -63,7 +63,7 @@ static int ksz_spi_probe(struct spi_device *spi)
        else
                regmap_config = ksz9477_regmap_config;
 
-       for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) {
+       for (i = 0; i < __KSZ_NUM_REGMAPS; i++) {
                rc = regmap_config[i];
                rc.lock_arg = &dev->regmap_mutex;
                rc.wr_table = chip->wr_table;
index 399a390..b479a62 100644 (file)
 
 static int lan937x_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
 {
-       return regmap_update_bits(dev->regmap[0], addr, bits, set ? bits : 0);
+       return regmap_update_bits(ksz_regmap_8(dev), addr, bits, set ? bits : 0);
 }
 
 static int lan937x_port_cfg(struct ksz_device *dev, int port, int offset,
                            u8 bits, bool set)
 {
-       return regmap_update_bits(dev->regmap[0], PORT_CTRL_ADDR(port, offset),
+       return regmap_update_bits(ksz_regmap_8(dev), PORT_CTRL_ADDR(port, offset),
                                  bits, set ? bits : 0);
 }
 
@@ -86,7 +86,7 @@ static int lan937x_internal_phy_write(struct ksz_device *dev, int addr, int reg,
        if (ret < 0)
                return ret;
 
-       ret = regmap_read_poll_timeout(dev->regmap[1], REG_VPHY_IND_CTRL__2,
+       ret = regmap_read_poll_timeout(ksz_regmap_16(dev), REG_VPHY_IND_CTRL__2,
                                       value, !(value & VPHY_IND_BUSY), 10,
                                       1000);
        if (ret < 0) {
@@ -116,7 +116,7 @@ static int lan937x_internal_phy_read(struct ksz_device *dev, int addr, int reg,
        if (ret < 0)
                return ret;
 
-       ret = regmap_read_poll_timeout(dev->regmap[1], REG_VPHY_IND_CTRL__2,
+       ret = regmap_read_poll_timeout(ksz_regmap_16(dev), REG_VPHY_IND_CTRL__2,
                                       value, !(value & VPHY_IND_BUSY), 10,
                                       1000);
        if (ret < 0) {
index 08a46ff..8b51756 100644 (file)
@@ -463,11 +463,11 @@ restore_link:
        return err;
 }
 
-static int mv88e6xxx_phy_is_internal(struct dsa_switch *ds, int port)
+static int mv88e6xxx_phy_is_internal(struct mv88e6xxx_chip *chip, int port)
 {
-       struct mv88e6xxx_chip *chip = ds->priv;
-
-       return port < chip->info->num_internal_phys;
+       return port >= chip->info->internal_phys_offset &&
+               port < chip->info->num_internal_phys +
+                       chip->info->internal_phys_offset;
 }
 
 static int mv88e6xxx_port_ppu_updates(struct mv88e6xxx_chip *chip, int port)
@@ -479,7 +479,7 @@ static int mv88e6xxx_port_ppu_updates(struct mv88e6xxx_chip *chip, int port)
         * report whether the port is internal.
         */
        if (chip->info->family == MV88E6XXX_FAMILY_6250)
-               return port < chip->info->num_internal_phys;
+               return mv88e6xxx_phy_is_internal(chip, port);
 
        err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg);
        if (err) {
@@ -584,7 +584,7 @@ static void mv88e6095_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
 
        config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
 
-       if (mv88e6xxx_phy_is_internal(chip->ds, port)) {
+       if (mv88e6xxx_phy_is_internal(chip, port)) {
                __set_bit(PHY_INTERFACE_MODE_MII, config->supported_interfaces);
        } else {
                if (cmode < ARRAY_SIZE(mv88e6185_phy_interface_modes) &&
@@ -790,6 +790,8 @@ static void mv88e6393x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
        unsigned long *supported = config->supported_interfaces;
        bool is_6191x =
                chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6191X;
+       bool is_6361 =
+               chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6361;
 
        mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
 
@@ -804,13 +806,16 @@ static void mv88e6393x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
                /* 6191X supports >1G modes only on port 10 */
                if (!is_6191x || port == 10) {
                        __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported);
-                       __set_bit(PHY_INTERFACE_MODE_5GBASER, supported);
-                       __set_bit(PHY_INTERFACE_MODE_10GBASER, supported);
-                       /* FIXME: USXGMII is not supported yet */
-                       /* __set_bit(PHY_INTERFACE_MODE_USXGMII, supported); */
-
-                       config->mac_capabilities |= MAC_2500FD | MAC_5000FD |
-                               MAC_10000FD;
+                       config->mac_capabilities |= MAC_2500FD;
+
+                       /* 6361 only supports up to 2500BaseX */
+                       if (!is_6361) {
+                               __set_bit(PHY_INTERFACE_MODE_5GBASER, supported);
+                               __set_bit(PHY_INTERFACE_MODE_10GBASER, supported);
+                               __set_bit(PHY_INTERFACE_MODE_USXGMII, supported);
+                               config->mac_capabilities |= MAC_5000FD |
+                                       MAC_10000FD;
+                       }
                }
        }
 
@@ -832,7 +837,7 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port,
        chip->info->ops->phylink_get_caps(chip, port, config);
        mv88e6xxx_reg_unlock(chip);
 
-       if (mv88e6xxx_phy_is_internal(ds, port)) {
+       if (mv88e6xxx_phy_is_internal(chip, port)) {
                __set_bit(PHY_INTERFACE_MODE_INTERNAL,
                          config->supported_interfaces);
                /* Internal ports with no phy-mode need GMII for PHYLIB */
@@ -841,29 +846,38 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port,
        }
 }
 
+static int mv88e6xxx_mac_prepare(struct dsa_switch *ds, int port,
+                                unsigned int mode, phy_interface_t interface)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err = 0;
+
+       /* In inband mode, the link may come up at any time while the link
+        * is not forced down. Force the link down while we reconfigure the
+        * interface mode.
+        */
+       if (mode == MLO_AN_INBAND &&
+           chip->ports[port].interface != interface &&
+           chip->info->ops->port_set_link) {
+               mv88e6xxx_reg_lock(chip);
+               err = chip->info->ops->port_set_link(chip, port,
+                                                    LINK_FORCED_DOWN);
+               mv88e6xxx_reg_unlock(chip);
+       }
+
+       return err;
+}
+
 static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
                                 unsigned int mode,
                                 const struct phylink_link_state *state)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       struct mv88e6xxx_port *p;
        int err = 0;
 
-       p = &chip->ports[port];
-
        mv88e6xxx_reg_lock(chip);
 
-       if (mode != MLO_AN_PHY || !mv88e6xxx_phy_is_internal(ds, port)) {
-               /* In inband mode, the link may come up at any time while the
-                * link is not forced down. Force the link down while we
-                * reconfigure the interface mode.
-                */
-               if (mode == MLO_AN_INBAND &&
-                   p->interface != state->interface &&
-                   chip->info->ops->port_set_link)
-                       chip->info->ops->port_set_link(chip, port,
-                                                      LINK_FORCED_DOWN);
-
+       if (mode != MLO_AN_PHY || !mv88e6xxx_phy_is_internal(chip, port)) {
                err = mv88e6xxx_port_config_interface(chip, port,
                                                      state->interface);
                if (err && err != -EOPNOTSUPP)
@@ -880,24 +894,38 @@ static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
                        err = 0;
        }
 
+err_unlock:
+       mv88e6xxx_reg_unlock(chip);
+
+       if (err && err != -EOPNOTSUPP)
+               dev_err(ds->dev, "p%d: failed to configure MAC/PCS\n", port);
+}
+
+static int mv88e6xxx_mac_finish(struct dsa_switch *ds, int port,
+                               unsigned int mode, phy_interface_t interface)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err = 0;
+
        /* Undo the forced down state above after completing configuration
         * irrespective of its state on entry, which allows the link to come
         * up in the in-band case where there is no separate SERDES. Also
         * ensure that the link can come up if the PPU is in use and we are
         * in PHY mode (we treat the PPU as an effective in-band mechanism.)
         */
+       mv88e6xxx_reg_lock(chip);
+
        if (chip->info->ops->port_set_link &&
-           ((mode == MLO_AN_INBAND && p->interface != state->interface) ||
+           ((mode == MLO_AN_INBAND &&
+             chip->ports[port].interface != interface) ||
             (mode == MLO_AN_PHY && mv88e6xxx_port_ppu_updates(chip, port))))
-               chip->info->ops->port_set_link(chip, port, LINK_UNFORCED);
-
-       p->interface = state->interface;
+               err = chip->info->ops->port_set_link(chip, port, LINK_UNFORCED);
 
-err_unlock:
        mv88e6xxx_reg_unlock(chip);
 
-       if (err && err != -EOPNOTSUPP)
-               dev_err(ds->dev, "p%d: failed to configure MAC/PCS\n", port);
+       chip->ports[port].interface = interface;
+
+       return err;
 }
 
 static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
@@ -3311,7 +3339,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                caps = pl_config.mac_capabilities;
 
                if (chip->info->ops->port_max_speed_mode)
-                       mode = chip->info->ops->port_max_speed_mode(port);
+                       mode = chip->info->ops->port_max_speed_mode(chip, port);
                else
                        mode = PHY_INTERFACE_MODE_NA;
 
@@ -5043,6 +5071,7 @@ static const struct mv88e6xxx_ops mv88e6250_ops = {
        .avb_ops = &mv88e6352_avb_ops,
        .ptp_ops = &mv88e6250_ptp_ops,
        .phylink_get_caps = mv88e6250_phylink_get_caps,
+       .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
 static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -5642,6 +5671,46 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = {
 };
 
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
+       [MV88E6020] = {
+               .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6020,
+               .family = MV88E6XXX_FAMILY_6250,
+               .name = "Marvell 88E6020",
+               .num_databases = 64,
+               .num_ports = 4,
+               .num_internal_phys = 2,
+               .max_vid = 4095,
+               .port_base_addr = 0x8,
+               .phy_base_addr = 0x0,
+               .global1_addr = 0xf,
+               .global2_addr = 0x7,
+               .age_time_coeff = 15000,
+               .g1_irqs = 9,
+               .g2_irqs = 5,
+               .atu_move_port_mask = 0xf,
+               .dual_chip = true,
+               .ops = &mv88e6250_ops,
+       },
+
+       [MV88E6071] = {
+               .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6071,
+               .family = MV88E6XXX_FAMILY_6250,
+               .name = "Marvell 88E6071",
+               .num_databases = 64,
+               .num_ports = 7,
+               .num_internal_phys = 5,
+               .max_vid = 4095,
+               .port_base_addr = 0x08,
+               .phy_base_addr = 0x00,
+               .global1_addr = 0x0f,
+               .global2_addr = 0x07,
+               .age_time_coeff = 15000,
+               .g1_irqs = 9,
+               .g2_irqs = 5,
+               .atu_move_port_mask = 0xf,
+               .dual_chip = true,
+               .ops = &mv88e6250_ops,
+       },
+
        [MV88E6085] = {
                .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6085,
                .family = MV88E6XXX_FAMILY_6097,
@@ -6024,7 +6093,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6191X",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
-               .num_internal_phys = 9,
+               .num_internal_phys = 8,
+               .internal_phys_offset = 1,
                .max_vid = 8191,
                .max_sid = 63,
                .port_base_addr = 0x0,
@@ -6047,7 +6117,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6193X",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
-               .num_internal_phys = 9,
+               .num_internal_phys = 8,
+               .internal_phys_offset = 1,
                .max_vid = 8191,
                .max_sid = 63,
                .port_base_addr = 0x0,
@@ -6309,6 +6380,32 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .ptp_support = true,
                .ops = &mv88e6352_ops,
        },
+       [MV88E6361] = {
+               .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6361,
+               .family = MV88E6XXX_FAMILY_6393,
+               .name = "Marvell 88E6361",
+               .num_databases = 4096,
+               .num_macs = 16384,
+               .num_ports = 11,
+               /* Ports 1, 2 and 8 are not routed */
+               .invalid_port_mask = BIT(1) | BIT(2) | BIT(8),
+               .num_internal_phys = 5,
+               .internal_phys_offset = 3,
+               .max_vid = 4095,
+               .max_sid = 63,
+               .port_base_addr = 0x0,
+               .phy_base_addr = 0x0,
+               .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
+               .age_time_coeff = 3750,
+               .g1_irqs = 10,
+               .g2_irqs = 14,
+               .atu_move_port_mask = 0x1f,
+               .pvt = true,
+               .multi_chip = true,
+               .ptp_support = true,
+               .ops = &mv88e6393x_ops,
+       },
        [MV88E6390] = {
                .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6390,
                .family = MV88E6XXX_FAMILY_6390,
@@ -6366,7 +6463,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6393X",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
-               .num_internal_phys = 9,
+               .num_internal_phys = 8,
+               .internal_phys_offset = 1,
                .max_vid = 8191,
                .max_sid = 63,
                .port_base_addr = 0x0,
@@ -7002,7 +7100,9 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .port_teardown          = mv88e6xxx_port_teardown,
        .phylink_get_caps       = mv88e6xxx_get_caps,
        .phylink_mac_link_state = mv88e6xxx_serdes_pcs_get_state,
+       .phylink_mac_prepare    = mv88e6xxx_mac_prepare,
        .phylink_mac_config     = mv88e6xxx_mac_config,
+       .phylink_mac_finish     = mv88e6xxx_mac_finish,
        .phylink_mac_an_restart = mv88e6xxx_serdes_pcs_an_restart,
        .phylink_mac_link_down  = mv88e6xxx_mac_link_down,
        .phylink_mac_link_up    = mv88e6xxx_mac_link_up,
index da6e133..0ad34b2 100644 (file)
@@ -54,6 +54,8 @@ enum mv88e6xxx_frame_mode {
 
 /* List of supported models */
 enum mv88e6xxx_model {
+       MV88E6020,
+       MV88E6071,
        MV88E6085,
        MV88E6095,
        MV88E6097,
@@ -82,6 +84,7 @@ enum mv88e6xxx_model {
        MV88E6350,
        MV88E6351,
        MV88E6352,
+       MV88E6361,
        MV88E6390,
        MV88E6390X,
        MV88E6393X,
@@ -94,13 +97,13 @@ enum mv88e6xxx_family {
        MV88E6XXX_FAMILY_6097,  /* 6046 6085 6096 6097 */
        MV88E6XXX_FAMILY_6165,  /* 6123 6161 6165 */
        MV88E6XXX_FAMILY_6185,  /* 6108 6121 6122 6131 6152 6155 6182 6185 */
-       MV88E6XXX_FAMILY_6250,  /* 6220 6250 */
+       MV88E6XXX_FAMILY_6250,  /* 6220 6250 6020 6071 */
        MV88E6XXX_FAMILY_6320,  /* 6320 6321 */
        MV88E6XXX_FAMILY_6341,  /* 6141 6341 */
        MV88E6XXX_FAMILY_6351,  /* 6171 6175 6350 6351 */
        MV88E6XXX_FAMILY_6352,  /* 6172 6176 6240 6352 */
        MV88E6XXX_FAMILY_6390,  /* 6190 6190X 6191 6290 6390 6390X */
-       MV88E6XXX_FAMILY_6393,  /* 6191X 6193X 6393X */
+       MV88E6XXX_FAMILY_6393,  /* 6191X 6193X 6361 6393X */
 };
 
 /**
@@ -167,6 +170,11 @@ struct mv88e6xxx_info {
 
        /* Supports PTP */
        bool ptp_support;
+
+       /* Internal PHY start index. 0 means that internal PHYs range starts at
+        * port 0, 1 means internal PHYs range starts at port 1, etc
+        */
+       unsigned int internal_phys_offset;
 };
 
 struct mv88e6xxx_atu_entry {
@@ -513,7 +521,8 @@ struct mv88e6xxx_ops {
                                     int speed, int duplex);
 
        /* What interface mode should be used for maximum speed? */
-       phy_interface_t (*port_max_speed_mode)(int port);
+       phy_interface_t (*port_max_speed_mode)(struct mv88e6xxx_chip *chip,
+                                              int port);
 
        int (*port_tag_remap)(struct mv88e6xxx_chip *chip, int port);
 
index 6158968..937a01f 100644 (file)
@@ -1196,9 +1196,12 @@ out:
 int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
                                struct mii_bus *bus)
 {
+       int phy_start = chip->info->internal_phys_offset;
+       int phy_end = chip->info->internal_phys_offset +
+                     chip->info->num_internal_phys;
        int phy, irq;
 
-       for (phy = 0; phy < chip->info->num_internal_phys; phy++) {
+       for (phy = phy_start; phy < phy_end; phy++) {
                irq = irq_find_mapping(chip->g2_irq.domain, phy);
                if (irq < 0)
                        return irq;
index f79cf71..dd66ec9 100644 (file)
@@ -342,7 +342,8 @@ int mv88e6341_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
                                               duplex);
 }
 
-phy_interface_t mv88e6341_port_max_speed_mode(int port)
+phy_interface_t mv88e6341_port_max_speed_mode(struct mv88e6xxx_chip *chip,
+                                             int port)
 {
        if (port == 5)
                return PHY_INTERFACE_MODE_2500BASEX;
@@ -381,7 +382,8 @@ int mv88e6390_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
                                               duplex);
 }
 
-phy_interface_t mv88e6390_port_max_speed_mode(int port)
+phy_interface_t mv88e6390_port_max_speed_mode(struct mv88e6xxx_chip *chip,
+                                             int port)
 {
        if (port == 9 || port == 10)
                return PHY_INTERFACE_MODE_2500BASEX;
@@ -403,7 +405,8 @@ int mv88e6390x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
                                               duplex);
 }
 
-phy_interface_t mv88e6390x_port_max_speed_mode(int port)
+phy_interface_t mv88e6390x_port_max_speed_mode(struct mv88e6xxx_chip *chip,
+                                              int port)
 {
        if (port == 9 || port == 10)
                return PHY_INTERFACE_MODE_XAUI;
@@ -421,6 +424,10 @@ int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
        u16 reg, ctrl;
        int err;
 
+       if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6361 &&
+           speed > 2500)
+               return -EOPNOTSUPP;
+
        if (speed == 200 && port != 0)
                return -EOPNOTSUPP;
 
@@ -500,12 +507,17 @@ int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
        return 0;
 }
 
-phy_interface_t mv88e6393x_port_max_speed_mode(int port)
+phy_interface_t mv88e6393x_port_max_speed_mode(struct mv88e6xxx_chip *chip,
+                                              int port)
 {
-       if (port == 0 || port == 9 || port == 10)
-               return PHY_INTERFACE_MODE_10GBASER;
 
-       return PHY_INTERFACE_MODE_NA;
+       if (port != 0 && port != 9 && port != 10)
+               return PHY_INTERFACE_MODE_NA;
+
+       if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6361)
+               return PHY_INTERFACE_MODE_2500BASEX;
+
+       return PHY_INTERFACE_MODE_10GBASER;
 }
 
 static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
@@ -554,6 +566,9 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
        case PHY_INTERFACE_MODE_10GBASER:
                cmode = MV88E6393X_PORT_STS_CMODE_10GBASER;
                break;
+       case PHY_INTERFACE_MODE_USXGMII:
+               cmode = MV88E6393X_PORT_STS_CMODE_USXGMII;
+               break;
        default:
                cmode = 0;
        }
index d19b630..86deeb3 100644 (file)
 /* Offset 0x03: Switch Identifier Register */
 #define MV88E6XXX_PORT_SWITCH_ID               0x03
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_MASK     0xfff0
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6020     0x0200
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6071     0x0710
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6085     0x04a0
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6095     0x0950
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6097     0x0990
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6220     0x2200
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6240     0x2400
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6250     0x2500
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6361     0x2610
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6290     0x2900
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6321     0x3100
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6141     0x3400
@@ -359,10 +362,14 @@ int mv88e6390x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
 int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
                                     int speed, int duplex);
 
-phy_interface_t mv88e6341_port_max_speed_mode(int port);
-phy_interface_t mv88e6390_port_max_speed_mode(int port);
-phy_interface_t mv88e6390x_port_max_speed_mode(int port);
-phy_interface_t mv88e6393x_port_max_speed_mode(int port);
+phy_interface_t mv88e6341_port_max_speed_mode(struct mv88e6xxx_chip *chip,
+                                             int port);
+phy_interface_t mv88e6390_port_max_speed_mode(struct mv88e6xxx_chip *chip,
+                                             int port);
+phy_interface_t mv88e6390x_port_max_speed_mode(struct mv88e6xxx_chip *chip,
+                                              int port);
+phy_interface_t mv88e6393x_port_max_speed_mode(struct mv88e6xxx_chip *chip,
+                                              int port);
 
 int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state);
 
index 72faec8..80167d5 100644 (file)
@@ -683,7 +683,8 @@ int mv88e6393x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
            cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
            cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
            cmode == MV88E6393X_PORT_STS_CMODE_5GBASER ||
-           cmode == MV88E6393X_PORT_STS_CMODE_10GBASER)
+           cmode == MV88E6393X_PORT_STS_CMODE_10GBASER ||
+           cmode == MV88E6393X_PORT_STS_CMODE_USXGMII)
                lane = port;
 
        return lane;
@@ -984,7 +985,42 @@ static int mv88e6393x_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
                        state->speed = SPEED_10000;
                state->duplex = DUPLEX_FULL;
        }
+       return 0;
+}
+
+/* USXGMII registers for Marvell switch 88e639x are undocumented and this function is based
+ * on some educated guesses. It appears that there are no status bits related to
+ * autonegotiation complete or flow control.
+ */
+static int mv88e639x_serdes_pcs_get_state_usxgmii(struct mv88e6xxx_chip *chip,
+                                                 int port, int lane,
+                                                 struct phylink_link_state *state)
+{
+       u16 status, lp_status;
+       int err;
 
+       err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+                                   MV88E6390_USXGMII_PHY_STATUS, &status);
+       if (err) {
+               dev_err(chip->dev, "can't read Serdes USXGMII PHY status: %d\n", err);
+               return err;
+       }
+       dev_dbg(chip->dev, "USXGMII PHY status: 0x%x\n", status);
+
+       state->link = !!(status & MDIO_USXGMII_LINK);
+       state->an_complete = state->link;
+
+       if (state->link) {
+               err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+                                           MV88E6390_USXGMII_LP_STATUS, &lp_status);
+               if (err) {
+                       dev_err(chip->dev, "can't read Serdes USXGMII LP status: %d\n", err);
+                       return err;
+               }
+               dev_dbg(chip->dev, "USXGMII LP status: 0x%x\n", lp_status);
+               /* lp_status appears to include the "link" bit as per USXGMII spec. */
+               phylink_decode_usxgmii_word(state, lp_status);
+       }
        return 0;
 }
 
@@ -1020,6 +1056,9 @@ int mv88e6393x_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
        case PHY_INTERFACE_MODE_10GBASER:
                return mv88e6393x_serdes_pcs_get_state_10g(chip, port, lane,
                                                           state);
+       case PHY_INTERFACE_MODE_USXGMII:
+               return mv88e639x_serdes_pcs_get_state_usxgmii(chip, port, lane,
+                                                          state);
 
        default:
                return -EOPNOTSUPP;
@@ -1173,6 +1212,7 @@ int mv88e6393x_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port,
                return mv88e6390_serdes_irq_enable_sgmii(chip, lane, enable);
        case MV88E6393X_PORT_STS_CMODE_5GBASER:
        case MV88E6393X_PORT_STS_CMODE_10GBASER:
+       case MV88E6393X_PORT_STS_CMODE_USXGMII:
                return mv88e6393x_serdes_irq_enable_10g(chip, lane, enable);
        }
 
@@ -1213,6 +1253,7 @@ irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
                break;
        case MV88E6393X_PORT_STS_CMODE_5GBASER:
        case MV88E6393X_PORT_STS_CMODE_10GBASER:
+       case MV88E6393X_PORT_STS_CMODE_USXGMII:
                err = mv88e6393x_serdes_irq_status_10g(chip, lane, &status);
                if (err)
                        return err;
@@ -1477,7 +1518,8 @@ static int mv88e6393x_serdes_erratum_5_2(struct mv88e6xxx_chip *chip, int lane,
         * to SERDES operating in 10G mode. These registers only apply to 10G
         * operation and have no effect on other speeds.
         */
-       if (cmode != MV88E6393X_PORT_STS_CMODE_10GBASER)
+       if (cmode != MV88E6393X_PORT_STS_CMODE_10GBASER &&
+           cmode != MV88E6393X_PORT_STS_CMODE_USXGMII)
                return 0;
 
        for (i = 0; i < ARRAY_SIZE(fixes); ++i) {
@@ -1582,6 +1624,7 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
                break;
        case MV88E6393X_PORT_STS_CMODE_5GBASER:
        case MV88E6393X_PORT_STS_CMODE_10GBASER:
+       case MV88E6393X_PORT_STS_CMODE_USXGMII:
                err = mv88e6390_serdes_power_10g(chip, lane, on);
                break;
        default:
index 29bb4e9..e245687 100644 (file)
 #define MV88E6393X_10G_INT_LINK_CHANGE BIT(2)
 #define MV88E6393X_10G_INT_STATUS      0x9001
 
+/* USXGMII */
+#define MV88E6390_USXGMII_LP_STATUS       0xf0a2
+#define MV88E6390_USXGMII_PHY_STATUS      0xf0a6
+
 /* 1000BASE-X and SGMII */
 #define MV88E6390_SGMII_BMCR           (0x2000 + MII_BMCR)
 #define MV88E6390_SGMII_BMSR           (0x2000 + MII_BMSR)
index cfb3fae..903532e 100644 (file)
@@ -1021,7 +1021,6 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
        for (port = 0; port < felix->info->num_ports; port++) {
                struct ocelot_port *ocelot_port = ocelot->ports[port];
                struct phylink_pcs *phylink_pcs;
-               struct mdio_device *mdio_device;
 
                if (dsa_is_unused_port(felix->ds, port))
                        continue;
@@ -1029,16 +1028,10 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
                if (ocelot_port->phy_mode == PHY_INTERFACE_MODE_INTERNAL)
                        continue;
 
-               mdio_device = mdio_device_create(felix->imdio, port);
-               if (IS_ERR(mdio_device))
+               phylink_pcs = lynx_pcs_create_mdiodev(felix->imdio, port);
+               if (IS_ERR(phylink_pcs))
                        continue;
 
-               phylink_pcs = lynx_pcs_create(mdio_device);
-               if (!phylink_pcs) {
-                       mdio_device_free(mdio_device);
-                       continue;
-               }
-
                felix->pcs[port] = phylink_pcs;
 
                dev_info(dev, "Found PCS at internal MDIO address %d\n", port);
@@ -1054,14 +1047,9 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
 
        for (port = 0; port < ocelot->num_phys_ports; port++) {
                struct phylink_pcs *phylink_pcs = felix->pcs[port];
-               struct mdio_device *mdio_device;
 
-               if (!phylink_pcs)
-                       continue;
-
-               mdio_device = lynx_get_mdio_device(phylink_pcs);
-               mdio_device_free(mdio_device);
-               lynx_pcs_destroy(phylink_pcs);
+               if (phylink_pcs)
+                       lynx_pcs_destroy(phylink_pcs);
        }
        mdiobus_unregister(felix->imdio);
        mdiobus_free(felix->imdio);
@@ -1423,7 +1411,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
 
        mutex_lock(&ocelot->tas_lock);
 
-       if (!taprio->enable) {
+       if (taprio->cmd == TAPRIO_CMD_DESTROY) {
                ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
                ocelot_rmw_rix(ocelot, 0, QSYS_TAG_CONFIG_ENABLE,
                               QSYS_TAG_CONFIG, port);
@@ -1435,6 +1423,9 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
 
                mutex_unlock(&ocelot->tas_lock);
                return 0;
+       } else if (taprio->cmd != TAPRIO_CMD_REPLACE) {
+               ret = -EOPNOTSUPP;
+               goto err_unlock;
        }
 
        ret = ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
index 96d4972..15003b2 100644 (file)
@@ -912,7 +912,6 @@ static int vsc9953_mdio_bus_alloc(struct ocelot *ocelot)
        for (port = 0; port < felix->info->num_ports; port++) {
                struct ocelot_port *ocelot_port = ocelot->ports[port];
                struct phylink_pcs *phylink_pcs;
-               struct mdio_device *mdio_device;
                int addr = port + 4;
 
                if (dsa_is_unused_port(felix->ds, port))
@@ -921,16 +920,10 @@ static int vsc9953_mdio_bus_alloc(struct ocelot *ocelot)
                if (ocelot_port->phy_mode == PHY_INTERFACE_MODE_INTERNAL)
                        continue;
 
-               mdio_device = mdio_device_create(felix->imdio, addr);
-               if (IS_ERR(mdio_device))
+               phylink_pcs = lynx_pcs_create_mdiodev(felix->imdio, addr);
+               if (IS_ERR(phylink_pcs))
                        continue;
 
-               phylink_pcs = lynx_pcs_create(mdio_device);
-               if (!phylink_pcs) {
-                       mdio_device_free(mdio_device);
-                       continue;
-               }
-
                felix->pcs[port] = phylink_pcs;
 
                dev_info(dev, "Found PCS at internal MDIO address %d\n", addr);
@@ -946,14 +939,9 @@ static void vsc9953_mdio_bus_free(struct ocelot *ocelot)
 
        for (port = 0; port < ocelot->num_phys_ports; port++) {
                struct phylink_pcs *phylink_pcs = felix->pcs[port];
-               struct mdio_device *mdio_device;
-
-               if (!phylink_pcs)
-                       continue;
 
-               mdio_device = lynx_get_mdio_device(phylink_pcs);
-               mdio_device_free(mdio_device);
-               lynx_pcs_destroy(phylink_pcs);
+               if (phylink_pcs)
+                       lynx_pcs_destroy(phylink_pcs);
        }
 
        /* mdiobus_unregister and mdiobus_free handled by devres */
index e7b98b8..b2bf78a 100644 (file)
@@ -391,7 +391,7 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv)
 
 static int ar9331_sw_setup_port(struct dsa_switch *ds, int port)
 {
-       struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+       struct ar9331_sw_priv *priv = ds->priv;
        struct regmap *regmap = priv->regmap;
        u32 port_mask, port_ctrl, val;
        int ret;
@@ -439,7 +439,7 @@ error:
 
 static int ar9331_sw_setup(struct dsa_switch *ds)
 {
-       struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+       struct ar9331_sw_priv *priv = ds->priv;
        struct regmap *regmap = priv->regmap;
        int ret, i;
 
@@ -484,7 +484,7 @@ error:
 
 static void ar9331_sw_port_disable(struct dsa_switch *ds, int port)
 {
-       struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+       struct ar9331_sw_priv *priv = ds->priv;
        struct regmap *regmap = priv->regmap;
        int ret;
 
@@ -527,7 +527,7 @@ static void ar9331_sw_phylink_mac_config(struct dsa_switch *ds, int port,
                                         unsigned int mode,
                                         const struct phylink_link_state *state)
 {
-       struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+       struct ar9331_sw_priv *priv = ds->priv;
        struct regmap *regmap = priv->regmap;
        int ret;
 
@@ -542,7 +542,7 @@ static void ar9331_sw_phylink_mac_link_down(struct dsa_switch *ds, int port,
                                            unsigned int mode,
                                            phy_interface_t interface)
 {
-       struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+       struct ar9331_sw_priv *priv = ds->priv;
        struct ar9331_sw_port *p = &priv->port[port];
        struct regmap *regmap = priv->regmap;
        int ret;
@@ -562,7 +562,7 @@ static void ar9331_sw_phylink_mac_link_up(struct dsa_switch *ds, int port,
                                          int speed, int duplex,
                                          bool tx_pause, bool rx_pause)
 {
-       struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+       struct ar9331_sw_priv *priv = ds->priv;
        struct ar9331_sw_port *p = &priv->port[port];
        struct regmap *regmap = priv->regmap;
        u32 val;
@@ -665,7 +665,7 @@ static void ar9331_do_stats_poll(struct work_struct *work)
 static void ar9331_get_stats64(struct dsa_switch *ds, int port,
                               struct rtnl_link_stats64 *s)
 {
-       struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+       struct ar9331_sw_priv *priv = ds->priv;
        struct ar9331_sw_port *p = &priv->port[port];
 
        spin_lock(&p->stats_lock);
@@ -676,7 +676,7 @@ static void ar9331_get_stats64(struct dsa_switch *ds, int port,
 static void ar9331_get_pause_stats(struct dsa_switch *ds, int port,
                                   struct ethtool_pause_stats *pause_stats)
 {
-       struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+       struct ar9331_sw_priv *priv = ds->priv;
        struct ar9331_sw_port *p = &priv->port[port];
 
        spin_lock(&p->stats_lock);
index 6d5ac75..dee7b65 100644 (file)
@@ -1756,7 +1756,7 @@ static int qca8k_connect_tag_protocol(struct dsa_switch *ds,
 static int
 qca8k_setup(struct dsa_switch *ds)
 {
-       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       struct qca8k_priv *priv = ds->priv;
        int cpu_port, ret, i;
        u32 mask;
 
index 96773e4..8c2dc0e 100644 (file)
@@ -760,7 +760,7 @@ int qca8k_port_fdb_add(struct dsa_switch *ds, int port,
                       const unsigned char *addr, u16 vid,
                       struct dsa_db db)
 {
-       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       struct qca8k_priv *priv = ds->priv;
        u16 port_mask = BIT(port);
 
        return qca8k_port_fdb_insert(priv, addr, port_mask, vid);
@@ -770,7 +770,7 @@ int qca8k_port_fdb_del(struct dsa_switch *ds, int port,
                       const unsigned char *addr, u16 vid,
                       struct dsa_db db)
 {
-       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       struct qca8k_priv *priv = ds->priv;
        u16 port_mask = BIT(port);
 
        if (!vid)
@@ -782,7 +782,7 @@ int qca8k_port_fdb_del(struct dsa_switch *ds, int port,
 int qca8k_port_fdb_dump(struct dsa_switch *ds, int port,
                        dsa_fdb_dump_cb_t *cb, void *data)
 {
-       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       struct qca8k_priv *priv = ds->priv;
        struct qca8k_fdb _fdb = { 0 };
        int cnt = QCA8K_NUM_FDB_RECORDS;
        bool is_static;
index b883692..6f02029 100644 (file)
@@ -5,6 +5,18 @@
 #include "qca8k.h"
 #include "qca8k_leds.h"
 
+static u32 qca8k_phy_to_port(int phy)
+{
+       /* Internal PHY 0 has port at index 1.
+        * Internal PHY 1 has port at index 2.
+        * Internal PHY 2 has port at index 3.
+        * Internal PHY 3 has port at index 4.
+        * Internal PHY 4 has port at index 5.
+        */
+
+       return phy + 1;
+}
+
 static int
 qca8k_get_enable_led_reg(int port_num, int led_num, struct qca8k_led_pattern_en *reg_info)
 {
@@ -32,6 +44,43 @@ qca8k_get_enable_led_reg(int port_num, int led_num, struct qca8k_led_pattern_en
 }
 
 static int
+qca8k_get_control_led_reg(int port_num, int led_num, struct qca8k_led_pattern_en *reg_info)
+{
+       reg_info->reg = QCA8K_LED_CTRL_REG(led_num);
+
+       /* 6 total control rule:
+        * 3 control rules for phy0-3 that applies to all their leds
+        * 3 control rules for phy4
+        */
+       if (port_num == 4)
+               reg_info->shift = QCA8K_LED_PHY4_CONTROL_RULE_SHIFT;
+       else
+               reg_info->shift = QCA8K_LED_PHY0123_CONTROL_RULE_SHIFT;
+
+       return 0;
+}
+
+static int
+qca8k_parse_netdev(unsigned long rules, u32 *offload_trigger)
+{
+       /* Parsing specific to netdev trigger */
+       if (test_bit(TRIGGER_NETDEV_TX, &rules))
+               *offload_trigger |= QCA8K_LED_TX_BLINK_MASK;
+       if (test_bit(TRIGGER_NETDEV_RX, &rules))
+               *offload_trigger |= QCA8K_LED_RX_BLINK_MASK;
+
+       if (rules && !*offload_trigger)
+               return -EOPNOTSUPP;
+
+       /* Enable some default rule by default to the requested mode:
+        * - Blink at 4Hz by default
+        */
+       *offload_trigger |= QCA8K_LED_BLINK_4HZ;
+
+       return 0;
+}
+
+static int
 qca8k_led_brightness_set(struct qca8k_led *led,
                         enum led_brightness brightness)
 {
@@ -165,6 +214,133 @@ qca8k_cled_blink_set(struct led_classdev *ldev,
 }
 
 static int
+qca8k_cled_trigger_offload(struct led_classdev *ldev, bool enable)
+{
+       struct qca8k_led *led = container_of(ldev, struct qca8k_led, cdev);
+
+       struct qca8k_led_pattern_en reg_info;
+       struct qca8k_priv *priv = led->priv;
+       u32 mask, val = QCA8K_LED_ALWAYS_OFF;
+
+       qca8k_get_enable_led_reg(led->port_num, led->led_num, &reg_info);
+
+       if (enable)
+               val = QCA8K_LED_RULE_CONTROLLED;
+
+       if (led->port_num == 0 || led->port_num == 4) {
+               mask = QCA8K_LED_PATTERN_EN_MASK;
+               val <<= QCA8K_LED_PATTERN_EN_SHIFT;
+       } else {
+               mask = QCA8K_LED_PHY123_PATTERN_EN_MASK;
+       }
+
+       return regmap_update_bits(priv->regmap, reg_info.reg, mask << reg_info.shift,
+                                 val << reg_info.shift);
+}
+
+static bool
+qca8k_cled_hw_control_status(struct led_classdev *ldev)
+{
+       struct qca8k_led *led = container_of(ldev, struct qca8k_led, cdev);
+
+       struct qca8k_led_pattern_en reg_info;
+       struct qca8k_priv *priv = led->priv;
+       u32 val;
+
+       qca8k_get_enable_led_reg(led->port_num, led->led_num, &reg_info);
+
+       regmap_read(priv->regmap, reg_info.reg, &val);
+
+       val >>= reg_info.shift;
+
+       if (led->port_num == 0 || led->port_num == 4) {
+               val &= QCA8K_LED_PATTERN_EN_MASK;
+               val >>= QCA8K_LED_PATTERN_EN_SHIFT;
+       } else {
+               val &= QCA8K_LED_PHY123_PATTERN_EN_MASK;
+       }
+
+       return val == QCA8K_LED_RULE_CONTROLLED;
+}
+
+static int
+qca8k_cled_hw_control_is_supported(struct led_classdev *ldev, unsigned long rules)
+{
+       u32 offload_trigger = 0;
+
+       return qca8k_parse_netdev(rules, &offload_trigger);
+}
+
+static int
+qca8k_cled_hw_control_set(struct led_classdev *ldev, unsigned long rules)
+{
+       struct qca8k_led *led = container_of(ldev, struct qca8k_led, cdev);
+       struct qca8k_led_pattern_en reg_info;
+       struct qca8k_priv *priv = led->priv;
+       u32 offload_trigger = 0;
+       int ret;
+
+       ret = qca8k_parse_netdev(rules, &offload_trigger);
+       if (ret)
+               return ret;
+
+       ret = qca8k_cled_trigger_offload(ldev, true);
+       if (ret)
+               return ret;
+
+       qca8k_get_control_led_reg(led->port_num, led->led_num, &reg_info);
+
+       return regmap_update_bits(priv->regmap, reg_info.reg,
+                                 QCA8K_LED_RULE_MASK << reg_info.shift,
+                                 offload_trigger << reg_info.shift);
+}
+
+static int
+qca8k_cled_hw_control_get(struct led_classdev *ldev, unsigned long *rules)
+{
+       struct qca8k_led *led = container_of(ldev, struct qca8k_led, cdev);
+       struct qca8k_led_pattern_en reg_info;
+       struct qca8k_priv *priv = led->priv;
+       u32 val;
+       int ret;
+
+       /* With hw control not active return err */
+       if (!qca8k_cled_hw_control_status(ldev))
+               return -EINVAL;
+
+       qca8k_get_control_led_reg(led->port_num, led->led_num, &reg_info);
+
+       ret = regmap_read(priv->regmap, reg_info.reg, &val);
+       if (ret)
+               return ret;
+
+       val >>= reg_info.shift;
+       val &= QCA8K_LED_RULE_MASK;
+
+       /* Parsing specific to netdev trigger */
+       if (val & QCA8K_LED_TX_BLINK_MASK)
+               set_bit(TRIGGER_NETDEV_TX, rules);
+       if (val & QCA8K_LED_RX_BLINK_MASK)
+               set_bit(TRIGGER_NETDEV_RX, rules);
+
+       return 0;
+}
+
+static struct device *qca8k_cled_hw_control_get_device(struct led_classdev *ldev)
+{
+       struct qca8k_led *led = container_of(ldev, struct qca8k_led, cdev);
+       struct qca8k_priv *priv = led->priv;
+       struct dsa_port *dp;
+
+       dp = dsa_to_port(priv->ds, qca8k_phy_to_port(led->port_num));
+       if (!dp)
+               return NULL;
+       if (dp->slave)
+               return &dp->slave->dev;
+       return NULL;
+}
+
+static int
 qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int port_num)
 {
        struct fwnode_handle *led = NULL, *leds = NULL;
@@ -224,6 +400,11 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p
                port_led->cdev.max_brightness = 1;
                port_led->cdev.brightness_set_blocking = qca8k_cled_brightness_set_blocking;
                port_led->cdev.blink_set = qca8k_cled_blink_set;
+               port_led->cdev.hw_control_is_supported = qca8k_cled_hw_control_is_supported;
+               port_led->cdev.hw_control_set = qca8k_cled_hw_control_set;
+               port_led->cdev.hw_control_get = qca8k_cled_hw_control_get;
+               port_led->cdev.hw_control_get_device = qca8k_cled_hw_control_get_device;
+               port_led->cdev.hw_control_trigger = "netdev";
                init_data.default_label = ":port";
                init_data.fwnode = led;
                init_data.devname_mandatory = true;
index 01f1cb7..833e55e 100644 (file)
@@ -400,7 +400,6 @@ static int sja1105_mdiobus_pcs_register(struct sja1105_private *priv)
        }
 
        for (port = 0; port < ds->num_ports; port++) {
-               struct mdio_device *mdiodev;
                struct dw_xpcs *xpcs;
 
                if (dsa_is_unused_port(ds, port))
@@ -410,13 +409,7 @@ static int sja1105_mdiobus_pcs_register(struct sja1105_private *priv)
                    priv->phy_mode[port] != PHY_INTERFACE_MODE_2500BASEX)
                        continue;
 
-               mdiodev = mdio_device_create(bus, port);
-               if (IS_ERR(mdiodev)) {
-                       rc = PTR_ERR(mdiodev);
-                       goto out_pcs_free;
-               }
-
-               xpcs = xpcs_create(mdiodev, priv->phy_mode[port]);
+               xpcs = xpcs_create_mdiodev(bus, port, priv->phy_mode[port]);
                if (IS_ERR(xpcs)) {
                        rc = PTR_ERR(xpcs);
                        goto out_pcs_free;
@@ -434,7 +427,6 @@ out_pcs_free:
                if (!priv->xpcs[port])
                        continue;
 
-               mdio_device_free(priv->xpcs[port]->mdiodev);
                xpcs_destroy(priv->xpcs[port]);
                priv->xpcs[port] = NULL;
        }
@@ -457,7 +449,6 @@ static void sja1105_mdiobus_pcs_unregister(struct sja1105_private *priv)
                if (!priv->xpcs[port])
                        continue;
 
-               mdio_device_free(priv->xpcs[port]->mdiodev);
                xpcs_destroy(priv->xpcs[port]);
                priv->xpcs[port] = NULL;
        }
index e615384..d781871 100644 (file)
@@ -516,10 +516,11 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
        /* Can't change an already configured port (must delete qdisc first).
         * Can't delete the qdisc from an unconfigured port.
         */
-       if (!!tas_data->offload[port] == admin->enable)
+       if ((!!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_REPLACE) ||
+           (!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_DESTROY))
                return -EINVAL;
 
-       if (!admin->enable) {
+       if (admin->cmd == TAPRIO_CMD_DESTROY) {
                taprio_offload_free(tas_data->offload[port]);
                tas_data->offload[port] = NULL;
 
@@ -528,6 +529,8 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
                        return rc;
 
                return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
+       } else if (admin->cmd != TAPRIO_CMD_REPLACE) {
+               return -EOPNOTSUPP;
        }
 
        /* The cycle time extension is the amount of time the last cycle from
index 14ff688..c1179d7 100644 (file)
@@ -147,7 +147,7 @@ static struct i2c_driver xrs700x_i2c_driver = {
                .name   = "xrs700x-i2c",
                .of_match_table = of_match_ptr(xrs700x_i2c_dt_ids),
        },
-       .probe_new = xrs700x_i2c_probe,
+       .probe = xrs700x_i2c_probe,
        .remove = xrs700x_i2c_remove,
        .shutdown = xrs700x_i2c_shutdown,
        .id_table = xrs700x_i2c_id,
index e522644..f784a6e 100644 (file)
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-1.0+ */
+
 /* Generic NS8390 register definitions. */
 
 /* This file is part of Donald Becker's 8390 drivers, and is distributed
index 991ad95..a09f383 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Amiga Linux/68k 8390 based PCMCIA Ethernet Driver for the Amiga 1200
  *
  *
  * ----------------------------------------------------------------------------
  *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of the Linux
- * distribution for more details.
- *
- * ----------------------------------------------------------------------------
- *
  */
 
 
index 78f9858..fea489a 100644 (file)
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-1.0+
+
 /*======================================================================
 
     A PCMCIA ethernet driver for Asix AX88190-based cards
@@ -17,9 +19,7 @@
 
     Written 1992,1993 by Donald Becker.
     Copyright 1993 United States Government as represented by the
-    Director, National Security Agency.  This software may be used and
-    distributed according to the terms of the GNU General Public License,
-    incorporated herein by reference.
+    Director, National Security Agency.
     Donald Becker may be reached at becker@scyld.com
 
 ======================================================================*/
index 1df7601..24f49a8 100644 (file)
@@ -1,10 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
 /* New Hydra driver using generic 8390 core */
 /* Based on old hydra driver by Topi Kanerva (topi@susanna.oulu.fi) */
 
-/* This file is subject to the terms and conditions of the GNU General      */
-/* Public License.  See the file COPYING in the main directory of the       */
-/* Linux distribution for more details.                                     */
-
 /* Peter De Schrijver (p2@mind.be) */
 /* Oldenburg 2000 */
 
index e840212..84aeb80 100644 (file)
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-1.0+
+
 /* 8390.c: A general NS8390 ethernet driver core for linux. */
 /*
        Written 1992-94 by Donald Becker.
@@ -5,9 +7,6 @@
        Copyright 1993 United States Government as represented by the
        Director, National Security Agency.
 
-       This software may be used and distributed according to the terms
-       of the GNU General Public License, incorporated herein by reference.
-
        The author may be reached as becker@scyld.com, or C/O
        Scyld Computing Corporation
        410 Severn Ave., Suite 210
index 7fb819b..4a0a095 100644 (file)
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-1.0+
 /* mac8390.c: New driver for 8390-based Nubus (or Nubus-alike)
    Ethernet cards on Linux */
 /* Based on the former daynaport.c driver, by Alan Cox.  Some code
    taken from or inspired by skeleton.c by Donald Becker, acenic.c by
-   Jes Sorensen, and ne2k-pci.c by Donald Becker and Paul Gortmaker.
-
-   This software may be used and distributed according to the terms of
-   the GNU Public License, incorporated herein by reference.  */
+   Jes Sorensen, and ne2k-pci.c by Donald Becker and Paul Gortmaker. */
 
 /* 2000-02-28: support added for Dayna and Kinetics cards by
    A.G.deWijn@phys.uu.nl */
index 8a7918d..217838b 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Support for ColdFire CPU based boards using a NS8390 Ethernet device.
  *
@@ -5,9 +6,6 @@
  *
  *  (C) Copyright 2012,  Greg Ungerer <gerg@uclinux.org>
  *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of the Linux
- *  distribution for more details.
  */
 
 #include <linux/module.h>
index bc9c81d..7d89ec1 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-1.0+
 /* ne.c: A general non-shared-memory NS8390 ethernet driver for linux. */
 /*
     Written 1992-94 by Donald Becker.
@@ -5,9 +6,6 @@
     Copyright 1993 United States Government as represented by the
     Director, National Security Agency.
 
-    This software may be used and distributed according to the terms
-    of the GNU General Public License, incorporated herein by reference.
-
     The author may be reached as becker@scyld.com, or C/O
     Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403
 
index 6a0a203..2c6bd36 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-1.0+
 /* A Linux device driver for PCI NE2000 clones.
  *
  * Authors and other copyright holders:
index 0f07fe0..9bd5e99 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-1.0+
 /*======================================================================
 
     A PCMCIA ethernet driver for NS8390-based cards
@@ -17,9 +18,7 @@
 
     Written 1992,1993 by Donald Becker.
     Copyright 1993 United States Government as represented by the
-    Director, National Security Agency.  This software may be used and
-    distributed according to the terms of the GNU General Public License,
-    incorporated herein by reference.
+    Director, National Security Agency.
     Donald Becker may be reached at becker@scyld.com
 
     Based also on Keith Moore's changes to Don Becker's code, for IBM
index 7465650..22ca804 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-1.0+
 /* smc-ultra.c: A SMC Ultra ethernet driver for linux. */
 /*
        This is a driver for the SMC Ultra and SMC EtherEZ ISA ethercards.
@@ -7,9 +8,6 @@
        Copyright 1993 United States Government as represented by the
        Director, National Security Agency.
 
-       This software may be used and distributed according to the terms
-       of the GNU General Public License, incorporated herein by reference.
-
        The author may be reached as becker@scyld.com, or C/O
        Scyld Computing Corporation
        410 Severn Ave., Suite 210
index bd89ca8..265976e 100644 (file)
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* stnic.c : A SH7750 specific part of driver for NS DP83902A ST-NIC.
  *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
  * Copyright (C) 1999 kaz Kojima
  */
 
index 119021d..ffd6394 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-1.0+
 /* wd.c: A WD80x3 ethernet driver for linux. */
 /*
        Written 1993-94 by Donald Becker.
@@ -5,9 +6,6 @@
        Copyright 1993 United States Government as represented by the
        Director, National Security Agency.
 
-       This software may be used and distributed according to the terms
-       of the GNU General Public License, incorporated herein by reference.
-
        The author may be reached as becker@scyld.com, or C/O
        Scyld Computing Corporation
        410 Severn Ave., Suite 210
index e8b4fe8..d70390e 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Amiga Linux/m68k and Linux/PPC Zorro NS8390 Ethernet Driver
  *
@@ -9,12 +10,6 @@
  *
  *  ---------------------------------------------------------------------------
  *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of the Linux
- *  distribution for more details.
- *
- *  ---------------------------------------------------------------------------
- *
  *  The Ariadne II and X-Surf are Zorro-II boards containing Realtek RTL8019AS
  *  Ethernet Controllers.
  */
index dd7fd41..1798531 100644 (file)
@@ -4,7 +4,9 @@ config ALTERA_TSE
        depends on HAS_DMA
        select PHYLIB
        select PHYLINK
-       select PCS_ALTERA_TSE
+       select PCS_LYNX
+       select MDIO_REGMAP
+       select REGMAP_MMIO
        help
          This driver supports the Altera Triple-Speed (TSE) Ethernet MAC.
 
index 66e3af7..2e15800 100644 (file)
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mii.h>
+#include <linux/mdio/mdio-regmap.h>
 #include <linux/netdevice.h>
 #include <linux/of_device.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include <linux/of_platform.h>
-#include <linux/pcs-altera-tse.h>
+#include <linux/pcs-lynx.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 #include <linux/skbuff.h>
 #include <asm/cacheflush.h>
 
@@ -1036,10 +1038,6 @@ static struct net_device_ops altera_tse_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
 };
 
-static void alt_tse_mac_an_restart(struct phylink_config *config)
-{
-}
-
 static void alt_tse_mac_config(struct phylink_config *config, unsigned int mode,
                               const struct phylink_link_state *state)
 {
@@ -1096,7 +1094,6 @@ static struct phylink_pcs *alt_tse_select_pcs(struct phylink_config *config,
 }
 
 static const struct phylink_mac_ops alt_tse_phylink_ops = {
-       .mac_an_restart = alt_tse_mac_an_restart,
        .mac_config = alt_tse_mac_config,
        .mac_link_down = alt_tse_mac_link_down,
        .mac_link_up = alt_tse_mac_link_up,
@@ -1137,13 +1134,16 @@ static int request_and_map(struct platform_device *pdev, const char *name,
 static int altera_tse_probe(struct platform_device *pdev)
 {
        const struct of_device_id *of_id = NULL;
+       struct regmap_config pcs_regmap_cfg;
        struct altera_tse_private *priv;
+       struct mdio_regmap_config mrc;
        struct resource *control_port;
+       struct regmap *pcs_regmap;
        struct resource *dma_res;
        struct resource *pcs_res;
+       struct mii_bus *pcs_bus;
        struct net_device *ndev;
        void __iomem *descmap;
-       int pcs_reg_width = 2;
        int ret = -ENODEV;
 
        ndev = alloc_etherdev(sizeof(struct altera_tse_private));
@@ -1255,18 +1255,41 @@ static int altera_tse_probe(struct platform_device *pdev)
        if (ret)
                goto err_free_netdev;
 
+       memset(&pcs_regmap_cfg, 0, sizeof(pcs_regmap_cfg));
+       memset(&mrc, 0, sizeof(mrc));
        /* SGMII PCS address space. The location can vary depending on how the
         * IP is integrated. We can have a resource dedicated to it at a specific
         * address space, but if it's not the case, we fallback to the mdiophy0
         * from the MAC's address space
         */
-       ret = request_and_map(pdev, "pcs", &pcs_res,
-                             &priv->pcs_base);
+       ret = request_and_map(pdev, "pcs", &pcs_res, &priv->pcs_base);
        if (ret) {
+               /* If we can't find a dedicated resource for the PCS, fallback
+                * to the internal PCS, that has a different address stride
+                */
                priv->pcs_base = priv->mac_dev + tse_csroffs(mdio_phy0);
-               pcs_reg_width = 4;
+               pcs_regmap_cfg.reg_bits = 32;
+               /* Values are MDIO-like values, on 16 bits */
+               pcs_regmap_cfg.val_bits = 16;
+               pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(2);
+       } else {
+               pcs_regmap_cfg.reg_bits = 16;
+               pcs_regmap_cfg.val_bits = 16;
+               pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1);
        }
 
+       /* Create a regmap for the PCS so that it can be used by the PCS driver */
+       pcs_regmap = devm_regmap_init_mmio(&pdev->dev, priv->pcs_base,
+                                          &pcs_regmap_cfg);
+       if (IS_ERR(pcs_regmap)) {
+               ret = PTR_ERR(pcs_regmap);
+               goto err_free_netdev;
+       }
+       mrc.regmap = pcs_regmap;
+       mrc.parent = &pdev->dev;
+       mrc.valid_addr = 0x0;
+       mrc.autoscan = false;
+
        /* Rx IRQ */
        priv->rx_irq = platform_get_irq_byname(pdev, "rx_irq");
        if (priv->rx_irq == -ENXIO) {
@@ -1389,7 +1412,18 @@ static int altera_tse_probe(struct platform_device *pdev)
                         (unsigned long) control_port->start, priv->rx_irq,
                         priv->tx_irq);
 
-       priv->pcs = alt_tse_pcs_create(ndev, priv->pcs_base, pcs_reg_width);
+       snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name);
+       pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc);
+       if (IS_ERR(pcs_bus)) {
+               ret = PTR_ERR(pcs_bus);
+               goto err_init_pcs;
+       }
+
+       priv->pcs = lynx_pcs_create_mdiodev(pcs_bus, 0);
+       if (IS_ERR(priv->pcs)) {
+               ret = PTR_ERR(priv->pcs);
+               goto err_init_pcs;
+       }
 
        priv->phylink_config.dev = &ndev->dev;
        priv->phylink_config.type = PHYLINK_NETDEV;
@@ -1412,12 +1446,13 @@ static int altera_tse_probe(struct platform_device *pdev)
        if (IS_ERR(priv->phylink)) {
                dev_err(&pdev->dev, "failed to create phylink\n");
                ret = PTR_ERR(priv->phylink);
-               goto err_init_phy;
+               goto err_init_phylink;
        }
 
        return 0;
-
-err_init_phy:
+err_init_phylink:
+       lynx_pcs_destroy(priv->pcs);
+err_init_pcs:
        unregister_netdev(ndev);
 err_register_netdev:
        netif_napi_del(&priv->napi);
@@ -1438,6 +1473,8 @@ static int altera_tse_remove(struct platform_device *pdev)
        altera_tse_mdio_destroy(ndev);
        unregister_netdev(ndev);
        phylink_destroy(priv->phylink);
+       lynx_pcs_destroy(priv->pcs);
+
        free_netdev(ndev);
 
        return 0;
index 7eb5851..6afff8a 100644 (file)
@@ -289,7 +289,7 @@ static int aq_get_txsc_stats(struct aq_hw_s *hw, const int sc_idx,
 
 static int aq_mdo_dev_open(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        int ret = 0;
 
        if (netif_carrier_ok(nic->ndev))
@@ -300,7 +300,7 @@ static int aq_mdo_dev_open(struct macsec_context *ctx)
 
 static int aq_mdo_dev_stop(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        int i;
 
        for (i = 0; i < AQ_MACSEC_MAX_SC; i++) {
@@ -439,7 +439,7 @@ static enum aq_macsec_sc_sa sc_sa_from_num_an(const int num_an)
 
 static int aq_mdo_add_secy(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        const struct macsec_secy *secy = ctx->secy;
        enum aq_macsec_sc_sa sc_sa;
@@ -474,7 +474,7 @@ static int aq_mdo_add_secy(struct macsec_context *ctx)
 
 static int aq_mdo_upd_secy(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        const struct macsec_secy *secy = ctx->secy;
        int txsc_idx;
        int ret = 0;
@@ -528,7 +528,7 @@ static int aq_clear_txsc(struct aq_nic_s *nic, const int txsc_idx,
 
 static int aq_mdo_del_secy(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        int ret = 0;
 
        if (!nic->macsec_cfg)
@@ -576,7 +576,7 @@ static int aq_update_txsa(struct aq_nic_s *nic, const unsigned int sc_idx,
 
 static int aq_mdo_add_txsa(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        const struct macsec_secy *secy = ctx->secy;
        struct aq_macsec_txsc *aq_txsc;
@@ -603,7 +603,7 @@ static int aq_mdo_add_txsa(struct macsec_context *ctx)
 
 static int aq_mdo_upd_txsa(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        const struct macsec_secy *secy = ctx->secy;
        struct aq_macsec_txsc *aq_txsc;
@@ -652,7 +652,7 @@ static int aq_clear_txsa(struct aq_nic_s *nic, struct aq_macsec_txsc *aq_txsc,
 
 static int aq_mdo_del_txsa(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        int txsc_idx;
        int ret = 0;
@@ -744,7 +744,7 @@ static int aq_set_rxsc(struct aq_nic_s *nic, const u32 rxsc_idx)
 
 static int aq_mdo_add_rxsc(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        const u32 rxsc_idx_max = aq_sc_idx_max(cfg->sc_sa);
        u32 rxsc_idx;
@@ -775,7 +775,7 @@ static int aq_mdo_add_rxsc(struct macsec_context *ctx)
 
 static int aq_mdo_upd_rxsc(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        int rxsc_idx;
        int ret = 0;
 
@@ -838,7 +838,7 @@ static int aq_clear_rxsc(struct aq_nic_s *nic, const int rxsc_idx,
 
 static int aq_mdo_del_rxsc(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        enum aq_clear_type clear_type = AQ_CLEAR_SW;
        int rxsc_idx;
        int ret = 0;
@@ -906,8 +906,8 @@ static int aq_update_rxsa(struct aq_nic_s *nic, const unsigned int sc_idx,
 
 static int aq_mdo_add_rxsa(struct macsec_context *ctx)
 {
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc;
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
        const struct macsec_secy *secy = ctx->secy;
        struct aq_macsec_rxsc *aq_rxsc;
        int rxsc_idx;
@@ -933,8 +933,8 @@ static int aq_mdo_add_rxsa(struct macsec_context *ctx)
 
 static int aq_mdo_upd_rxsa(struct macsec_context *ctx)
 {
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc;
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        const struct macsec_secy *secy = ctx->secy;
        int rxsc_idx;
@@ -982,8 +982,8 @@ static int aq_clear_rxsa(struct aq_nic_s *nic, struct aq_macsec_rxsc *aq_rxsc,
 
 static int aq_mdo_del_rxsa(struct macsec_context *ctx)
 {
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc;
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        int rxsc_idx;
        int ret = 0;
@@ -1000,7 +1000,7 @@ static int aq_mdo_del_rxsa(struct macsec_context *ctx)
 
 static int aq_mdo_get_dev_stats(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_common_stats *stats = &nic->macsec_cfg->stats;
        struct aq_hw_s *hw = nic->aq_hw;
 
@@ -1020,7 +1020,7 @@ static int aq_mdo_get_dev_stats(struct macsec_context *ctx)
 
 static int aq_mdo_get_tx_sc_stats(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_tx_sc_stats *stats;
        struct aq_hw_s *hw = nic->aq_hw;
        struct aq_macsec_txsc *aq_txsc;
@@ -1044,7 +1044,7 @@ static int aq_mdo_get_tx_sc_stats(struct macsec_context *ctx)
 
 static int aq_mdo_get_tx_sa_stats(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        struct aq_macsec_tx_sa_stats *stats;
        struct aq_hw_s *hw = nic->aq_hw;
@@ -1084,7 +1084,7 @@ static int aq_mdo_get_tx_sa_stats(struct macsec_context *ctx)
 
 static int aq_mdo_get_rx_sc_stats(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        struct aq_macsec_rx_sa_stats *stats;
        struct aq_hw_s *hw = nic->aq_hw;
@@ -1129,7 +1129,7 @@ static int aq_mdo_get_rx_sc_stats(struct macsec_context *ctx)
 
 static int aq_mdo_get_rx_sa_stats(struct macsec_context *ctx)
 {
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);
        struct aq_macsec_cfg *cfg = nic->macsec_cfg;
        struct aq_macsec_rx_sa_stats *stats;
        struct aq_hw_s *hw = nic->aq_hw;
@@ -1399,7 +1399,7 @@ static void aq_check_txsa_expiration(struct aq_nic_s *nic)
 #define AQ_LOCKED_MDO_DEF(mdo)                                         \
 static int aq_locked_mdo_##mdo(struct macsec_context *ctx)             \
 {                                                                      \
-       struct aq_nic_s *nic = netdev_priv(ctx->netdev);                \
+       struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev);         \
        int ret;                                                        \
        mutex_lock(&nic->macsec_mutex);                                 \
        ret = aq_mdo_##mdo(ctx);                                        \
index 7f93317..4de22ee 100644 (file)
@@ -532,10 +532,10 @@ static bool aq_add_rx_fragment(struct device *dev,
                                              buff_->rxdata.pg_off,
                                              buff_->len,
                                              DMA_FROM_DEVICE);
-               skb_frag_off_set(frag, buff_->rxdata.pg_off);
-               skb_frag_size_set(frag, buff_->len);
                sinfo->xdp_frags_size += buff_->len;
-               __skb_frag_set_page(frag, buff_->rxdata.page);
+               skb_frag_fill_page_desc(frag, buff_->rxdata.page,
+                                       buff_->rxdata.pg_off,
+                                       buff_->len);
 
                buff_->is_cleaned = 1;
 
index d820ae0..0e244f0 100644 (file)
@@ -220,6 +220,6 @@ static inline void arc_reg_clr(struct arc_emac_priv *priv, int reg, int mask)
 int arc_mdio_probe(struct arc_emac_priv *priv);
 int arc_mdio_remove(struct arc_emac_priv *priv);
 int arc_emac_probe(struct net_device *ndev, int interface);
-int arc_emac_remove(struct net_device *ndev);
+void arc_emac_remove(struct net_device *ndev);
 
 #endif /* ARC_EMAC_H */
index 800620b..ce3147e 100644 (file)
@@ -61,11 +61,11 @@ out_netdev:
 static int emac_arc_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
-       int err;
 
-       err = arc_emac_remove(ndev);
+       arc_emac_remove(ndev);
        free_netdev(ndev);
-       return err;
+
+       return 0;
 }
 
 static const struct of_device_id emac_arc_dt_ids[] = {
index ba0646b..2b427d8 100644 (file)
@@ -1008,7 +1008,7 @@ out_put_node:
 }
 EXPORT_SYMBOL_GPL(arc_emac_probe);
 
-int arc_emac_remove(struct net_device *ndev)
+void arc_emac_remove(struct net_device *ndev)
 {
        struct arc_emac_priv *priv = netdev_priv(ndev);
 
@@ -1019,8 +1019,6 @@ int arc_emac_remove(struct net_device *ndev)
 
        if (!IS_ERR(priv->clk))
                clk_disable_unprepare(priv->clk);
-
-       return 0;
 }
 EXPORT_SYMBOL_GPL(arc_emac_remove);
 
index 1c9ca3b..5091011 100644 (file)
@@ -248,9 +248,8 @@ static int emac_rockchip_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct rockchip_priv_data *priv = netdev_priv(ndev);
-       int err;
 
-       err = arc_emac_remove(ndev);
+       arc_emac_remove(ndev);
 
        clk_disable_unprepare(priv->refclk);
 
@@ -261,7 +260,7 @@ static int emac_rockchip_remove(struct platform_device *pdev)
                clk_disable_unprepare(priv->macclk);
 
        free_netdev(ndev);
-       return err;
+       return 0;
 }
 
 static struct platform_driver emac_rockchip_driver = {
index 466e1d6..0d917a9 100644 (file)
@@ -2955,7 +2955,6 @@ bnx2_reuse_rx_skb_pages(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
                shinfo = skb_shinfo(skb);
                shinfo->nr_frags--;
                page = skb_frag_page(&shinfo->frags[shinfo->nr_frags]);
-               __skb_frag_set_page(&shinfo->frags[shinfo->nr_frags], NULL);
 
                cons_rx_pg->page = page;
                dev_kfree_skb(skb);
index b499bc9..e5b54e6 100644 (file)
@@ -1085,9 +1085,8 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
                            RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;
 
                cons_rx_buf = &rxr->rx_agg_ring[cons];
-               skb_frag_off_set(frag, cons_rx_buf->offset);
-               skb_frag_size_set(frag, frag_len);
-               __skb_frag_set_page(frag, cons_rx_buf->page);
+               skb_frag_fill_page_desc(frag, cons_rx_buf->page,
+                                       cons_rx_buf->offset, frag_len);
                shinfo->nr_frags = i + 1;
                __clear_bit(cons, rxr->rx_agg_bmap);
 
@@ -1103,10 +1102,7 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
                        xdp_buff_set_frag_pfmemalloc(xdp);
 
                if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) {
-                       unsigned int nr_frags;
-
-                       nr_frags = --shinfo->nr_frags;
-                       __skb_frag_set_page(&shinfo->frags[nr_frags], NULL);
+                       --shinfo->nr_frags;
                        cons_rx_buf->page = page;
 
                        /* Update prod since possibly some pages have been
index 3a4b6cb..7a41cad 100644 (file)
@@ -42,6 +42,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
        struct bcmgenet_priv *priv = netdev_priv(dev);
        struct device *kdev = &priv->pdev->dev;
 
+       if (dev->phydev) {
+               phy_ethtool_get_wol(dev->phydev, wol);
+               if (wol->supported)
+                       return;
+       }
+
        if (!device_can_wakeup(kdev)) {
                wol->supported = 0;
                wol->wolopts = 0;
@@ -63,6 +69,14 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
        struct bcmgenet_priv *priv = netdev_priv(dev);
        struct device *kdev = &priv->pdev->dev;
+       int ret;
+
+       /* Try Wake-on-LAN from the PHY first */
+       if (dev->phydev) {
+               ret = phy_ethtool_set_wol(dev->phydev, wol);
+               if (ret != -EOPNOTSUPP)
+                       return ret;
+       }
 
        if (!device_can_wakeup(kdev))
                return -ENOTSUPP;
index cfbdd00..b6d5bf8 100644 (file)
@@ -1181,6 +1181,7 @@ struct macb_config {
                            struct clk **hclk, struct clk **tx_clk,
                            struct clk **rx_clk, struct clk **tsu_clk);
        int     (*init)(struct platform_device *pdev);
+       unsigned int            max_tx_length;
        int     jumbo_max_len;
        const struct macb_usrio_config *usrio;
 };
index 29a1199..50a4b04 100644 (file)
@@ -4117,14 +4117,12 @@ static int macb_init(struct platform_device *pdev)
 
        /* setup appropriated routines according to adapter type */
        if (macb_is_gem(bp)) {
-               bp->max_tx_length = GEM_MAX_TX_LEN;
                bp->macbgem_ops.mog_alloc_rx_buffers = gem_alloc_rx_buffers;
                bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers;
                bp->macbgem_ops.mog_init_rings = gem_init_rings;
                bp->macbgem_ops.mog_rx = gem_rx;
                dev->ethtool_ops = &gem_ethtool_ops;
        } else {
-               bp->max_tx_length = MACB_MAX_TX_LEN;
                bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers;
                bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers;
                bp->macbgem_ops.mog_init_rings = macb_init_rings;
@@ -4861,7 +4859,8 @@ static const struct macb_config mpfs_config = {
        .clk_init = macb_clk_init,
        .init = init_reset_optional,
        .usrio = &macb_default_usrio,
-       .jumbo_max_len = 10240,
+       .max_tx_length = 4040, /* Cadence Erratum 1686 */
+       .jumbo_max_len = 4040,
 };
 
 static const struct macb_config sama7g5_gem_config = {
@@ -5012,6 +5011,13 @@ static int macb_probe(struct platform_device *pdev)
        if (macb_config)
                bp->jumbo_max_len = macb_config->jumbo_max_len;
 
+       if (!hw_is_gem(bp->regs, bp->native_io))
+               bp->max_tx_length = MACB_MAX_TX_LEN;
+       else if (macb_config->max_tx_length)
+               bp->max_tx_length = macb_config->max_tx_length;
+       else
+               bp->max_tx_length = GEM_MAX_TX_LEN;
+
        bp->wol = 0;
        if (of_property_read_bool(np, "magic-packet"))
                bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
index 1c76c95..ca742cc 100644 (file)
@@ -62,6 +62,9 @@ config CAVIUM_PTP
          Precision Time Protocol or other purposes.  Timestamps can be used in
          BGX, TNS, GTI, and NIC blocks.
 
+config LIQUIDIO_CORE
+       tristate
+
 config LIQUIDIO
        tristate "Cavium LiquidIO support"
        depends on 64BIT && PCI
@@ -69,6 +72,7 @@ config LIQUIDIO
        depends on PTP_1588_CLOCK_OPTIONAL
        select FW_LOADER
        select LIBCRC32C
+       select LIQUIDIO_CORE
        select NET_DEVLINK
        help
          This driver supports Cavium LiquidIO Intelligent Server Adapters
@@ -92,6 +96,7 @@ config LIQUIDIO_VF
        tristate "Cavium LiquidIO VF support"
        depends on 64BIT && PCI_MSI
        depends on PTP_1588_CLOCK_OPTIONAL
+       select LIQUIDIO_CORE
        help
          This driver supports Cavium LiquidIO Intelligent Server Adapter
          based on CN23XX chips.
index bc99375..4ee80af 100644 (file)
@@ -3,7 +3,9 @@
 # Cavium Liquidio ethernet device driver
 #
 
-common-objs := lio_ethtool.o           \
+obj-$(CONFIG_LIQUIDIO_CORE) += liquidio-core.o
+liquidio-core-y := \
+               lio_ethtool.o           \
                lio_core.o              \
                request_manager.o       \
                response_manager.o      \
@@ -18,7 +20,7 @@ common-objs :=        lio_ethtool.o           \
                octeon_nic.o
 
 obj-$(CONFIG_LIQUIDIO) += liquidio.o
-liquidio-y := lio_main.o octeon_console.o lio_vf_rep.o $(common-objs)
+liquidio-y := lio_main.o octeon_console.o lio_vf_rep.o
 
 obj-$(CONFIG_LIQUIDIO_VF) += liquidio_vf.o
-liquidio_vf-y := lio_vf_main.o $(common-objs)
+liquidio_vf-y := lio_vf_main.o
index 9ed3d1a..068ed52 100644 (file)
@@ -719,12 +719,10 @@ static int cn23xx_setup_pf_mbox(struct octeon_device *oct)
        for (i = 0; i < oct->sriov_info.max_vfs; i++) {
                q_no = i * oct->sriov_info.rings_per_vf;
 
-               mbox = vmalloc(sizeof(*mbox));
+               mbox = vzalloc(sizeof(*mbox));
                if (!mbox)
                        goto free_mbox;
 
-               memset(mbox, 0, sizeof(struct octeon_mbox));
-
                spin_lock_init(&mbox->lock);
 
                mbox->oct_dev = oct;
@@ -1377,6 +1375,7 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(setup_cn23xx_octeon_pf_device);
 
 int validate_cn23xx_pf_config_info(struct octeon_device *oct,
                                   struct octeon_config *conf23xx)
@@ -1435,6 +1434,7 @@ int cn23xx_fw_loaded(struct octeon_device *oct)
        val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2);
        return (val >> SCR2_BIT_FW_LOADED) & 1ULL;
 }
+EXPORT_SYMBOL_GPL(cn23xx_fw_loaded);
 
 void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
                                        u8 *mac)
@@ -1456,6 +1456,7 @@ void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
                octeon_mbox_write(oct, &mbox_cmd);
        }
 }
+EXPORT_SYMBOL_GPL(cn23xx_tell_vf_its_macaddr_changed);
 
 static void
 cn23xx_get_vf_stats_callback(struct octeon_device *oct,
@@ -1510,3 +1511,4 @@ int cn23xx_get_vf_stats(struct octeon_device *oct, int vfidx,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(cn23xx_get_vf_stats);
index fda4940..dd5d80f 100644 (file)
@@ -279,12 +279,10 @@ static int cn23xx_setup_vf_mbox(struct octeon_device *oct)
 {
        struct octeon_mbox *mbox = NULL;
 
-       mbox = vmalloc(sizeof(*mbox));
+       mbox = vzalloc(sizeof(*mbox));
        if (!mbox)
                return 1;
 
-       memset(mbox, 0, sizeof(struct octeon_mbox));
-
        spin_lock_init(&mbox->lock);
 
        mbox->oct_dev = oct;
@@ -386,6 +384,7 @@ void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct)
 
        octeon_mbox_write(oct, &mbox_cmd);
 }
+EXPORT_SYMBOL_GPL(cn23xx_vf_ask_pf_to_do_flr);
 
 static void octeon_pfvf_hs_callback(struct octeon_device *oct,
                                    struct octeon_mbox_cmd *cmd,
@@ -468,6 +467,7 @@ int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(cn23xx_octeon_pfvf_handshake);
 
 static void cn23xx_handle_vf_mbox_intr(struct octeon_ioq_vector *ioq_vector)
 {
@@ -680,3 +680,4 @@ int cn23xx_setup_octeon_vf_device(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(cn23xx_setup_octeon_vf_device);
index 39643be..93fccfe 100644 (file)
@@ -697,6 +697,7 @@ int lio_setup_cn66xx_octeon_device(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(lio_setup_cn66xx_octeon_device);
 
 int lio_validate_cn6xxx_config_info(struct octeon_device *oct,
                                    struct octeon_config *conf6xxx)
index 30254e4..b5103de 100644 (file)
@@ -181,3 +181,4 @@ int lio_setup_cn68xx_octeon_device(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(lio_setup_cn68xx_octeon_device);
index 882b2be..9cc6303 100644 (file)
@@ -26,6 +26,9 @@
 #include "octeon_main.h"
 #include "octeon_network.h"
 
+MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
+MODULE_LICENSE("GPL");
+
 /* OOM task polling interval */
 #define LIO_OOM_POLL_INTERVAL_MS 250
 
@@ -71,6 +74,7 @@ void lio_delete_glists(struct lio *lio)
        kfree(lio->glist);
        lio->glist = NULL;
 }
+EXPORT_SYMBOL_GPL(lio_delete_glists);
 
 /**
  * lio_setup_glists - Setup gather lists
@@ -154,6 +158,7 @@ int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(lio_setup_glists);
 
 int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
 {
@@ -180,6 +185,7 @@ int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
        }
        return ret;
 }
+EXPORT_SYMBOL_GPL(liquidio_set_feature);
 
 void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl,
                                        unsigned int bytes_compl)
@@ -395,6 +401,7 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
                        nctrl->ncmd.s.cmd);
        }
 }
+EXPORT_SYMBOL_GPL(liquidio_link_ctrl_cmd_completion);
 
 void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac)
 {
@@ -478,6 +485,7 @@ int setup_rx_oom_poll_fn(struct net_device *netdev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(setup_rx_oom_poll_fn);
 
 void cleanup_rx_oom_poll_fn(struct net_device *netdev)
 {
@@ -495,6 +503,7 @@ void cleanup_rx_oom_poll_fn(struct net_device *netdev)
                }
        }
 }
+EXPORT_SYMBOL_GPL(cleanup_rx_oom_poll_fn);
 
 /* Runs in interrupt context. */
 static void lio_update_txq_status(struct octeon_device *oct, int iq_num)
@@ -899,6 +908,7 @@ int liquidio_setup_io_queues(struct octeon_device *octeon_dev, int ifidx,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(liquidio_setup_io_queues);
 
 static
 int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
@@ -1194,6 +1204,7 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_setup_interrupt);
 
 /**
  * liquidio_change_mtu - Net device change_mtu
@@ -1256,6 +1267,7 @@ int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
        WRITE_ONCE(sc->caller_is_done, true);
        return 0;
 }
+EXPORT_SYMBOL_GPL(liquidio_change_mtu);
 
 int lio_wait_for_clean_oq(struct octeon_device *oct)
 {
@@ -1279,6 +1291,7 @@ int lio_wait_for_clean_oq(struct octeon_device *oct)
 
        return pending_pkts;
 }
+EXPORT_SYMBOL_GPL(lio_wait_for_clean_oq);
 
 static void
 octnet_nic_stats_callback(struct octeon_device *oct_dev,
@@ -1509,6 +1522,7 @@ lio_fetch_stats_exit:
 
        return;
 }
+EXPORT_SYMBOL_GPL(lio_fetch_stats);
 
 int liquidio_set_speed(struct lio *lio, int speed)
 {
@@ -1659,6 +1673,7 @@ int liquidio_get_speed(struct lio *lio)
 
        return retval;
 }
+EXPORT_SYMBOL_GPL(liquidio_get_speed);
 
 int liquidio_set_fec(struct lio *lio, int on_off)
 {
@@ -1812,3 +1827,4 @@ int liquidio_get_fec(struct lio *lio)
 
        return retval;
 }
+EXPORT_SYMBOL_GPL(liquidio_get_fec);
index 2c10ae3..9d56181 100644 (file)
@@ -3180,3 +3180,4 @@ void liquidio_set_ethtool_ops(struct net_device *netdev)
        else
                netdev->ethtool_ops = &lio_ethtool_ops;
 }
+EXPORT_SYMBOL_GPL(liquidio_set_ethtool_ops);
index 9bd1d2d..100daad 100644 (file)
@@ -191,8 +191,7 @@ static void octeon_droq_bh(struct tasklet_struct *t)
 
 static int lio_wait_for_oq_pkts(struct octeon_device *oct)
 {
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        int retry = 100, pkt_cnt = 0, pending_pkts = 0;
        int i;
 
@@ -950,8 +949,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 {
        int i, refcount;
        struct msix_entry *msix_entries;
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
 
        struct handshake *hs;
 
@@ -1211,8 +1209,7 @@ static int send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 {
        struct net_device *netdev = oct->props[ifidx].netdev;
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        struct napi_struct *napi, *n;
        struct lio *lio;
 
@@ -1774,8 +1771,7 @@ static int liquidio_open(struct net_device *netdev)
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        struct napi_struct *napi, *n;
        int ret = 0;
 
@@ -1855,8 +1851,7 @@ static int liquidio_stop(struct net_device *netdev)
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        struct napi_struct *napi, *n;
        int ret = 0;
 
@@ -4057,8 +4052,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
        char bootcmd[] = "\n";
        char *dbg_enb = NULL;
        enum lio_fw_state fw_state;
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)octeon_dev->priv;
+       struct octeon_device_priv *oct_priv = octeon_dev->priv;
        atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE);
 
        /* Enable access to the octeon device and make its DMA capability
index e2921ae..62c2ead 100644 (file)
@@ -72,8 +72,7 @@ static int liquidio_stop(struct net_device *netdev);
 
 static int lio_wait_for_oq_pkts(struct octeon_device *oct)
 {
-       struct octeon_device_priv *oct_priv =
-           (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        int retry = MAX_IO_PENDING_PKT_COUNT;
        int pkt_cnt = 0, pending_pkts;
        int i;
@@ -442,8 +441,7 @@ static void octeon_pci_flr(struct octeon_device *oct)
  */
 static void octeon_destroy_resources(struct octeon_device *oct)
 {
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        struct msix_entry *msix_entries;
        int i;
 
@@ -659,8 +657,7 @@ static int send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 {
        struct net_device *netdev = oct->props[ifidx].netdev;
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        struct napi_struct *napi, *n;
        struct lio *lio;
 
@@ -909,8 +906,7 @@ static int liquidio_open(struct net_device *netdev)
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        struct napi_struct *napi, *n;
        int ret = 0;
 
@@ -956,8 +952,7 @@ static int liquidio_stop(struct net_device *netdev)
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
-       struct octeon_device_priv *oct_priv =
-               (struct octeon_device_priv *)oct->priv;
+       struct octeon_device_priv *oct_priv = oct->priv;
        struct napi_struct *napi, *n;
        int ret = 0;
 
index e159194..364f4f9 100644 (file)
@@ -564,6 +564,7 @@ void octeon_init_device_list(int conf_type)
        for (i = 0; i <  MAX_OCTEON_DEVICES; i++)
                oct_set_config_info(i, conf_type);
 }
+EXPORT_SYMBOL_GPL(octeon_init_device_list);
 
 static void *__retrieve_octeon_config_info(struct octeon_device *oct,
                                           u16 card_type)
@@ -633,6 +634,7 @@ char *lio_get_state_string(atomic_t *state_ptr)
                return oct_dev_state_str[OCT_DEV_STATE_INVALID];
        return oct_dev_state_str[istate];
 }
+EXPORT_SYMBOL_GPL(lio_get_state_string);
 
 static char *get_oct_app_string(u32 app_mode)
 {
@@ -661,6 +663,7 @@ void octeon_free_device_mem(struct octeon_device *oct)
        octeon_device[i] = NULL;
        octeon_device_count--;
 }
+EXPORT_SYMBOL_GPL(octeon_free_device_mem);
 
 static struct octeon_device *octeon_allocate_device_mem(u32 pci_id,
                                                        u32 priv_size)
@@ -747,6 +750,7 @@ struct octeon_device *octeon_allocate_device(u32 pci_id,
 
        return oct;
 }
+EXPORT_SYMBOL_GPL(octeon_allocate_device);
 
 /** Register a device's bus location at initialization time.
  *  @param octeon_dev - pointer to the octeon device structure.
@@ -804,6 +808,7 @@ int octeon_register_device(struct octeon_device *oct,
 
        return refcount;
 }
+EXPORT_SYMBOL_GPL(octeon_register_device);
 
 /** Deregister a device at de-initialization time.
  *  @param octeon_dev - pointer to the octeon device structure.
@@ -821,6 +826,7 @@ int octeon_deregister_device(struct octeon_device *oct)
 
        return refcount;
 }
+EXPORT_SYMBOL_GPL(octeon_deregister_device);
 
 int
 octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs)
@@ -853,12 +859,14 @@ octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_allocate_ioq_vector);
 
 void
 octeon_free_ioq_vector(struct octeon_device *oct)
 {
        vfree(oct->ioq_vector);
 }
+EXPORT_SYMBOL_GPL(octeon_free_ioq_vector);
 
 /* this function is only for setting up the first queue */
 int octeon_setup_instr_queues(struct octeon_device *oct)
@@ -904,6 +912,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
        oct->num_iqs++;
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_setup_instr_queues);
 
 int octeon_setup_output_queues(struct octeon_device *oct)
 {
@@ -940,6 +949,7 @@ int octeon_setup_output_queues(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_setup_output_queues);
 
 int octeon_set_io_queues_off(struct octeon_device *oct)
 {
@@ -989,6 +999,7 @@ int octeon_set_io_queues_off(struct octeon_device *oct)
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_set_io_queues_off);
 
 void octeon_set_droq_pkt_op(struct octeon_device *oct,
                            u32 q_no,
@@ -1027,6 +1038,7 @@ int octeon_init_dispatch_list(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_init_dispatch_list);
 
 void octeon_delete_dispatch_list(struct octeon_device *oct)
 {
@@ -1058,6 +1070,7 @@ void octeon_delete_dispatch_list(struct octeon_device *oct)
                kfree(temp);
        }
 }
+EXPORT_SYMBOL_GPL(octeon_delete_dispatch_list);
 
 octeon_dispatch_fn_t
 octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode,
@@ -1180,6 +1193,7 @@ octeon_register_dispatch_fn(struct octeon_device *oct,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_register_dispatch_fn);
 
 int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf)
 {
@@ -1262,6 +1276,7 @@ core_drv_init_err:
        octeon_free_recv_info(recv_info);
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_core_drv_init);
 
 int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no)
 
@@ -1272,6 +1287,7 @@ int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no)
 
        return -1;
 }
+EXPORT_SYMBOL_GPL(octeon_get_tx_qsize);
 
 int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no)
 {
@@ -1280,6 +1296,7 @@ int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no)
                return oct->droq[q_no]->max_count;
        return -1;
 }
+EXPORT_SYMBOL_GPL(octeon_get_rx_qsize);
 
 /* Retruns the host firmware handshake OCTEON specific configuration */
 struct octeon_config *octeon_get_conf(struct octeon_device *oct)
@@ -1302,6 +1319,7 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct)
        }
        return default_oct_conf;
 }
+EXPORT_SYMBOL_GPL(octeon_get_conf);
 
 /* scratch register address is same in all the OCT-II and CN70XX models */
 #define CNXX_SLI_SCRATCH1   0x3C0
@@ -1318,6 +1336,7 @@ struct octeon_device *lio_get_device(u32 octeon_id)
        else
                return octeon_device[octeon_id];
 }
+EXPORT_SYMBOL_GPL(lio_get_device);
 
 u64 lio_pci_readq(struct octeon_device *oct, u64 addr)
 {
@@ -1349,6 +1368,7 @@ u64 lio_pci_readq(struct octeon_device *oct, u64 addr)
 
        return val64;
 }
+EXPORT_SYMBOL_GPL(lio_pci_readq);
 
 void lio_pci_writeq(struct octeon_device *oct,
                    u64 val,
@@ -1369,6 +1389,7 @@ void lio_pci_writeq(struct octeon_device *oct,
 
        spin_unlock_irqrestore(&oct->pci_win_lock, flags);
 }
+EXPORT_SYMBOL_GPL(lio_pci_writeq);
 
 int octeon_mem_access_ok(struct octeon_device *oct)
 {
@@ -1388,6 +1409,7 @@ int octeon_mem_access_ok(struct octeon_device *oct)
 
        return access_okay ? 0 : 1;
 }
+EXPORT_SYMBOL_GPL(octeon_mem_access_ok);
 
 int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout)
 {
@@ -1408,6 +1430,7 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout)
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init);
 
 /* Get the octeon id assigned to the octeon device passed as argument.
  *  This function is exported to other modules.
@@ -1462,3 +1485,4 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
                }
        }
 }
+EXPORT_SYMBOL_GPL(lio_enable_irq);
index d4080bd..0d6ee30 100644 (file)
@@ -107,6 +107,7 @@ u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq)
 
        return last_count;
 }
+EXPORT_SYMBOL_GPL(octeon_droq_check_hw_for_pkts);
 
 static void octeon_droq_compute_max_packet_bufs(struct octeon_droq *droq)
 {
@@ -216,6 +217,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_delete_droq);
 
 int octeon_init_droq(struct octeon_device *oct,
                     u32 q_no,
@@ -773,6 +775,7 @@ octeon_droq_process_packets(struct octeon_device *oct,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_droq_process_packets);
 
 /*
  * Utility function to poll for packets. check_hw_for_packets must be
@@ -921,6 +924,7 @@ int octeon_unregister_droq_ops(struct octeon_device *oct, u32 q_no)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_unregister_droq_ops);
 
 int octeon_create_droq(struct octeon_device *oct,
                       u32 q_no, u32 num_descs,
index 7ccab36..d701324 100644 (file)
@@ -164,6 +164,7 @@ octeon_pci_read_core_mem(struct octeon_device *oct,
 {
        __octeon_pci_rw_core_mem(oct, coreaddr, buf, len, 1);
 }
+EXPORT_SYMBOL_GPL(octeon_pci_read_core_mem);
 
 void
 octeon_pci_write_core_mem(struct octeon_device *oct,
@@ -173,6 +174,7 @@ octeon_pci_write_core_mem(struct octeon_device *oct,
 {
        __octeon_pci_rw_core_mem(oct, coreaddr, (u8 *)buf, len, 0);
 }
+EXPORT_SYMBOL_GPL(octeon_pci_write_core_mem);
 
 u64 octeon_read_device_mem64(struct octeon_device *oct, u64 coreaddr)
 {
@@ -182,6 +184,7 @@ u64 octeon_read_device_mem64(struct octeon_device *oct, u64 coreaddr)
 
        return be64_to_cpu(ret);
 }
+EXPORT_SYMBOL_GPL(octeon_read_device_mem64);
 
 u32 octeon_read_device_mem32(struct octeon_device *oct, u64 coreaddr)
 {
@@ -191,6 +194,7 @@ u32 octeon_read_device_mem32(struct octeon_device *oct, u64 coreaddr)
 
        return be32_to_cpu(ret);
 }
+EXPORT_SYMBOL_GPL(octeon_read_device_mem32);
 
 void octeon_write_device_mem32(struct octeon_device *oct, u64 coreaddr,
                               u32 val)
@@ -199,3 +203,4 @@ void octeon_write_device_mem32(struct octeon_device *oct, u64 coreaddr,
 
        __octeon_pci_rw_core_mem(oct, coreaddr, (u8 *)&t, 4, 0);
 }
+EXPORT_SYMBOL_GPL(octeon_write_device_mem32);
index 1a706f8..dee56ea 100644 (file)
@@ -79,6 +79,7 @@ octeon_alloc_soft_command_resp(struct octeon_device    *oct,
 
        return sc;
 }
+EXPORT_SYMBOL_GPL(octeon_alloc_soft_command_resp);
 
 int octnet_send_nic_data_pkt(struct octeon_device *oct,
                             struct octnic_data_pkt *ndata,
@@ -90,6 +91,7 @@ int octnet_send_nic_data_pkt(struct octeon_device *oct,
                                   ndata->buf, ndata->datasize,
                                   ndata->reqtype);
 }
+EXPORT_SYMBOL_GPL(octnet_send_nic_data_pkt);
 
 static inline struct octeon_soft_command
 *octnic_alloc_ctrl_pkt_sc(struct octeon_device *oct,
@@ -196,3 +198,4 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
 
        return retval;
 }
+EXPORT_SYMBOL_GPL(octnet_send_nic_ctrl_pkt);
index 32f854c..de8a6ce 100644 (file)
@@ -185,6 +185,7 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
        }
        return 1;
 }
+EXPORT_SYMBOL_GPL(octeon_delete_instr_queue);
 
 /* Return 0 on success, 1 on failure */
 int octeon_setup_iq(struct octeon_device *oct,
@@ -258,6 +259,7 @@ int lio_wait_for_instr_fetch(struct octeon_device *oct)
 
        return instr_cnt;
 }
+EXPORT_SYMBOL_GPL(lio_wait_for_instr_fetch);
 
 static inline void
 ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq)
@@ -282,6 +284,7 @@ octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no)
                ring_doorbell(oct, iq);
        spin_unlock(&iq->post_lock);
 }
+EXPORT_SYMBOL_GPL(octeon_ring_doorbell_locked);
 
 static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
                                      u8 *cmd)
@@ -345,6 +348,7 @@ octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_register_reqtype_free_fn);
 
 static inline void
 __add_to_request_list(struct octeon_instr_queue *iq,
@@ -430,6 +434,7 @@ lio_process_iq_request_list(struct octeon_device *oct,
 
        return inst_count;
 }
+EXPORT_SYMBOL_GPL(lio_process_iq_request_list);
 
 /* Can only be called from process context */
 int
@@ -566,6 +571,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
 
        return st.status;
 }
+EXPORT_SYMBOL_GPL(octeon_send_command);
 
 void
 octeon_prepare_soft_command(struct octeon_device *oct,
@@ -673,6 +679,7 @@ octeon_prepare_soft_command(struct octeon_device *oct,
                }
        }
 }
+EXPORT_SYMBOL_GPL(octeon_prepare_soft_command);
 
 int octeon_send_soft_command(struct octeon_device *oct,
                             struct octeon_soft_command *sc)
@@ -726,6 +733,7 @@ int octeon_send_soft_command(struct octeon_device *oct,
        return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
                                    len, REQTYPE_SOFT_COMMAND));
 }
+EXPORT_SYMBOL_GPL(octeon_send_soft_command);
 
 int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
 {
@@ -755,6 +763,7 @@ int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_setup_sc_buffer_pool);
 
 int octeon_free_sc_done_list(struct octeon_device *oct)
 {
@@ -794,6 +803,7 @@ int octeon_free_sc_done_list(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_free_sc_done_list);
 
 int octeon_free_sc_zombie_list(struct octeon_device *oct)
 {
@@ -818,6 +828,7 @@ int octeon_free_sc_zombie_list(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_free_sc_zombie_list);
 
 int octeon_free_sc_buffer_pool(struct octeon_device *oct)
 {
@@ -842,6 +853,7 @@ int octeon_free_sc_buffer_pool(struct octeon_device *oct)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(octeon_free_sc_buffer_pool);
 
 struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
                                                      u32 datasize,
@@ -913,6 +925,7 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
 
        return sc;
 }
+EXPORT_SYMBOL_GPL(octeon_alloc_soft_command);
 
 void octeon_free_soft_command(struct octeon_device *oct,
                              struct octeon_soft_command *sc)
@@ -925,3 +938,4 @@ void octeon_free_soft_command(struct octeon_device *oct,
 
        spin_unlock_bh(&oct->sc_buf_pool.lock);
 }
+EXPORT_SYMBOL_GPL(octeon_free_soft_command);
index ac7747c..8610509 100644 (file)
@@ -52,12 +52,14 @@ int octeon_setup_response_list(struct octeon_device *oct)
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(octeon_setup_response_list);
 
 void octeon_delete_response_list(struct octeon_device *oct)
 {
        cancel_delayed_work_sync(&oct->dma_comp_wq.wk.work);
        destroy_workqueue(oct->dma_comp_wq.wq);
 }
+EXPORT_SYMBOL_GPL(octeon_delete_response_list);
 
 int lio_process_ordered_list(struct octeon_device *octeon_dev,
                             u32 force_quit)
@@ -219,6 +221,7 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(lio_process_ordered_list);
 
 static void oct_poll_req_completion(struct work_struct *work)
 {
index efa7f40..2e9a74f 100644 (file)
@@ -2184,9 +2184,8 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
        len -= offset;
 
        rx_frag += nr_frags;
-       __skb_frag_set_page(rx_frag, sd->pg_chunk.page);
-       skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset);
-       skb_frag_size_set(rx_frag, len);
+       skb_frag_fill_page_desc(rx_frag, sd->pg_chunk.page,
+                               sd->pg_chunk.offset + offset, len);
 
        skb->len += len;
        skb->data_len += len;
index ae6b17b..5724bbb 100644 (file)
@@ -1092,7 +1092,17 @@ new_buf:
                if (copy > size)
                        copy = size;
 
-               if (skb_tailroom(skb) > 0) {
+               if (msg->msg_flags & MSG_SPLICE_PAGES) {
+                       err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+                                                  sk->sk_allocation);
+                       if (err < 0) {
+                               if (err == -EMSGSIZE)
+                                       goto new_buf;
+                               goto do_fault;
+                       }
+                       copy = err;
+                       sk_wmem_queued_add(sk, copy);
+               } else if (skb_tailroom(skb) > 0) {
                        copy = min(copy, skb_tailroom(skb));
                        if (is_tls_tx(csk))
                                copy = min_t(int, copy, csk->tlshws.txleft);
@@ -1230,110 +1240,15 @@ out_err:
 int chtls_sendpage(struct sock *sk, struct page *page,
                   int offset, size_t size, int flags)
 {
-       struct chtls_sock *csk;
-       struct chtls_dev *cdev;
-       int mss, err, copied;
-       struct tcp_sock *tp;
-       long timeo;
-
-       tp = tcp_sk(sk);
-       copied = 0;
-       csk = rcu_dereference_sk_user_data(sk);
-       cdev = csk->cdev;
-       lock_sock(sk);
-       timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
-
-       err = sk_stream_wait_connect(sk, &timeo);
-       if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
-           err != 0)
-               goto out_err;
-
-       mss = csk->mss;
-       csk_set_flag(csk, CSK_TX_MORE_DATA);
-
-       while (size > 0) {
-               struct sk_buff *skb = skb_peek_tail(&csk->txq);
-               int copy, i;
+       struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
+       struct bio_vec bvec;
 
-               if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
-                   (copy = mss - skb->len) <= 0) {
-new_buf:
-                       if (!csk_mem_free(cdev, sk))
-                               goto wait_for_sndbuf;
-
-                       if (is_tls_tx(csk)) {
-                               skb = get_record_skb(sk,
-                                                    select_size(sk, size,
-                                                                flags,
-                                                                TX_TLSHDR_LEN),
-                                                    true);
-                       } else {
-                               skb = get_tx_skb(sk, 0);
-                       }
-                       if (!skb)
-                               goto wait_for_memory;
-                       copy = mss;
-               }
-               if (copy > size)
-                       copy = size;
-
-               i = skb_shinfo(skb)->nr_frags;
-               if (skb_can_coalesce(skb, i, page, offset)) {
-                       skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
-               } else if (i < MAX_SKB_FRAGS) {
-                       get_page(page);
-                       skb_fill_page_desc(skb, i, page, offset, copy);
-               } else {
-                       tx_skb_finalize(skb);
-                       push_frames_if_head(sk);
-                       goto new_buf;
-               }
+       if (flags & MSG_SENDPAGE_NOTLAST)
+               msg.msg_flags |= MSG_MORE;
 
-               skb->len += copy;
-               if (skb->len == mss)
-                       tx_skb_finalize(skb);
-               skb->data_len += copy;
-               skb->truesize += copy;
-               sk->sk_wmem_queued += copy;
-               tp->write_seq += copy;
-               copied += copy;
-               offset += copy;
-               size -= copy;
-
-               if (corked(tp, flags) &&
-                   (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
-                       ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
-
-               if (!size)
-                       break;
-
-               if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND))
-                       push_frames_if_head(sk);
-               continue;
-wait_for_sndbuf:
-               set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-wait_for_memory:
-               err = csk_wait_memory(cdev, sk, &timeo);
-               if (err)
-                       goto do_error;
-       }
-out:
-       csk_reset_flag(csk, CSK_TX_MORE_DATA);
-       if (copied)
-               chtls_tcp_push(sk, flags);
-done:
-       release_sock(sk);
-       return copied;
-
-do_error:
-       if (copied)
-               goto out;
-
-out_err:
-       if (csk_conn_inline(csk))
-               csk_reset_flag(csk, CSK_TX_MORE_DATA);
-       copied = sk_stream_error(sk, flags, err);
-       goto done;
+       bvec_set_page(&bvec, page, size, offset);
+       iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+       return chtls_sendmsg(sk, &msg, size);
 }
 
 static void chtls_select_window(struct sock *sk)
index 7e408bc..3164ed2 100644 (file)
@@ -2343,11 +2343,10 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
                hdr_len = ETH_HLEN;
                memcpy(skb->data, start, hdr_len);
                skb_shinfo(skb)->nr_frags = 1;
-               skb_frag_set_page(skb, 0, page_info->page);
-               skb_frag_off_set(&skb_shinfo(skb)->frags[0],
-                                page_info->page_offset + hdr_len);
-               skb_frag_size_set(&skb_shinfo(skb)->frags[0],
-                                 curr_frag_len - hdr_len);
+               skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0],
+                                       page_info->page,
+                                       page_info->page_offset + hdr_len,
+                                       curr_frag_len - hdr_len);
                skb->data_len = curr_frag_len - hdr_len;
                skb->truesize += rx_frag_size;
                skb->tail += hdr_len;
@@ -2369,16 +2368,17 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
                if (page_info->page_offset == 0) {
                        /* Fresh page */
                        j++;
-                       skb_frag_set_page(skb, j, page_info->page);
-                       skb_frag_off_set(&skb_shinfo(skb)->frags[j],
-                                        page_info->page_offset);
-                       skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
+                       skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
+                                               page_info->page,
+                                               page_info->page_offset,
+                                               curr_frag_len);
                        skb_shinfo(skb)->nr_frags++;
                } else {
                        put_page(page_info->page);
+                       skb_frag_size_add(&skb_shinfo(skb)->frags[j],
+                                         curr_frag_len);
                }
 
-               skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
                skb->len += curr_frag_len;
                skb->data_len += curr_frag_len;
                skb->truesize += rx_frag_size;
@@ -2451,14 +2451,16 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
                if (i == 0 || page_info->page_offset == 0) {
                        /* First frag or Fresh page */
                        j++;
-                       skb_frag_set_page(skb, j, page_info->page);
-                       skb_frag_off_set(&skb_shinfo(skb)->frags[j],
-                                        page_info->page_offset);
-                       skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
+                       skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
+                                               page_info->page,
+                                               page_info->page_offset,
+                                               curr_frag_len);
                } else {
                        put_page(page_info->page);
+                       skb_frag_size_add(&skb_shinfo(skb)->frags[j],
+                                         curr_frag_len);
                }
-               skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
+
                skb->truesize += rx_frag_size;
                remaining -= curr_frag_len;
                memset(page_info, 0, sizeof(*page_info));
index 1581d6b..8a9145f 100644 (file)
@@ -329,7 +329,7 @@ static bool disable_taprio(struct tsnep_adapter *adapter)
        int retval;
 
        memset(&qopt, 0, sizeof(qopt));
-       qopt.enable = 0;
+       qopt.cmd = TAPRIO_CMD_DESTROY;
        retval = tsnep_tc_setup(adapter->netdev, TC_SETUP_QDISC_TAPRIO, &qopt);
        if (retval)
                return false;
@@ -360,7 +360,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter)
        for (i = 0; i < 255; i++)
                qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES;
 
-       qopt->enable = 1;
+       qopt->cmd = TAPRIO_CMD_REPLACE;
        qopt->base_time = ktime_set(0, 0);
        qopt->cycle_time = 1500000;
        qopt->cycle_time_extension = 0;
@@ -382,7 +382,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter)
        if (!run_taprio(adapter, qopt, 100))
                goto failed;
 
-       qopt->enable = 1;
+       qopt->cmd = TAPRIO_CMD_REPLACE;
        qopt->base_time = ktime_set(0, 0);
        qopt->cycle_time = 411854;
        qopt->cycle_time_extension = 0;
@@ -406,7 +406,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter)
        if (!run_taprio(adapter, qopt, 100))
                goto failed;
 
-       qopt->enable = 1;
+       qopt->cmd = TAPRIO_CMD_REPLACE;
        qopt->base_time = ktime_set(0, 0);
        delay_base_time(adapter, qopt, 12);
        qopt->cycle_time = 125000;
@@ -457,7 +457,7 @@ static bool tsnep_test_taprio_change(struct tsnep_adapter *adapter)
        for (i = 0; i < 255; i++)
                qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES;
 
-       qopt->enable = 1;
+       qopt->cmd = TAPRIO_CMD_REPLACE;
        qopt->base_time = ktime_set(0, 0);
        qopt->cycle_time = 100000;
        qopt->cycle_time_extension = 0;
@@ -610,7 +610,7 @@ static bool tsnep_test_taprio_extension(struct tsnep_adapter *adapter)
        for (i = 0; i < 255; i++)
                qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES;
 
-       qopt->enable = 1;
+       qopt->cmd = TAPRIO_CMD_REPLACE;
        qopt->base_time = ktime_set(0, 0);
        qopt->cycle_time = 100000;
        qopt->cycle_time_extension = 50000;
index d083e66..745b191 100644 (file)
@@ -325,7 +325,7 @@ static int tsnep_taprio(struct tsnep_adapter *adapter,
        if (!adapter->gate_control)
                return -EOPNOTSUPP;
 
-       if (!qopt->enable) {
+       if (qopt->cmd == TAPRIO_CMD_DESTROY) {
                /* disable gate control if active */
                mutex_lock(&adapter->gate_control_lock);
 
@@ -337,6 +337,8 @@ static int tsnep_taprio(struct tsnep_adapter *adapter,
                mutex_unlock(&adapter->gate_control_lock);
 
                return 0;
+       } else if (qopt->cmd != TAPRIO_CMD_REPLACE) {
+               return -EOPNOTSUPP;
        }
 
        retval = tsnep_validate_gcl(qopt);
index b1871e6..cb70855 100644 (file)
@@ -273,7 +273,7 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac,
        mac->pcs = lynx_pcs_create(mdiodev);
        if (!mac->pcs) {
                netdev_err(mac->net_dev, "lynx_pcs_create() failed\n");
-               put_device(&mdiodev->dev);
+               mdio_device_free(mdiodev);
                return -ENOMEM;
        }
 
@@ -286,10 +286,9 @@ static void dpaa2_pcs_destroy(struct dpaa2_mac *mac)
 
        if (phylink_pcs) {
                struct mdio_device *mdio = lynx_get_mdio_device(phylink_pcs);
-               struct device *dev = &mdio->dev;
 
                lynx_pcs_destroy(phylink_pcs);
-               put_device(dev);
+               mdio_device_free(mdio);
                mac->pcs = NULL;
        }
 }
index 9e1b253..164b73d 100644 (file)
@@ -1451,9 +1451,8 @@ static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
                xdp_buff_set_frag_pfmemalloc(xdp_buff);
 
        frag = &shinfo->frags[shinfo->nr_frags];
-       skb_frag_off_set(frag, rx_swbd->page_offset);
-       skb_frag_size_set(frag, size);
-       __skb_frag_set_page(frag, rx_swbd->page);
+       skb_frag_fill_page_desc(frag, rx_swbd->page, rx_swbd->page_offset,
+                               size);
 
        shinfo->nr_frags++;
 }
@@ -2639,7 +2638,7 @@ static void enetc_debug_tx_ring_prios(struct enetc_ndev_priv *priv)
                           priv->tx_ring[i]->prio);
 }
 
-static void enetc_reset_tc_mqprio(struct net_device *ndev)
+void enetc_reset_tc_mqprio(struct net_device *ndev)
 {
        struct enetc_ndev_priv *priv = netdev_priv(ndev);
        struct enetc_hw *hw = &priv->si->hw;
@@ -2664,6 +2663,7 @@ static void enetc_reset_tc_mqprio(struct net_device *ndev)
 
        enetc_change_preemptible_tcs(priv, 0);
 }
+EXPORT_SYMBOL_GPL(enetc_reset_tc_mqprio);
 
 int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
 {
index c97a8e3..8577cf7 100644 (file)
@@ -429,6 +429,7 @@ struct net_device_stats *enetc_get_stats(struct net_device *ndev);
 void enetc_set_features(struct net_device *ndev, netdev_features_t features);
 int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
 int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data);
+void enetc_reset_tc_mqprio(struct net_device *ndev);
 int enetc_setup_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
 int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
                   struct xdp_frame **frames, u32 flags);
index 7cd22d3..1416262 100644 (file)
@@ -863,7 +863,6 @@ static int enetc_imdio_create(struct enetc_pf *pf)
        struct device *dev = &pf->si->pdev->dev;
        struct enetc_mdio_priv *mdio_priv;
        struct phylink_pcs *phylink_pcs;
-       struct mdio_device *mdio_device;
        struct mii_bus *bus;
        int err;
 
@@ -889,17 +888,9 @@ static int enetc_imdio_create(struct enetc_pf *pf)
                goto free_mdio_bus;
        }
 
-       mdio_device = mdio_device_create(bus, 0);
-       if (IS_ERR(mdio_device)) {
-               err = PTR_ERR(mdio_device);
-               dev_err(dev, "cannot create mdio device (%d)\n", err);
-               goto unregister_mdiobus;
-       }
-
-       phylink_pcs = lynx_pcs_create(mdio_device);
-       if (!phylink_pcs) {
-               mdio_device_free(mdio_device);
-               err = -ENOMEM;
+       phylink_pcs = lynx_pcs_create_mdiodev(bus, 0);
+       if (IS_ERR(phylink_pcs)) {
+               err = PTR_ERR(phylink_pcs);
                dev_err(dev, "cannot create lynx pcs (%d)\n", err);
                goto unregister_mdiobus;
        }
@@ -918,13 +909,8 @@ free_mdio_bus:
 
 static void enetc_imdio_remove(struct enetc_pf *pf)
 {
-       struct mdio_device *mdio_device;
-
-       if (pf->pcs) {
-               mdio_device = lynx_get_mdio_device(pf->pcs);
-               mdio_device_free(mdio_device);
+       if (pf->pcs)
                lynx_pcs_destroy(pf->pcs);
-       }
        if (pf->imdio) {
                mdiobus_unregister(pf->imdio);
                mdiobus_free(pf->imdio);
index 83c27bb..71157eb 100644 (file)
@@ -43,10 +43,9 @@ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed)
        enetc_port_wr(hw, ENETC_PMR, (tmp & ~ENETC_PMR_PSPEED_MASK) | pspeed);
 }
 
-static int enetc_setup_taprio(struct net_device *ndev,
+static int enetc_setup_taprio(struct enetc_ndev_priv *priv,
                              struct tc_taprio_qopt_offload *admin_conf)
 {
-       struct enetc_ndev_priv *priv = netdev_priv(ndev);
        struct enetc_hw *hw = &priv->si->hw;
        struct enetc_cbd cbd = {.cmd = 0};
        struct tgs_gcl_conf *gcl_config;
@@ -60,19 +59,13 @@ static int enetc_setup_taprio(struct net_device *ndev,
        int err;
        int i;
 
+       /* TSD and Qbv are mutually exclusive in hardware */
+       for (i = 0; i < priv->num_tx_rings; i++)
+               if (priv->tx_ring[i]->tsd_enable)
+                       return -EBUSY;
+
        if (admin_conf->num_entries > enetc_get_max_gcl_len(hw))
                return -EINVAL;
-       gcl_len = admin_conf->num_entries;
-
-       tge = enetc_rd(hw, ENETC_PTGCR);
-       if (!admin_conf->enable) {
-               enetc_wr(hw, ENETC_PTGCR, tge & ~ENETC_PTGCR_TGE);
-               enetc_reset_ptcmsdur(hw);
-
-               priv->active_offloads &= ~ENETC_F_QBV;
-
-               return 0;
-       }
 
        if (admin_conf->cycle_time > U32_MAX ||
            admin_conf->cycle_time_extension > U32_MAX)
@@ -82,6 +75,7 @@ static int enetc_setup_taprio(struct net_device *ndev,
         * control BD descriptor.
         */
        gcl_config = &cbd.gcl_conf;
+       gcl_len = admin_conf->num_entries;
 
        data_size = struct_size(gcl_data, entry, gcl_len);
        tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size,
@@ -115,6 +109,7 @@ static int enetc_setup_taprio(struct net_device *ndev,
        cbd.cls = BDCR_CMD_PORT_GCL;
        cbd.status_flags = 0;
 
+       tge = enetc_rd(hw, ENETC_PTGCR);
        enetc_wr(hw, ENETC_PTGCR, tge | ENETC_PTGCR_TGE);
 
        err = enetc_send_cmd(priv->si, &cbd);
@@ -132,25 +127,92 @@ static int enetc_setup_taprio(struct net_device *ndev,
        return 0;
 }
 
-int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data)
+static void enetc_reset_taprio(struct enetc_ndev_priv *priv)
+{
+       struct enetc_hw *hw = &priv->si->hw;
+       u32 val;
+
+       val = enetc_rd(hw, ENETC_PTGCR);
+       enetc_wr(hw, ENETC_PTGCR, val & ~ENETC_PTGCR_TGE);
+       enetc_reset_ptcmsdur(hw);
+
+       priv->active_offloads &= ~ENETC_F_QBV;
+}
+
+static void enetc_taprio_destroy(struct net_device *ndev)
 {
-       struct tc_taprio_qopt_offload *taprio = type_data;
        struct enetc_ndev_priv *priv = netdev_priv(ndev);
-       int err, i;
 
-       /* TSD and Qbv are mutually exclusive in hardware */
+       enetc_reset_taprio(priv);
+       enetc_reset_tc_mqprio(ndev);
+}
+
+static void enetc_taprio_stats(struct net_device *ndev,
+                              struct tc_taprio_qopt_stats *stats)
+{
+       struct enetc_ndev_priv *priv = netdev_priv(ndev);
+       u64 window_drops = 0;
+       int i;
+
        for (i = 0; i < priv->num_tx_rings; i++)
-               if (priv->tx_ring[i]->tsd_enable)
-                       return -EBUSY;
+               window_drops += priv->tx_ring[i]->stats.win_drop;
+
+       stats->window_drops = window_drops;
+}
+
+static void enetc_taprio_tc_stats(struct net_device *ndev,
+                                 struct tc_taprio_qopt_tc_stats *tc_stats)
+{
+       struct tc_taprio_qopt_stats *stats = &tc_stats->stats;
+       struct enetc_ndev_priv *priv = netdev_priv(ndev);
+       int tc = tc_stats->tc;
+       u64 window_drops = 0;
+       int i;
+
+       for (i = 0; i < priv->num_tx_rings; i++)
+               if (priv->tx_ring[i]->prio == tc)
+                       window_drops += priv->tx_ring[i]->stats.win_drop;
+
+       stats->window_drops = window_drops;
+}
+
+static int enetc_taprio_replace(struct net_device *ndev,
+                               struct tc_taprio_qopt_offload *offload)
+{
+       struct enetc_ndev_priv *priv = netdev_priv(ndev);
+       int err;
 
-       err = enetc_setup_tc_mqprio(ndev, &taprio->mqprio);
+       err = enetc_setup_tc_mqprio(ndev, &offload->mqprio);
        if (err)
                return err;
 
-       err = enetc_setup_taprio(ndev, taprio);
-       if (err) {
-               taprio->mqprio.qopt.num_tc = 0;
-               enetc_setup_tc_mqprio(ndev, &taprio->mqprio);
+       err = enetc_setup_taprio(priv, offload);
+       if (err)
+               enetc_reset_tc_mqprio(ndev);
+
+       return err;
+}
+
+int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data)
+{
+       struct tc_taprio_qopt_offload *offload = type_data;
+       int err = 0;
+
+       switch (offload->cmd) {
+       case TAPRIO_CMD_REPLACE:
+               err = enetc_taprio_replace(ndev, offload);
+               break;
+       case TAPRIO_CMD_DESTROY:
+               enetc_taprio_destroy(ndev);
+               break;
+       case TAPRIO_CMD_STATS:
+               enetc_taprio_stats(ndev, &offload->stats);
+               break;
+       case TAPRIO_CMD_TC_STATS:
+               enetc_taprio_tc_stats(ndev, &offload->tc_stats);
+               break;
+       default:
+               err = -EOPNOTSUPP;
        }
 
        return err;
index 38e5b5a..4d37a81 100644 (file)
@@ -1011,24 +1011,6 @@ static void fec_enet_enable_ring(struct net_device *ndev)
        }
 }
 
-static void fec_enet_reset_skb(struct net_device *ndev)
-{
-       struct fec_enet_private *fep = netdev_priv(ndev);
-       struct fec_enet_priv_tx_q *txq;
-       int i, j;
-
-       for (i = 0; i < fep->num_tx_queues; i++) {
-               txq = fep->tx_queue[i];
-
-               for (j = 0; j < txq->bd.ring_size; j++) {
-                       if (txq->tx_skbuff[j]) {
-                               dev_kfree_skb_any(txq->tx_skbuff[j]);
-                               txq->tx_skbuff[j] = NULL;
-                       }
-               }
-       }
-}
-
 /*
  * This function is called to start or restart the FEC during a link
  * change, transmit timeout, or to reconfigure the FEC.  The network
@@ -1071,9 +1053,6 @@ fec_restart(struct net_device *ndev)
 
        fec_enet_enable_ring(ndev);
 
-       /* Reset tx SKB buffers. */
-       fec_enet_reset_skb(ndev);
-
        /* Enable MII mode */
        if (fep->full_duplex == DUPLEX_FULL) {
                /* FD enable */
@@ -3791,19 +3770,18 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
                                   struct xdp_frame *frame)
 {
        unsigned int index, status, estatus;
-       struct bufdesc *bdp, *last_bdp;
+       struct bufdesc *bdp;
        dma_addr_t dma_addr;
        int entries_free;
 
        entries_free = fec_enet_get_free_txdesc_num(txq);
        if (entries_free < MAX_SKB_FRAGS + 1) {
                netdev_err(fep->netdev, "NOT enough BD for SG!\n");
-               return NETDEV_TX_BUSY;
+               return -EBUSY;
        }
 
        /* Fill in a Tx ring entry */
        bdp = txq->bd.cur;
-       last_bdp = bdp;
        status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
 
@@ -3812,7 +3790,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
        dma_addr = dma_map_single(&fep->pdev->dev, frame->data,
                                  frame->len, DMA_TO_DEVICE);
        if (dma_mapping_error(&fep->pdev->dev, dma_addr))
-               return FEC_ENET_XDP_CONSUMED;
+               return -ENOMEM;
 
        status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
        if (fep->bufdesc_ex)
@@ -3831,7 +3809,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
                ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
-       index = fec_enet_get_bd_index(last_bdp, &txq->bd);
        txq->tx_skbuff[index] = NULL;
 
        /* Make sure the updates to rest of the descriptor are performed before
@@ -3846,7 +3823,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
        bdp->cbd_sc = cpu_to_fec16(status);
 
        /* If this was the last BD in the ring, start at the beginning again. */
-       bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd);
+       bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
 
        /* Make sure the update to bdp are performed before txq->bd.cur. */
        dma_wmb();
@@ -3879,7 +3856,7 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
        __netif_tx_lock(nq, cpu);
 
        for (i = 0; i < num_frames; i++) {
-               if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) != 0)
+               if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
                        break;
                sent_frames++;
        }
@@ -4035,6 +4012,11 @@ static int fec_enet_init(struct net_device *ndev)
 
        ndev->hw_features = ndev->features;
 
+       if (!(fep->quirks & FEC_QUIRK_SWAP_FRAME))
+               ndev->xdp_features = NETDEV_XDP_ACT_BASIC |
+                                    NETDEV_XDP_ACT_REDIRECT |
+                                    NETDEV_XDP_ACT_NDO_XMIT;
+
        fec_restart(ndev);
 
        if (fep->quirks & FEC_QUIRK_MIB_CLEAR)
index 29a6c2e..7e25848 100644 (file)
@@ -323,9 +323,8 @@ static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len,
                if (ref_ok)
                        ref_ok |= buf->node;
 
-               __skb_frag_set_page(frags, buf->page);
-               skb_frag_off_set(frags, q->buf_offset);
-               skb_frag_size_set(frags++, frag_len);
+               skb_frag_fill_page_desc(frags++, buf->page, q->buf_offset,
+                                       frag_len);
 
                tot_len -= frag_len;
                if (!tot_len)
index caa00c7..8fb70db 100644 (file)
@@ -31,6 +31,7 @@
 
 // Minimum amount of time between queue kicks in msec (10 seconds)
 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
+#define DQO_TX_MAX     0x3FFFF
 
 const char gve_version_str[] = GVE_VERSION;
 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
@@ -2047,6 +2048,10 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
                goto err;
        }
 
+       /* Big TCP is only supported on DQ*/
+       if (!gve_is_gqi(priv))
+               netif_set_tso_max_size(priv->dev, DQO_TX_MAX);
+
        priv->num_registered_pages = 0;
        priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
        /* gvnic has one Notification Block per MSI-x vector, except for the
index b76143b..3c09e66 100644 (file)
@@ -8,6 +8,7 @@
 #include "gve_adminq.h"
 #include "gve_utils.h"
 #include "gve_dqo.h"
+#include <net/ip.h>
 #include <linux/tcp.h>
 #include <linux/slab.h>
 #include <linux/skbuff.h>
@@ -646,6 +647,9 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
                        goto drop;
                }
 
+               if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
+                       goto drop;
+
                num_buffer_descs = gve_num_buffer_descs_needed(skb);
        } else {
                num_buffer_descs = gve_num_buffer_descs_needed(skb);
index 3ee89ae..773d7aa 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-1.0+
 /* 82596.c: A generic 82596 ethernet driver for linux. */
 /*
    Based on Apricot.c
@@ -31,9 +32,7 @@
    Driver skeleton
    Written 1993 by Donald Becker.
    Copyright 1993 United States Government as represented by the Director,
-   National Security Agency. This software may only be used and distributed
-   according to the terms of the GNU General Public License as modified by SRC,
-   incorporated herein by reference.
+   National Security Agency.
 
    The author may be reached as becker@scyld.com, or C/O
    Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403
index 0af7009..3e53e0c 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-1.0+
 /* lasi_82596.c -- driver for the intel 82596 ethernet controller, as
    munged into HPPA boxen .
 
@@ -59,9 +60,7 @@
    Driver skeleton
    Written 1993 by Donald Becker.
    Copyright 1993 United States Government as represented by the Director,
-   National Security Agency. This software may only be used and distributed
-   according to the terms of the GNU General Public License as modified by SRC,
-   incorporated herein by reference.
+   National Security Agency.
 
    The author may be reached as becker@scyld.com, or C/O
    Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403
index ca2fb30..67d248a 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-1.0+
 /* lasi_82596.c -- driver for the intel 82596 ethernet controller, as
    munged into HPPA boxen .
 
@@ -59,9 +60,7 @@
    Driver skeleton
    Written 1993 by Donald Becker.
    Copyright 1993 United States Government as represented by the Director,
-   National Security Agency. This software may only be used and distributed
-   according to the terms of the GNU General Public License as modified by SRC,
-   incorporated herein by reference.
+   National Security Agency.
 
    The author may be reached as becker@scyld.com, or C/O
    Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403
index 3909c6a..5e27470 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Sun3 i82586 Ethernet driver
  *
index d82eca5..d8e249d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Intel i82586 Ethernet definitions
  *
index bd7ef59..771a3c9 100644 (file)
@@ -4198,7 +4198,7 @@ void e1000e_reset(struct e1000_adapter *adapter)
 
 /**
  * e1000e_trigger_lsc - trigger an LSC interrupt
- * @adapter: 
+ * @adapter: board private structure
  *
  * Fire a link status change interrupt to start the watchdog.
  **/
index 5d89392..817977e 100644 (file)
@@ -18,6 +18,7 @@ ice-y := ice_main.o   \
         ice_txrx_lib.o \
         ice_txrx.o     \
         ice_fltr.o     \
+        ice_irq.o      \
         ice_pf_vsi_vlan_ops.o \
         ice_vsi_vlan_ops.o \
         ice_vsi_vlan_lib.o \
index aa32111..b4bca1d 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/pkt_sched.h>
 #include <linux/if_bridge.h>
 #include <linux/ctype.h>
+#include <linux/linkmode.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
 #include <linux/auxiliary_bus.h>
@@ -74,6 +75,7 @@
 #include "ice_lag.h"
 #include "ice_vsi_vlan_ops.h"
 #include "ice_gnss.h"
+#include "ice_irq.h"
 
 #define ICE_BAR0               0
 #define ICE_REQ_DESC_MULTIPLE  32
 #define ICE_Q_WAIT_RETRY_LIMIT 10
 #define ICE_Q_WAIT_MAX_RETRY   (5 * ICE_Q_WAIT_RETRY_LIMIT)
 #define ICE_MAX_LG_RSS_QS      256
-#define ICE_RES_VALID_BIT      0x8000
-#define ICE_RES_MISC_VEC_ID    (ICE_RES_VALID_BIT - 1)
-#define ICE_RES_RDMA_VEC_ID    (ICE_RES_MISC_VEC_ID - 1)
-/* All VF control VSIs share the same IRQ, so assign a unique ID for them */
-#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_RDMA_VEC_ID - 1)
 #define ICE_INVAL_Q_INDEX      0xffff
 
 #define ICE_MAX_RXQS_PER_TC            256     /* Used when setting VSI context per TC Rx queues */
@@ -245,12 +242,6 @@ struct ice_tc_cfg {
        struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
 };
 
-struct ice_res_tracker {
-       u16 num_entries;
-       u16 end;
-       u16 list[];
-};
-
 struct ice_qs_cfg {
        struct mutex *qs_mutex;  /* will be assigned to &pf->avail_q_mutex */
        unsigned long *pf_map;
@@ -348,7 +339,9 @@ struct ice_vsi {
        u32 rx_buf_failed;
        u32 rx_page_failed;
        u16 num_q_vectors;
-       u16 base_vector;                /* IRQ base for OS reserved vectors */
+       /* tell if only dynamic irq allocation is allowed */
+       bool irq_dyn_alloc;
+
        enum ice_vsi_type type;
        u16 vsi_num;                    /* HW (absolute) index of this VSI */
        u16 idx;                        /* software index in pf->vsi[] */
@@ -479,6 +472,7 @@ struct ice_q_vector {
        char name[ICE_INT_NAME_STR_LEN];
 
        u16 total_events;       /* net_dim(): number of interrupts processed */
+       struct msi_map irq;
 } ____cacheline_internodealigned_in_smp;
 
 enum ice_pf_flags {
@@ -539,7 +533,7 @@ struct ice_pf {
 
        /* OS reserved IRQ details */
        struct msix_entry *msix_entries;
-       struct ice_res_tracker *irq_tracker;
+       struct ice_irq_tracker irq_tracker;
        /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
         * number of MSIX vectors needed for all SR-IOV VFs from the number of
         * MSIX vectors allowed on this PF.
@@ -583,8 +577,7 @@ struct ice_pf {
 
        u32 hw_csum_rx_error;
        u32 oicr_err_reg;
-       u16 oicr_idx;           /* Other interrupt cause MSIX vector index */
-       u16 num_avail_sw_msix;  /* remaining MSIX SW vectors left unclaimed */
+       struct msi_map oicr_irq;        /* Other interrupt cause MSIX vector */
        u16 max_pf_txqs;        /* Total Tx queues PF wide */
        u16 max_pf_rxqs;        /* Total Rx queues PF wide */
        u16 num_lan_msix;       /* Total MSIX vectors for base driver */
@@ -670,7 +663,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
                    struct ice_q_vector *q_vector)
 {
        u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
-                               ((struct ice_pf *)hw->back)->oicr_idx;
+                               ((struct ice_pf *)hw->back)->oicr_irq.index;
        int itr = ICE_ITR_NONE;
        u32 val;
 
@@ -821,25 +814,6 @@ static inline bool ice_is_switchdev_running(struct ice_pf *pf)
        return pf->switchdev.is_running;
 }
 
-/**
- * ice_set_sriov_cap - enable SRIOV in PF flags
- * @pf: PF struct
- */
-static inline void ice_set_sriov_cap(struct ice_pf *pf)
-{
-       if (pf->hw.func_caps.common_cap.sr_iov_1_1)
-               set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
-}
-
-/**
- * ice_clear_sriov_cap - disable SRIOV in PF flags
- * @pf: PF struct
- */
-static inline void ice_clear_sriov_cap(struct ice_pf *pf)
-{
-       clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
-}
-
 #define ICE_FD_STAT_CTR_BLOCK_COUNT    256
 #define ICE_FD_STAT_PF_IDX(base_idx) \
                        ((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
index 838d9b2..63d3e1d 100644 (file)
@@ -1087,7 +1087,7 @@ struct ice_aqc_get_phy_caps {
 #define ICE_PHY_TYPE_HIGH_100G_CAUI2           BIT_ULL(2)
 #define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC    BIT_ULL(3)
 #define ICE_PHY_TYPE_HIGH_100G_AUI2            BIT_ULL(4)
-#define ICE_PHY_TYPE_HIGH_MAX_INDEX            5
+#define ICE_PHY_TYPE_HIGH_MAX_INDEX            4
 
 struct ice_aqc_get_phy_caps_data {
        __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
index fba178e..cca0e75 100644 (file)
@@ -596,7 +596,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
 {
        struct net_device *netdev;
        struct ice_pf *pf;
-       int base_idx, i;
+       int i;
 
        if (!vsi || vsi->type != ICE_VSI_PF)
                return 0;
@@ -613,10 +613,9 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
        if (unlikely(!netdev->rx_cpu_rmap))
                return -EINVAL;
 
-       base_idx = vsi->base_vector;
        ice_for_each_q_vector(vsi, i)
                if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
-                                    pf->msix_entries[base_idx + i].vector)) {
+                                    vsi->q_vectors[i]->irq.virq)) {
                        ice_free_cpu_rx_rmap(vsi);
                        return -EINVAL;
                }
index 1911d64..4a12316 100644 (file)
@@ -103,10 +103,10 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
 {
        struct ice_pf *pf = vsi->back;
        struct ice_q_vector *q_vector;
+       int err;
 
        /* allocate q_vector */
-       q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector),
-                               GFP_KERNEL);
+       q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL);
        if (!q_vector)
                return -ENOMEM;
 
@@ -118,9 +118,34 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
        q_vector->rx.itr_mode = ITR_DYNAMIC;
        q_vector->tx.type = ICE_TX_CONTAINER;
        q_vector->rx.type = ICE_RX_CONTAINER;
+       q_vector->irq.index = -ENOENT;
 
-       if (vsi->type == ICE_VSI_VF)
+       if (vsi->type == ICE_VSI_VF) {
+               q_vector->reg_idx = ice_calc_vf_reg_idx(vsi->vf, q_vector);
                goto out;
+       } else if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
+               struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi);
+
+               if (ctrl_vsi) {
+                       if (unlikely(!ctrl_vsi->q_vectors)) {
+                               err = -ENOENT;
+                               goto err_free_q_vector;
+                       }
+
+                       q_vector->irq = ctrl_vsi->q_vectors[0]->irq;
+                       goto skip_alloc;
+               }
+       }
+
+       q_vector->irq = ice_alloc_irq(pf, vsi->irq_dyn_alloc);
+       if (q_vector->irq.index < 0) {
+               err = -ENOMEM;
+               goto err_free_q_vector;
+       }
+
+skip_alloc:
+       q_vector->reg_idx = q_vector->irq.index;
+
        /* only set affinity_mask if the CPU is online */
        if (cpu_online(v_idx))
                cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
@@ -137,6 +162,11 @@ out:
        vsi->q_vectors[v_idx] = q_vector;
 
        return 0;
+
+err_free_q_vector:
+       kfree(q_vector);
+
+       return err;
 }
 
 /**
@@ -168,7 +198,19 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
        if (vsi->netdev)
                netif_napi_del(&q_vector->napi);
 
-       devm_kfree(dev, q_vector);
+       /* release MSIX interrupt if q_vector had interrupt allocated */
+       if (q_vector->irq.index < 0)
+               goto free_q_vector;
+
+       /* only free last VF ctrl vsi interrupt */
+       if (vsi->type == ICE_VSI_CTRL && vsi->vf &&
+           ice_get_vf_ctrl_vsi(pf, vsi))
+               goto free_q_vector;
+
+       ice_free_irq(pf, q_vector->irq);
+
+free_q_vector:
+       kfree(q_vector);
        vsi->q_vectors[v_idx] = NULL;
 }
 
index bc44cc2..80dc544 100644 (file)
@@ -1256,8 +1256,6 @@ static const struct devlink_ops ice_devlink_ops = {
                          BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
        .reload_down = ice_devlink_reload_down,
        .reload_up = ice_devlink_reload_up,
-       .port_split = ice_devlink_port_split,
-       .port_unsplit = ice_devlink_port_unsplit,
        .eswitch_mode_get = ice_eswitch_mode_get,
        .eswitch_mode_set = ice_eswitch_mode_set,
        .info_get = ice_devlink_info_get,
@@ -1512,6 +1510,11 @@ ice_devlink_set_port_split_options(struct ice_pf *pf,
        ice_active_port_option = active_idx;
 }
 
+static const struct devlink_port_ops ice_devlink_port_ops = {
+       .port_split = ice_devlink_port_split,
+       .port_unsplit = ice_devlink_port_unsplit,
+};
+
 /**
  * ice_devlink_create_pf_port - Create a devlink port for this PF
  * @pf: the PF to create a devlink port for
@@ -1551,7 +1554,8 @@ int ice_devlink_create_pf_port(struct ice_pf *pf)
        devlink_port_attrs_set(devlink_port, &attrs);
        devlink = priv_to_devlink(pf);
 
-       err = devlink_port_register(devlink, devlink_port, vsi->idx);
+       err = devlink_port_register_with_ops(devlink, devlink_port, vsi->idx,
+                                            &ice_devlink_port_ops);
        if (err) {
                dev_err(dev, "Failed to create devlink port for PF %d, error %d\n",
                        pf->hw.pf_id, err);
index f6dd3f8..ad0a007 100644 (file)
 #include "ice_tc_lib.h"
 
 /**
- * ice_eswitch_add_vf_mac_rule - add adv rule with VF's MAC
+ * ice_eswitch_add_vf_sp_rule - add adv rule with VF's VSI index
  * @pf: pointer to PF struct
  * @vf: pointer to VF struct
- * @mac: VF's MAC address
  *
  * This function adds advanced rule that forwards packets with
- * VF's MAC address (src MAC) to the corresponding switchdev ctrl VSI queue.
+ * VF's VSI index to the corresponding switchdev ctrl VSI queue.
  */
-int
-ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac)
+static int
+ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf)
 {
        struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
        struct ice_adv_rule_info rule_info = { 0 };
@@ -32,76 +31,41 @@ ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac)
        if (!list)
                return -ENOMEM;
 
-       list[0].type = ICE_MAC_OFOS;
-       ether_addr_copy(list[0].h_u.eth_hdr.src_addr, mac);
-       eth_broadcast_addr(list[0].m_u.eth_hdr.src_addr);
+       ice_rule_add_src_vsi_metadata(list);
 
-       rule_info.sw_act.flag |= ICE_FLTR_TX;
+       rule_info.sw_act.flag = ICE_FLTR_TX;
        rule_info.sw_act.vsi_handle = ctrl_vsi->idx;
        rule_info.sw_act.fltr_act = ICE_FWD_TO_Q;
-       rule_info.rx = false;
        rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id +
                                       ctrl_vsi->rxq_map[vf->vf_id];
        rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE;
        rule_info.flags_info.act_valid = true;
        rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN;
+       rule_info.src_vsi = vf->lan_vsi_idx;
 
        err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info,
-                              vf->repr->mac_rule);
+                              &vf->repr->sp_rule);
        if (err)
-               dev_err(ice_pf_to_dev(pf), "Unable to add VF mac rule in switchdev mode for VF %d",
+               dev_err(ice_pf_to_dev(pf), "Unable to add VF slow-path rule in switchdev mode for VF %d",
                        vf->vf_id);
-       else
-               vf->repr->rule_added = true;
 
        kfree(list);
        return err;
 }
 
 /**
- * ice_eswitch_replay_vf_mac_rule - replay adv rule with VF's MAC
- * @vf: pointer to vF struct
- *
- * This function replays VF's MAC rule after reset.
- */
-void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf)
-{
-       int err;
-
-       if (!ice_is_switchdev_running(vf->pf))
-               return;
-
-       if (is_valid_ether_addr(vf->hw_lan_addr)) {
-               err = ice_eswitch_add_vf_mac_rule(vf->pf, vf,
-                                                 vf->hw_lan_addr);
-               if (err) {
-                       dev_err(ice_pf_to_dev(vf->pf), "Failed to add MAC %pM for VF %d\n, error %d\n",
-                               vf->hw_lan_addr, vf->vf_id, err);
-                       return;
-               }
-               vf->num_mac++;
-
-               ether_addr_copy(vf->dev_lan_addr, vf->hw_lan_addr);
-       }
-}
-
-/**
- * ice_eswitch_del_vf_mac_rule - delete adv rule with VF's MAC
+ * ice_eswitch_del_vf_sp_rule - delete adv rule with VF's VSI index
  * @vf: pointer to the VF struct
  *
- * Delete the advanced rule that was used to forward packets with the VF's MAC
- * address (src MAC) to the corresponding switchdev ctrl VSI queue.
+ * Delete the advanced rule that was used to forward packets with the VF's VSI
+ * index to the corresponding switchdev ctrl VSI queue.
  */
-void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf)
+static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf)
 {
-       if (!ice_is_switchdev_running(vf->pf))
-               return;
-
-       if (!vf->repr->rule_added)
+       if (!vf->repr)
                return;
 
-       ice_rem_adv_rule_by_id(&vf->pf->hw, vf->repr->mac_rule);
-       vf->repr->rule_added = false;
+       ice_rem_adv_rule_by_id(&vf->pf->hw, &vf->repr->sp_rule);
 }
 
 /**
@@ -237,6 +201,7 @@ ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi)
                ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
                metadata_dst_free(vf->repr->dst);
                vf->repr->dst = NULL;
+               ice_eswitch_del_vf_sp_rule(vf);
                ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
                                               ICE_FWD_TO_VSI);
 
@@ -264,25 +229,30 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf)
                vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
                                                   GFP_KERNEL);
                if (!vf->repr->dst) {
-                       ice_fltr_add_mac_and_broadcast(vsi,
-                                                      vf->hw_lan_addr,
+                       ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
+                                                      ICE_FWD_TO_VSI);
+                       goto err;
+               }
+
+               if (ice_eswitch_add_vf_sp_rule(pf, vf)) {
+                       ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
                                                       ICE_FWD_TO_VSI);
                        goto err;
                }
 
                if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) {
-                       ice_fltr_add_mac_and_broadcast(vsi,
-                                                      vf->hw_lan_addr,
+                       ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
                                                       ICE_FWD_TO_VSI);
+                       ice_eswitch_del_vf_sp_rule(vf);
                        metadata_dst_free(vf->repr->dst);
                        vf->repr->dst = NULL;
                        goto err;
                }
 
                if (ice_vsi_add_vlan_zero(vsi)) {
-                       ice_fltr_add_mac_and_broadcast(vsi,
-                                                      vf->hw_lan_addr,
+                       ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
                                                       ICE_FWD_TO_VSI);
+                       ice_eswitch_del_vf_sp_rule(vf);
                        metadata_dst_free(vf->repr->dst);
                        vf->repr->dst = NULL;
                        ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
index 6a41333..b18bf83 100644 (file)
@@ -20,11 +20,6 @@ bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf);
 void ice_eswitch_update_repr(struct ice_vsi *vsi);
 
 void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf);
-int
-ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf,
-                           const u8 *mac);
-void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf);
-void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf);
 
 void ice_eswitch_set_target_vsi(struct sk_buff *skb,
                                struct ice_tx_offload_params *off);
@@ -34,15 +29,6 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 static inline void ice_eswitch_release(struct ice_pf *pf) { }
 
 static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { }
-static inline void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf) { }
-static inline void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf) { }
-
-static inline int
-ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf,
-                           const u8 *mac)
-{
-       return -EOPNOTSUPP;
-}
 
 static inline void
 ice_eswitch_set_target_vsi(struct sk_buff *skb,
index f86e814..8d5cbbd 100644 (file)
@@ -4,6 +4,7 @@
 /* ethtool support for ice */
 
 #include "ice.h"
+#include "ice_ethtool.h"
 #include "ice_flow.h"
 #include "ice_fltr.h"
 #include "ice_lib.h"
@@ -956,7 +957,7 @@ static u64 ice_intr_test(struct net_device *netdev)
 
        netdev_info(netdev, "interrupt test\n");
 
-       wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
+       wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_irq.index),
             GLINT_DYN_CTL_SW_ITR_INDX_M |
             GLINT_DYN_CTL_INTENA_MSK_M |
             GLINT_DYN_CTL_SWINT_TRIG_M);
@@ -1658,15 +1659,26 @@ ice_mask_min_supported_speeds(struct ice_hw *hw,
                *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G;
 }
 
-#define ice_ethtool_advertise_link_mode(aq_link_speed, ethtool_link_mode)    \
-       do {                                                                 \
-               if (req_speeds & (aq_link_speed) ||                          \
-                   (!req_speeds &&                                          \
-                    (advert_phy_type_lo & phy_type_mask_lo ||               \
-                     advert_phy_type_hi & phy_type_mask_hi)))               \
-                       ethtool_link_ksettings_add_link_mode(ks, advertising,\
-                                                       ethtool_link_mode);  \
-       } while (0)
+/**
+ * ice_linkmode_set_bit - set link mode bit
+ * @phy_to_ethtool: PHY type to ethtool link mode struct to set
+ * @ks: ethtool link ksettings struct to fill out
+ * @req_speeds: speed requested by user
+ * @advert_phy_type: advertised PHY type
+ * @phy_type: PHY type
+ */
+static void
+ice_linkmode_set_bit(const struct ice_phy_type_to_ethtool *phy_to_ethtool,
+                    struct ethtool_link_ksettings *ks, u32 req_speeds,
+                    u64 advert_phy_type, u32 phy_type)
+{
+       linkmode_set_bit(phy_to_ethtool->link_mode, ks->link_modes.supported);
+
+       if (req_speeds & phy_to_ethtool->aq_link_speed ||
+           (!req_speeds && advert_phy_type & BIT(phy_type)))
+               linkmode_set_bit(phy_to_ethtool->link_mode,
+                                ks->link_modes.advertising);
+}
 
 /**
  * ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes
@@ -1682,11 +1694,10 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
        struct ice_pf *pf = vsi->back;
        u64 advert_phy_type_lo = 0;
        u64 advert_phy_type_hi = 0;
-       u64 phy_type_mask_lo = 0;
-       u64 phy_type_mask_hi = 0;
        u64 phy_types_high = 0;
        u64 phy_types_low = 0;
-       u16 req_speeds;
+       u32 req_speeds;
+       u32 i;
 
        req_speeds = vsi->port_info->phy.link_info.req_speeds;
 
@@ -1743,272 +1754,22 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
                advert_phy_type_hi = vsi->port_info->phy.phy_type_high;
        }
 
-       ethtool_link_ksettings_zero_link_mode(ks, supported);
-       ethtool_link_ksettings_zero_link_mode(ks, advertising);
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_100BASE_TX |
-                          ICE_PHY_TYPE_LOW_100M_SGMII;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    100baseT_Full);
-
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100MB,
-                                               100baseT_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_T |
-                          ICE_PHY_TYPE_LOW_1G_SGMII;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    1000baseT_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
-                                               1000baseT_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_KX;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    1000baseKX_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
-                                               1000baseKX_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_SX |
-                          ICE_PHY_TYPE_LOW_1000BASE_LX;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    1000baseX_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
-                                               1000baseX_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_T;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    2500baseT_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB,
-                                               2500baseT_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_X |
-                          ICE_PHY_TYPE_LOW_2500BASE_KX;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    2500baseX_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB,
-                                               2500baseX_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_5GBASE_T |
-                          ICE_PHY_TYPE_LOW_5GBASE_KR;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    5000baseT_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_5GB,
-                                               5000baseT_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_T |
-                          ICE_PHY_TYPE_LOW_10G_SFI_DA |
-                          ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC |
-                          ICE_PHY_TYPE_LOW_10G_SFI_C2C;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    10000baseT_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
-                                               10000baseT_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_KR_CR1;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    10000baseKR_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
-                                               10000baseKR_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_SR;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    10000baseSR_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
-                                               10000baseSR_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_LR;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    10000baseLR_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
-                                               10000baseLR_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_T |
-                          ICE_PHY_TYPE_LOW_25GBASE_CR |
-                          ICE_PHY_TYPE_LOW_25GBASE_CR_S |
-                          ICE_PHY_TYPE_LOW_25GBASE_CR1 |
-                          ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC |
-                          ICE_PHY_TYPE_LOW_25G_AUI_C2C;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    25000baseCR_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
-                                               25000baseCR_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_SR |
-                          ICE_PHY_TYPE_LOW_25GBASE_LR;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    25000baseSR_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
-                                               25000baseSR_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_KR |
-                          ICE_PHY_TYPE_LOW_25GBASE_KR_S |
-                          ICE_PHY_TYPE_LOW_25GBASE_KR1;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    25000baseKR_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
-                                               25000baseKR_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_KR4;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    40000baseKR4_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
-                                               40000baseKR4_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_CR4 |
-                          ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC |
-                          ICE_PHY_TYPE_LOW_40G_XLAUI;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    40000baseCR4_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
-                                               40000baseCR4_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_SR4;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    40000baseSR4_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
-                                               40000baseSR4_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_LR4;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    40000baseLR4_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
-                                               40000baseLR4_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_CR2 |
-                          ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC |
-                          ICE_PHY_TYPE_LOW_50G_LAUI2 |
-                          ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC |
-                          ICE_PHY_TYPE_LOW_50G_AUI2 |
-                          ICE_PHY_TYPE_LOW_50GBASE_CP |
-                          ICE_PHY_TYPE_LOW_50GBASE_SR |
-                          ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC |
-                          ICE_PHY_TYPE_LOW_50G_AUI1;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    50000baseCR2_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
-                                               50000baseCR2_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_KR2 |
-                          ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    50000baseKR2_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
-                                               50000baseKR2_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_SR2 |
-                          ICE_PHY_TYPE_LOW_50GBASE_LR2 |
-                          ICE_PHY_TYPE_LOW_50GBASE_FR |
-                          ICE_PHY_TYPE_LOW_50GBASE_LR;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    50000baseSR2_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
-                                               50000baseSR2_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_CR4 |
-                          ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC |
-                          ICE_PHY_TYPE_LOW_100G_CAUI4 |
-                          ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC |
-                          ICE_PHY_TYPE_LOW_100G_AUI4 |
-                          ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4;
-       phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC |
-                          ICE_PHY_TYPE_HIGH_100G_CAUI2 |
-                          ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC |
-                          ICE_PHY_TYPE_HIGH_100G_AUI2;
-       if (phy_types_low & phy_type_mask_lo ||
-           phy_types_high & phy_type_mask_hi) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    100000baseCR4_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
-                                               100000baseCR4_Full);
-       }
-
-       if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    100000baseCR2_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
-                                               100000baseCR2_Full);
-       }
-
-       if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR4) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    100000baseSR4_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
-                                               100000baseSR4_Full);
-       }
-
-       if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    100000baseSR2_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
-                                               100000baseSR2_Full);
-       }
-
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_LR4 |
-                          ICE_PHY_TYPE_LOW_100GBASE_DR;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    100000baseLR4_ER4_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
-                                               100000baseLR4_ER4_Full);
-       }
+       linkmode_zero(ks->link_modes.supported);
+       linkmode_zero(ks->link_modes.advertising);
 
-       phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_KR4 |
-                          ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4;
-       if (phy_types_low & phy_type_mask_lo) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    100000baseKR4_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
-                                               100000baseKR4_Full);
+       for (i = 0; i < BITS_PER_TYPE(u64); i++) {
+               if (phy_types_low & BIT_ULL(i))
+                       ice_linkmode_set_bit(&phy_type_low_lkup[i], ks,
+                                            req_speeds, advert_phy_type_lo,
+                                            i);
        }
 
-       if (phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    100000baseKR2_Full);
-               ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
-                                               100000baseKR2_Full);
+       for (i = 0; i < BITS_PER_TYPE(u64); i++) {
+               if (phy_types_high & BIT_ULL(i))
+                       ice_linkmode_set_bit(&phy_type_high_lkup[i], ks,
+                                            req_speeds, advert_phy_type_hi,
+                                            i);
        }
-
 }
 
 #define TEST_SET_BITS_TIMEOUT  50
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h
new file mode 100644 (file)
index 0000000..b403ee7
--- /dev/null
@@ -0,0 +1,105 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2023 Intel Corporation */

#ifndef _ICE_ETHTOOL_H_
#define _ICE_ETHTOOL_H_

/* Pairs an admin-queue link-speed value with the matching ethtool
 * link-mode bit for a single HW PHY type.
 */
struct ice_phy_type_to_ethtool {
	u64 aq_link_speed;	/* ICE_AQ_LINK_SPEED_* value */
	u8 link_mode;		/* ETHTOOL_LINK_MODE_*_BIT number */
};

/* Macro to make PHY type to Ethtool link mode table entry.
 * The index is the PHY type.
 */
#define ICE_PHY_TYPE(LINK_SPEED, ETHTOOL_LINK_MODE) {\
	.aq_link_speed = ICE_AQ_LINK_SPEED_##LINK_SPEED, \
	.link_mode = ETHTOOL_LINK_MODE_##ETHTOOL_LINK_MODE##_BIT, \
}

/* Lookup table mapping PHY type low to link speed and Ethtool link modes.
 * Array index corresponds to HW PHY type bit, see
 * ice_adminq_cmd.h:ICE_PHY_TYPE_LOW_*.
 */
static const struct ice_phy_type_to_ethtool
phy_type_low_lkup[] = {
	[0] = ICE_PHY_TYPE(100MB, 100baseT_Full),
	[1] = ICE_PHY_TYPE(100MB, 100baseT_Full),
	[2] = ICE_PHY_TYPE(1000MB, 1000baseT_Full),
	[3] = ICE_PHY_TYPE(1000MB, 1000baseX_Full),
	[4] = ICE_PHY_TYPE(1000MB, 1000baseX_Full),
	[5] = ICE_PHY_TYPE(1000MB, 1000baseKX_Full),
	[6] = ICE_PHY_TYPE(1000MB, 1000baseT_Full),
	[7] = ICE_PHY_TYPE(2500MB, 2500baseT_Full),
	[8] = ICE_PHY_TYPE(2500MB, 2500baseX_Full),
	[9] = ICE_PHY_TYPE(2500MB, 2500baseX_Full),
	[10] = ICE_PHY_TYPE(5GB, 5000baseT_Full),
	[11] = ICE_PHY_TYPE(5GB, 5000baseT_Full),
	[12] = ICE_PHY_TYPE(10GB, 10000baseT_Full),
	[13] = ICE_PHY_TYPE(10GB, 10000baseCR_Full),
	[14] = ICE_PHY_TYPE(10GB, 10000baseSR_Full),
	[15] = ICE_PHY_TYPE(10GB, 10000baseLR_Full),
	[16] = ICE_PHY_TYPE(10GB, 10000baseKR_Full),
	[17] = ICE_PHY_TYPE(10GB, 10000baseCR_Full),
	[18] = ICE_PHY_TYPE(10GB, 10000baseKR_Full),
	[19] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
	[20] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
	[21] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
	[22] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
	[23] = ICE_PHY_TYPE(25GB, 25000baseSR_Full),
	[24] = ICE_PHY_TYPE(25GB, 25000baseSR_Full),
	[25] = ICE_PHY_TYPE(25GB, 25000baseKR_Full),
	[26] = ICE_PHY_TYPE(25GB, 25000baseKR_Full),
	[27] = ICE_PHY_TYPE(25GB, 25000baseKR_Full),
	[28] = ICE_PHY_TYPE(25GB, 25000baseSR_Full),
	[29] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
	[30] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full),
	[31] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full),
	[32] = ICE_PHY_TYPE(40GB, 40000baseLR4_Full),
	[33] = ICE_PHY_TYPE(40GB, 40000baseKR4_Full),
	[34] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full),
	[35] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full),
	[36] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full),
	[37] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full),
	[38] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full),
	[39] = ICE_PHY_TYPE(50GB, 50000baseKR2_Full),
	[40] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full),
	[41] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full),
	[42] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full),
	[43] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full),
	[44] = ICE_PHY_TYPE(50GB, 50000baseCR_Full),
	[45] = ICE_PHY_TYPE(50GB, 50000baseSR_Full),
	[46] = ICE_PHY_TYPE(50GB, 50000baseLR_ER_FR_Full),
	[47] = ICE_PHY_TYPE(50GB, 50000baseLR_ER_FR_Full),
	[48] = ICE_PHY_TYPE(50GB, 50000baseKR_Full),
	[49] = ICE_PHY_TYPE(50GB, 50000baseSR_Full),
	[50] = ICE_PHY_TYPE(50GB, 50000baseCR_Full),
	[51] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
	[52] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full),
	[53] = ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full),
	[54] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full),
	[55] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
	[56] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
	[57] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full),
	[58] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
	[59] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
	[60] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full),
	[61] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full),
	[62] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full),
	[63] = ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full),
};

/* Lookup table mapping PHY type high to link speed and Ethtool link modes.
 * Array index corresponds to HW PHY type bit, see
 * ice_adminq_cmd.h:ICE_PHY_TYPE_HIGH_*
 */
static const struct ice_phy_type_to_ethtool
phy_type_high_lkup[] = {
	[0] = ICE_PHY_TYPE(100GB, 100000baseKR2_Full),
	[1] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full),
	[2] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full),
	[3] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full),
	[4] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full),
};

#endif /* !_ICE_ETHTOOL_H_ */
index e6bc228..145b27f 100644 (file)
@@ -229,20 +229,34 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos)
 EXPORT_SYMBOL_GPL(ice_get_qos_params);
 
 /**
- * ice_reserve_rdma_qvector - Reserve vector resources for RDMA driver
+ * ice_alloc_rdma_qvectors - Allocate vector resources for RDMA driver
  * @pf: board private structure to initialize
  */
-static int ice_reserve_rdma_qvector(struct ice_pf *pf)
+static int ice_alloc_rdma_qvectors(struct ice_pf *pf)
 {
        if (ice_is_rdma_ena(pf)) {
-               int index;
-
-               index = ice_get_res(pf, pf->irq_tracker, pf->num_rdma_msix,
-                                   ICE_RES_RDMA_VEC_ID);
-               if (index < 0)
-                       return index;
-               pf->num_avail_sw_msix -= pf->num_rdma_msix;
-               pf->rdma_base_vector = (u16)index;
+               int i;
+
+               pf->msix_entries = kcalloc(pf->num_rdma_msix,
+                                          sizeof(*pf->msix_entries),
+                                                 GFP_KERNEL);
+               if (!pf->msix_entries)
+                       return -ENOMEM;
+
+               /* RDMA is the only user of pf->msix_entries array */
+               pf->rdma_base_vector = 0;
+
+               for (i = 0; i < pf->num_rdma_msix; i++) {
+                       struct msix_entry *entry = &pf->msix_entries[i];
+                       struct msi_map map;
+
+                       map = ice_alloc_irq(pf, false);
+                       if (map.index < 0)
+                               break;
+
+                       entry->entry = map.index;
+                       entry->vector = map.virq;
+               }
        }
        return 0;
 }
@@ -253,9 +267,21 @@ static int ice_reserve_rdma_qvector(struct ice_pf *pf)
  */
 static void ice_free_rdma_qvector(struct ice_pf *pf)
 {
-       pf->num_avail_sw_msix -= pf->num_rdma_msix;
-       ice_free_res(pf->irq_tracker, pf->rdma_base_vector,
-                    ICE_RES_RDMA_VEC_ID);
+       int i;
+
+       if (!pf->msix_entries)
+               return;
+
+       for (i = 0; i < pf->num_rdma_msix; i++) {
+               struct msi_map map;
+
+               map.index = pf->msix_entries[i].entry;
+               map.virq = pf->msix_entries[i].vector;
+               ice_free_irq(pf, map);
+       }
+
+       kfree(pf->msix_entries);
+       pf->msix_entries = NULL;
 }
 
 /**
@@ -357,7 +383,7 @@ int ice_init_rdma(struct ice_pf *pf)
        }
 
        /* Reserve vector resources */
-       ret = ice_reserve_rdma_qvector(pf);
+       ret = ice_alloc_rdma_qvectors(pf);
        if (ret < 0) {
                dev_err(dev, "failed to reserve vectors for RDMA\n");
                goto err_reserve_rdma_qvector;
diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c
new file mode 100644 (file)
index 0000000..ad82ff7
--- /dev/null
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_irq.h"
+
/**
 * ice_init_irq_tracker - initialize interrupt tracker
 * @pf: board private structure
 * @max_vectors: maximum number of vectors that tracker can hold
 * @num_static: number of preallocated interrupts
 */
static void
ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors,
		     unsigned int num_static)
{
	pf->irq_tracker.num_entries = max_vectors;
	pf->irq_tracker.num_static = num_static;
	/* XA_FLAGS_ALLOC: xa_alloc() hands out free indices starting at 0 */
	xa_init_flags(&pf->irq_tracker.entries, XA_FLAGS_ALLOC);
}
+
/**
 * ice_deinit_irq_tracker - free xarray tracker
 * @pf: board private structure
 *
 * NOTE(review): xa_destroy() releases only the xarray's internal nodes;
 * it does not kfree() any struct ice_irq_entry still stored in it.
 * Assumes all entries were already removed via ice_free_irq_res() —
 * confirm against callers.
 */
static void ice_deinit_irq_tracker(struct ice_pf *pf)
{
	xa_destroy(&pf->irq_tracker.entries);
}
+
/**
 * ice_free_irq_res - free an interrupt tracking entry
 * @pf: board private structure
 * @index: tracker index of the entry (ice_irq_entry::index, as handed
 *	   out by ice_get_irq_res())
 *
 * Removes the entry from the tracker xarray and frees its memory.
 */
static void ice_free_irq_res(struct ice_pf *pf, u16 index)
{
	struct ice_irq_entry *entry;

	/* xa_erase() returns the stored pointer or NULL; kfree(NULL) is a
	 * no-op, so an absent entry is silently tolerated here
	 */
	entry = xa_erase(&pf->irq_tracker.entries, index);
	kfree(entry);
}
+
+/**
+ * ice_get_irq_res - get an interrupt resource
+ * @pf: board private structure
+ * @dyn_only: force entry to be dynamically allocated
+ *
+ * Allocate new irq entry in the free slot of the tracker. Since xarray
+ * is used, always allocate new entry at the lowest possible index. Set
+ * proper allocation limit for maximum tracker entries.
+ *
+ * Returns allocated irq entry or NULL on failure.
+ */
+static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only)
+{
+       struct xa_limit limit = { .max = pf->irq_tracker.num_entries,
+                                 .min = 0 };
+       unsigned int num_static = pf->irq_tracker.num_static;
+       struct ice_irq_entry *entry;
+       unsigned int index;
+       int ret;
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return NULL;
+
+       /* skip preallocated entries if the caller says so */
+       if (dyn_only)
+               limit.min = num_static;
+
+       ret = xa_alloc(&pf->irq_tracker.entries, &index, entry, limit,
+                      GFP_KERNEL);
+
+       if (ret) {
+               kfree(entry);
+               entry = NULL;
+       } else {
+               entry->index = index;
+               entry->dynamic = index >= num_static;
+       }
+
+       return entry;
+}
+
/**
 * ice_reduce_msix_usage - Reduce usage of MSI-X vectors
 * @pf: board private structure
 * @v_remain: number of remaining MSI-X vectors to be distributed
 *
 * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled.
 * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of
 * remaining vectors.
 */
static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain)
{
	int v_rdma;

	/* Without RDMA, LAN gets everything that remains */
	if (!ice_is_rdma_ena(pf)) {
		pf->num_lan_msix = v_remain;
		return;
	}

	/* RDMA needs at least 1 interrupt in addition to AEQ MSIX */
	v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1;

	if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) {
		/* Not even the combined minimums fit: drop RDMA entirely
		 * and run LAN at its Tx/Rx minimum
		 */
		dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n");
		clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);

		pf->num_rdma_msix = 0;
		pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
	} else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) ||
		   (v_remain - v_rdma < v_rdma)) {
		/* Support minimum RDMA and give remaining vectors to LAN MSIX
		 */
		pf->num_rdma_msix = ICE_MIN_RDMA_MSIX;
		pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX;
	} else {
		/* Split remaining MSIX with RDMA after accounting for AEQ MSIX
		 */
		pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 +
				    ICE_RDMA_NUM_AEQ_MSIX;
		pf->num_lan_msix = v_remain - pf->num_rdma_msix;
	}
}
+
/**
 * ice_ena_msix_range - Request a range of MSIX vectors from the OS
 * @pf: board private structure
 *
 * Compute the number of MSIX vectors wanted and request from the OS. Adjust
 * device usage if there are not enough vectors. Return the number of vectors
 * reserved or negative on failure.
 */
static int ice_ena_msix_range(struct ice_pf *pf)
{
	int num_cpus, hw_num_msix, v_other, v_wanted, v_actual;
	struct device *dev = ice_pf_to_dev(pf);
	int err;

	hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors;
	num_cpus = num_online_cpus();

	/* LAN miscellaneous handler */
	v_other = ICE_MIN_LAN_OICR_MSIX;

	/* Flow Director */
	if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
		v_other += ICE_FDIR_MSIX;

	/* switchdev */
	v_other += ICE_ESWITCH_MSIX;

	v_wanted = v_other;

	/* LAN traffic: one vector per online CPU */
	pf->num_lan_msix = num_cpus;
	v_wanted += pf->num_lan_msix;

	/* RDMA auxiliary driver */
	if (ice_is_rdma_ena(pf)) {
		pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX;
		v_wanted += pf->num_rdma_msix;
	}

	if (v_wanted > hw_num_msix) {
		int v_remain;

		dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n",
			 v_wanted, hw_num_msix);

		if (hw_num_msix < ICE_MIN_MSIX) {
			err = -ERANGE;
			goto exit_err;
		}

		v_remain = hw_num_msix - v_other;
		if (v_remain < ICE_MIN_LAN_TXRX_MSIX) {
			/* shrink the "other" budget so the LAN Tx/Rx
			 * minimum still fits within ICE_MIN_MSIX
			 */
			v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX;
			v_remain = ICE_MIN_LAN_TXRX_MSIX;
		}

		/* redistribute what is left between LAN and RDMA */
		ice_reduce_msix_usage(pf, v_remain);
		v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other;

		dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n",
			   pf->num_lan_msix);
		if (ice_is_rdma_ena(pf))
			dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n",
				   pf->num_rdma_msix);
	}

	/* actually reserve the vectors */
	v_actual = pci_alloc_irq_vectors(pf->pdev, ICE_MIN_MSIX, v_wanted,
					 PCI_IRQ_MSIX);
	if (v_actual < 0) {
		dev_err(dev, "unable to reserve MSI-X vectors\n");
		err = v_actual;
		goto exit_err;
	}

	if (v_actual < v_wanted) {
		/* the OS granted fewer vectors than requested; adjust the
		 * LAN/RDMA split again against what was actually obtained
		 */
		dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
			 v_wanted, v_actual);

		if (v_actual < ICE_MIN_MSIX) {
			/* error if we can't get minimum vectors */
			pci_free_irq_vectors(pf->pdev);
			err = -ERANGE;
			goto exit_err;
		} else {
			int v_remain = v_actual - v_other;

			if (v_remain < ICE_MIN_LAN_TXRX_MSIX)
				v_remain = ICE_MIN_LAN_TXRX_MSIX;

			ice_reduce_msix_usage(pf, v_remain);

			dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
				   pf->num_lan_msix);

			if (ice_is_rdma_ena(pf))
				dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n",
					   pf->num_rdma_msix);
		}
	}

	return v_actual;

exit_err:
	/* on failure nothing is reserved for either LAN or RDMA */
	pf->num_rdma_msix = 0;
	pf->num_lan_msix = 0;
	return err;
}
+
/**
 * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
 * @pf: board private structure
 *
 * Releases the OS MSI-X vectors and tears down the irq tracker. Assumes
 * individual interrupts were already returned via ice_free_irq() —
 * confirm against callers.
 */
void ice_clear_interrupt_scheme(struct ice_pf *pf)
{
	pci_free_irq_vectors(pf->pdev);
	ice_deinit_irq_tracker(pf);
}
+
+/**
+ * ice_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ */
+int ice_init_interrupt_scheme(struct ice_pf *pf)
+{
+       int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
+       int vectors, max_vectors;
+
+       vectors = ice_ena_msix_range(pf);
+
+       if (vectors < 0)
+               return -ENOMEM;
+
+       if (pci_msix_can_alloc_dyn(pf->pdev))
+               max_vectors = total_vectors;
+       else
+               max_vectors = vectors;
+
+       ice_init_irq_tracker(pf, max_vectors, vectors);
+
+       return 0;
+}
+
/**
 * ice_alloc_irq - Allocate new interrupt vector
 * @pf: board private structure
 * @dyn_only: force dynamic allocation of the interrupt
 *
 * Allocate a new interrupt vector, return struct msi_map with interrupt
 * details and track the allocated interrupt appropriately.
 *
 * This function reserves new irq entry from the irq_tracker.
 * if according to the tracker information all interrupts that
 * were allocated with ice_pci_alloc_irq_vectors are already used
 * and dynamically allocated interrupts are supported then new
 * interrupt will be allocated with pci_msix_alloc_irq_at.
 *
 * Some callers may only support dynamically allocated interrupts.
 * This is indicated with dyn_only flag.
 *
 * On failure, return map with negative .index. The caller
 * is expected to check returned map index.
 */
struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only)
{
	int sriov_base_vector = pf->sriov_base_vector;
	struct msi_map map = { .index = -ENOENT };
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_irq_entry *entry;

	entry = ice_get_irq_res(pf, dyn_only);
	if (!entry)
		return map;

	/* fail if we're about to violate SRIOV vectors space */
	if (sriov_base_vector && entry->index >= sriov_base_vector)
		goto exit_free_res;

	if (pci_msix_can_alloc_dyn(pf->pdev) && entry->dynamic) {
		/* entry beyond the statically enabled range: ask the PCI
		 * core to create the vector at exactly this index
		 */
		map = pci_msix_alloc_irq_at(pf->pdev, entry->index, NULL);
		if (map.index < 0)
			goto exit_free_res;
		dev_dbg(dev, "allocated new irq at index %d\n", map.index);
	} else {
		/* preallocated entry: just look up its Linux irq number */
		map.index = entry->index;
		map.virq = pci_irq_vector(pf->pdev, map.index);
	}

	return map;

exit_free_res:
	dev_err(dev, "Could not allocate irq at idx %d\n", entry->index);
	ice_free_irq_res(pf, entry->index);
	return map;
}
+
+/**
+ * ice_free_irq - Free interrupt vector
+ * @pf: board private structure
+ * @map: map with interrupt details
+ *
+ * Remove allocated interrupt from the interrupt tracker. If interrupt was
+ * allocated dynamically, free respective interrupt vector.
+ */
+void ice_free_irq(struct ice_pf *pf, struct msi_map map)
+{
+       struct ice_irq_entry *entry;
+
+       entry = xa_load(&pf->irq_tracker.entries, map.index);
+
+       if (!entry) {
+               dev_err(ice_pf_to_dev(pf), "Failed to get MSIX interrupt entry at index %d",
+                       map.index);
+               return;
+       }
+
+       dev_dbg(ice_pf_to_dev(pf), "Free irq at index %d\n", map.index);
+
+       if (entry->dynamic)
+               pci_msix_free_irq(pf->pdev, map);
+
+       ice_free_irq_res(pf, map.index);
+}
+
+/**
+ * ice_get_max_used_msix_vector - Get the max used interrupt vector
+ * @pf: board private structure
+ *
+ * Return index of maximum used interrupt vectors with respect to the
+ * beginning of the MSIX table. Take into account that some interrupts
+ * may have been dynamically allocated after MSIX was initially enabled.
+ */
+int ice_get_max_used_msix_vector(struct ice_pf *pf)
+{
+       unsigned long start, index, max_idx;
+       void *entry;
+
+       /* Treat all preallocated interrupts as used */
+       start = pf->irq_tracker.num_static;
+       max_idx = start - 1;
+
+       xa_for_each_start(&pf->irq_tracker.entries, index, entry, start) {
+               if (index > max_idx)
+                       max_idx = index;
+       }
+
+       return max_idx;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h
new file mode 100644 (file)
index 0000000..f35efc0
--- /dev/null
@@ -0,0 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2023, Intel Corporation. */

#ifndef _ICE_IRQ_H_
#define _ICE_IRQ_H_

/* One tracked MSI-X interrupt */
struct ice_irq_entry {
	unsigned int index;	/* index into the device MSI-X table */
	bool dynamic;	/* allocation type flag */
};

/* Per-PF book-keeping of MSI-X interrupt usage */
struct ice_irq_tracker {
	struct xarray entries;	/* index -> struct ice_irq_entry */
	u16 num_entries;	/* total vectors available */
	u16 num_static; /* preallocated entries */
};

int ice_init_interrupt_scheme(struct ice_pf *pf);
void ice_clear_interrupt_scheme(struct ice_pf *pf);

struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only);
void ice_free_irq(struct ice_pf *pf, struct msi_map map);
int ice_get_max_used_msix_vector(struct ice_pf *pf);

#endif
index ee5b369..5a7753b 100644 (file)
@@ -7,15 +7,6 @@
 #include "ice_lag.h"
 
 /**
- * ice_lag_nop_handler - no-op Rx handler to disable LAG
- * @pskb: pointer to skb pointer
- */
-rx_handler_result_t ice_lag_nop_handler(struct sk_buff __always_unused **pskb)
-{
-       return RX_HANDLER_PASS;
-}
-
-/**
  * ice_lag_set_primary - set PF LAG state as Primary
  * @lag: LAG info struct
  */
@@ -158,7 +149,6 @@ ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info)
                lag->upper_netdev = upper;
        }
 
-       ice_clear_sriov_cap(pf);
        ice_clear_rdma_cap(pf);
 
        lag->bonded = true;
@@ -205,7 +195,6 @@ ice_lag_unlink(struct ice_lag *lag,
        }
 
        lag->peer_netdev = NULL;
-       ice_set_sriov_cap(pf);
        ice_set_rdma_cap(pf);
        lag->bonded = false;
        lag->role = ICE_LAG_NONE;
@@ -229,7 +218,6 @@ static void ice_lag_unregister(struct ice_lag *lag, struct net_device *netdev)
        if (lag->upper_netdev) {
                dev_put(lag->upper_netdev);
                lag->upper_netdev = NULL;
-               ice_set_sriov_cap(pf);
                ice_set_rdma_cap(pf);
        }
        /* perform some cleanup in case we come back */
index 51b5cf4..2c37367 100644 (file)
@@ -25,63 +25,9 @@ struct ice_lag {
        struct notifier_block notif_block;
        u8 bonded:1; /* currently bonded */
        u8 primary:1; /* this is primary */
-       u8 handler:1; /* did we register a rx_netdev_handler */
-       /* each thing blocking bonding will increment this value by one.
-        * If this value is zero, then bonding is allowed.
-        */
-       u16 dis_lag;
        u8 role;
 };
 
 int ice_init_lag(struct ice_pf *pf);
 void ice_deinit_lag(struct ice_pf *pf);
-rx_handler_result_t ice_lag_nop_handler(struct sk_buff **pskb);
-
-/**
- * ice_disable_lag - increment LAG disable count
- * @lag: LAG struct
- */
-static inline void ice_disable_lag(struct ice_lag *lag)
-{
-       /* If LAG this PF is not already disabled, disable it */
-       rtnl_lock();
-       if (!netdev_is_rx_handler_busy(lag->netdev)) {
-               if (!netdev_rx_handler_register(lag->netdev,
-                                               ice_lag_nop_handler,
-                                               NULL))
-                       lag->handler = true;
-       }
-       rtnl_unlock();
-       lag->dis_lag++;
-}
-
-/**
- * ice_enable_lag - decrement disable count for a PF
- * @lag: LAG struct
- *
- * Decrement the disable counter for a port, and if that count reaches
- * zero, then remove the no-op Rx handler from that netdev
- */
-static inline void ice_enable_lag(struct ice_lag *lag)
-{
-       if (lag->dis_lag)
-               lag->dis_lag--;
-       if (!lag->dis_lag && lag->handler) {
-               rtnl_lock();
-               netdev_rx_handler_unregister(lag->netdev);
-               rtnl_unlock();
-               lag->handler = false;
-       }
-}
-
-/**
- * ice_is_lag_dis - is LAG disabled
- * @lag: LAG struct
- *
- * Return true if bonding is disabled
- */
-static inline bool ice_is_lag_dis(struct ice_lag *lag)
-{
-       return !!(lag->dis_lag);
-}
 #endif /* _ICE_LAG_H_ */
index 11ae0e4..5ddb95d 100644 (file)
@@ -1371,190 +1371,6 @@ out:
 }
 
 /**
- * ice_free_res - free a block of resources
- * @res: pointer to the resource
- * @index: starting index previously returned by ice_get_res
- * @id: identifier to track owner
- *
- * Returns number of resources freed
- */
-int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
-{
-       int count = 0;
-       int i;
-
-       if (!res || index >= res->end)
-               return -EINVAL;
-
-       id |= ICE_RES_VALID_BIT;
-       for (i = index; i < res->end && res->list[i] == id; i++) {
-               res->list[i] = 0;
-               count++;
-       }
-
-       return count;
-}
-
-/**
- * ice_search_res - Search the tracker for a block of resources
- * @res: pointer to the resource
- * @needed: size of the block needed
- * @id: identifier to track owner
- *
- * Returns the base item index of the block, or -ENOMEM for error
- */
-static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
-{
-       u16 start = 0, end = 0;
-
-       if (needed > res->end)
-               return -ENOMEM;
-
-       id |= ICE_RES_VALID_BIT;
-
-       do {
-               /* skip already allocated entries */
-               if (res->list[end++] & ICE_RES_VALID_BIT) {
-                       start = end;
-                       if ((start + needed) > res->end)
-                               break;
-               }
-
-               if (end == (start + needed)) {
-                       int i = start;
-
-                       /* there was enough, so assign it to the requestor */
-                       while (i != end)
-                               res->list[i++] = id;
-
-                       return start;
-               }
-       } while (end < res->end);
-
-       return -ENOMEM;
-}
-
-/**
- * ice_get_free_res_count - Get free count from a resource tracker
- * @res: Resource tracker instance
- */
-static u16 ice_get_free_res_count(struct ice_res_tracker *res)
-{
-       u16 i, count = 0;
-
-       for (i = 0; i < res->end; i++)
-               if (!(res->list[i] & ICE_RES_VALID_BIT))
-                       count++;
-
-       return count;
-}
-
-/**
- * ice_get_res - get a block of resources
- * @pf: board private structure
- * @res: pointer to the resource
- * @needed: size of the block needed
- * @id: identifier to track owner
- *
- * Returns the base item index of the block, or negative for error
- */
-int
-ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
-{
-       if (!res || !pf)
-               return -EINVAL;
-
-       if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
-               dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n",
-                       needed, res->num_entries, id);
-               return -EINVAL;
-       }
-
-       return ice_search_res(res, needed, id);
-}
-
-/**
- * ice_get_vf_ctrl_res - Get VF control VSI resource
- * @pf: pointer to the PF structure
- * @vsi: the VSI to allocate a resource for
- *
- * Look up whether another VF has already allocated the control VSI resource.
- * If so, re-use this resource so that we share it among all VFs.
- *
- * Otherwise, allocate the resource and return it.
- */
-static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi)
-{
-       struct ice_vf *vf;
-       unsigned int bkt;
-       int base;
-
-       rcu_read_lock();
-       ice_for_each_vf_rcu(pf, bkt, vf) {
-               if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
-                       base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
-                       rcu_read_unlock();
-                       return base;
-               }
-       }
-       rcu_read_unlock();
-
-       return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors,
-                          ICE_RES_VF_CTRL_VEC_ID);
-}
-
-/**
- * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
- * @vsi: ptr to the VSI
- *
- * This should only be called after ice_vsi_alloc_def() which allocates the
- * corresponding SW VSI structure and initializes num_queue_pairs for the
- * newly allocated VSI.
- *
- * Returns 0 on success or negative on failure
- */
-static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
-{
-       struct ice_pf *pf = vsi->back;
-       struct device *dev;
-       u16 num_q_vectors;
-       int base;
-
-       dev = ice_pf_to_dev(pf);
-       /* SRIOV doesn't grab irq_tracker entries for each VSI */
-       if (vsi->type == ICE_VSI_VF)
-               return 0;
-       if (vsi->type == ICE_VSI_CHNL)
-               return 0;
-
-       if (vsi->base_vector) {
-               dev_dbg(dev, "VSI %d has non-zero base vector %d\n",
-                       vsi->vsi_num, vsi->base_vector);
-               return -EEXIST;
-       }
-
-       num_q_vectors = vsi->num_q_vectors;
-       /* reserve slots from OS requested IRQs */
-       if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
-               base = ice_get_vf_ctrl_res(pf, vsi);
-       } else {
-               base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
-                                  vsi->idx);
-       }
-
-       if (base < 0) {
-               dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n",
-                       ice_get_free_res_count(pf->irq_tracker),
-                       ice_vsi_type_str(vsi->type), vsi->idx, num_q_vectors);
-               return -ENOENT;
-       }
-       vsi->base_vector = (u16)base;
-       pf->num_avail_sw_msix -= num_q_vectors;
-
-       return 0;
-}
-
-/**
  * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
  * @vsi: the VSI having rings deallocated
  */
@@ -2410,50 +2226,6 @@ static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
 }
 
 /**
- * ice_vsi_set_q_vectors_reg_idx - set the HW register index for all q_vectors
- * @vsi: VSI to set the q_vectors register index on
- */
-static int
-ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi)
-{
-       u16 i;
-
-       if (!vsi || !vsi->q_vectors)
-               return -EINVAL;
-
-       ice_for_each_q_vector(vsi, i) {
-               struct ice_q_vector *q_vector = vsi->q_vectors[i];
-
-               if (!q_vector) {
-                       dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n",
-                               i, vsi->vsi_num);
-                       goto clear_reg_idx;
-               }
-
-               if (vsi->type == ICE_VSI_VF) {
-                       struct ice_vf *vf = vsi->vf;
-
-                       q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector);
-               } else {
-                       q_vector->reg_idx =
-                               q_vector->v_idx + vsi->base_vector;
-               }
-       }
-
-       return 0;
-
-clear_reg_idx:
-       ice_for_each_q_vector(vsi, i) {
-               struct ice_q_vector *q_vector = vsi->q_vectors[i];
-
-               if (q_vector)
-                       q_vector->reg_idx = 0;
-       }
-
-       return -EINVAL;
-}
-
-/**
  * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
  * @vsi: the VSI being configured
  * @tx: bool to determine Tx or Rx rule
@@ -2611,37 +2383,6 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi)
                vsi->agg_node->num_vsis);
 }
 
-/**
- * ice_free_vf_ctrl_res - Free the VF control VSI resource
- * @pf: pointer to PF structure
- * @vsi: the VSI to free resources for
- *
- * Check if the VF control VSI resource is still in use. If no VF is using it
- * any more, release the VSI resource. Otherwise, leave it to be cleaned up
- * once no other VF uses it.
- */
-static void ice_free_vf_ctrl_res(struct ice_pf *pf,  struct ice_vsi *vsi)
-{
-       struct ice_vf *vf;
-       unsigned int bkt;
-
-       rcu_read_lock();
-       ice_for_each_vf_rcu(pf, bkt, vf) {
-               if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
-                       rcu_read_unlock();
-                       return;
-               }
-       }
-       rcu_read_unlock();
-
-       /* No other VFs left that have control VSI. It is now safe to reclaim
-        * SW interrupts back to the common pool.
-        */
-       ice_free_res(pf->irq_tracker, vsi->base_vector,
-                    ICE_RES_VF_CTRL_VEC_ID);
-       pf->num_avail_sw_msix += vsi->num_q_vectors;
-}
-
 static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi)
 {
        u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
@@ -2728,14 +2469,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
                if (ret)
                        goto unroll_vsi_init;
 
-               ret = ice_vsi_setup_vector_base(vsi);
-               if (ret)
-                       goto unroll_alloc_q_vector;
-
-               ret = ice_vsi_set_q_vectors_reg_idx(vsi);
-               if (ret)
-                       goto unroll_vector_base;
-
                ret = ice_vsi_alloc_rings(vsi);
                if (ret)
                        goto unroll_vector_base;
@@ -2788,10 +2521,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
                if (ret)
                        goto unroll_alloc_q_vector;
 
-               ret = ice_vsi_set_q_vectors_reg_idx(vsi);
-               if (ret)
-                       goto unroll_vector_base;
-
                ret = ice_vsi_alloc_ring_stats(vsi);
                if (ret)
                        goto unroll_vector_base;
@@ -2827,8 +2556,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
 
 unroll_vector_base:
        /* reclaim SW interrupts back to the common pool */
-       ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
-       pf->num_avail_sw_msix += vsi->num_q_vectors;
 unroll_alloc_q_vector:
        ice_vsi_free_q_vectors(vsi);
 unroll_vsi_init:
@@ -2920,14 +2647,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
         * many interrupts each VF needs. SR-IOV MSIX resources are also
         * cleared in the same manner.
         */
-       if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
-               ice_free_vf_ctrl_res(pf, vsi);
-       } else if (vsi->type != ICE_VSI_VF) {
-               /* reclaim SW interrupts back to the common pool */
-               ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
-               pf->num_avail_sw_msix += vsi->num_q_vectors;
-               vsi->base_vector = 0;
-       }
 
        if (vsi->type == ICE_VSI_VF &&
            vsi->agg_node && vsi->agg_node->valid)
@@ -2993,8 +2712,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params)
        return vsi;
 
 err_vsi_cfg:
-       if (params->type == ICE_VSI_VF)
-               ice_enable_lag(pf->lag);
        ice_vsi_free(vsi);
 
        return NULL;
@@ -3044,7 +2761,6 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
 void ice_vsi_free_irq(struct ice_vsi *vsi)
 {
        struct ice_pf *pf = vsi->back;
-       int base = vsi->base_vector;
        int i;
 
        if (!vsi->q_vectors || !vsi->irqs_ready)
@@ -3058,10 +2774,9 @@ void ice_vsi_free_irq(struct ice_vsi *vsi)
        ice_free_cpu_rx_rmap(vsi);
 
        ice_for_each_q_vector(vsi, i) {
-               u16 vector = i + base;
                int irq_num;
 
-               irq_num = pf->msix_entries[vector].vector;
+               irq_num = vsi->q_vectors[i]->irq.virq;
 
                /* free only the irqs that were actually requested */
                if (!vsi->q_vectors[i] ||
@@ -3193,7 +2908,6 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
  */
 void ice_vsi_dis_irq(struct ice_vsi *vsi)
 {
-       int base = vsi->base_vector;
        struct ice_pf *pf = vsi->back;
        struct ice_hw *hw = &pf->hw;
        u32 val;
@@ -3240,7 +2954,7 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi)
                return;
 
        ice_for_each_q_vector(vsi, i)
-               synchronize_irq(pf->msix_entries[i + base].vector);
+               synchronize_irq(vsi->q_vectors[i]->irq.virq);
 }
 
 /**
index 7522147..e985766 100644 (file)
@@ -104,11 +104,6 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked);
 void ice_vsi_decfg(struct ice_vsi *vsi);
 void ice_dis_vsi(struct ice_vsi *vsi, bool locked);
 
-int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id);
-
-int
-ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id);
-
 int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags);
 int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params);
 
index a1f7c8e..62e9151 100644 (file)
@@ -2490,7 +2490,6 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 {
        int q_vectors = vsi->num_q_vectors;
        struct ice_pf *pf = vsi->back;
-       int base = vsi->base_vector;
        struct device *dev;
        int rx_int_idx = 0;
        int tx_int_idx = 0;
@@ -2501,7 +2500,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
        for (vector = 0; vector < q_vectors; vector++) {
                struct ice_q_vector *q_vector = vsi->q_vectors[vector];
 
-               irq_num = pf->msix_entries[base + vector].vector;
+               irq_num = q_vector->irq.virq;
 
                if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) {
                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
@@ -2555,9 +2554,8 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
        return 0;
 
 free_q_irqs:
-       while (vector) {
-               vector--;
-               irq_num = pf->msix_entries[base + vector].vector;
+       while (vector--) {
+               irq_num = vsi->q_vectors[vector]->irq.virq;
                if (!IS_ENABLED(CONFIG_RFS_ACCEL))
                        irq_set_affinity_notifier(irq_num, NULL);
                irq_set_affinity_hint(irq_num, NULL);
@@ -3047,7 +3045,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf)
        wr32(hw, PFINT_OICR_ENA, val);
 
        /* SW_ITR_IDX = 0, but don't change INTENA */
-       wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+       wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
             GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
 }
 
@@ -3234,6 +3232,7 @@ static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
  */
 static void ice_free_irq_msix_misc(struct ice_pf *pf)
 {
+       int misc_irq_num = pf->oicr_irq.virq;
        struct ice_hw *hw = &pf->hw;
 
        ice_dis_ctrlq_interrupts(hw);
@@ -3242,14 +3241,10 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf)
        wr32(hw, PFINT_OICR_ENA, 0);
        ice_flush(hw);
 
-       if (pf->msix_entries) {
-               synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
-               devm_free_irq(ice_pf_to_dev(pf),
-                             pf->msix_entries[pf->oicr_idx].vector, pf);
-       }
+       synchronize_irq(misc_irq_num);
+       devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf);
 
-       pf->num_avail_sw_msix += 1;
-       ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
+       ice_free_irq(pf, pf->oicr_irq);
 }
 
 /**
@@ -3295,7 +3290,8 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 {
        struct device *dev = ice_pf_to_dev(pf);
        struct ice_hw *hw = &pf->hw;
-       int oicr_idx, err = 0;
+       struct msi_map oicr_irq;
+       int err = 0;
 
        if (!pf->int_name[0])
                snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
@@ -3309,30 +3305,26 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
                goto skip_req_irq;
 
        /* reserve one vector in irq_tracker for misc interrupts */
-       oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
-       if (oicr_idx < 0)
-               return oicr_idx;
-
-       pf->num_avail_sw_msix -= 1;
-       pf->oicr_idx = (u16)oicr_idx;
-
-       err = devm_request_threaded_irq(dev,
-                                       pf->msix_entries[pf->oicr_idx].vector,
-                                       ice_misc_intr, ice_misc_intr_thread_fn,
-                                       0, pf->int_name, pf);
+       oicr_irq = ice_alloc_irq(pf, false);
+       if (oicr_irq.index < 0)
+               return oicr_irq.index;
+
+       pf->oicr_irq = oicr_irq;
+       err = devm_request_threaded_irq(dev, pf->oicr_irq.virq, ice_misc_intr,
+                                       ice_misc_intr_thread_fn, 0,
+                                       pf->int_name, pf);
        if (err) {
                dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n",
                        pf->int_name, err);
-               ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
-               pf->num_avail_sw_msix += 1;
+               ice_free_irq(pf, pf->oicr_irq);
                return err;
        }
 
 skip_req_irq:
        ice_ena_misc_vector(pf);
 
-       ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
-       wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
+       ice_ena_ctrlq_interrupts(hw, pf->oicr_irq.index);
+       wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index),
             ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
 
        ice_flush(hw);
@@ -3901,224 +3893,6 @@ static int ice_init_pf(struct ice_pf *pf)
 }
 
 /**
- * ice_reduce_msix_usage - Reduce usage of MSI-X vectors
- * @pf: board private structure
- * @v_remain: number of remaining MSI-X vectors to be distributed
- *
- * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled.
- * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of
- * remaining vectors.
- */
-static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain)
-{
-       int v_rdma;
-
-       if (!ice_is_rdma_ena(pf)) {
-               pf->num_lan_msix = v_remain;
-               return;
-       }
-
-       /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */
-       v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1;
-
-       if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) {
-               dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n");
-               clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
-
-               pf->num_rdma_msix = 0;
-               pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
-       } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) ||
-                  (v_remain - v_rdma < v_rdma)) {
-               /* Support minimum RDMA and give remaining vectors to LAN MSIX */
-               pf->num_rdma_msix = ICE_MIN_RDMA_MSIX;
-               pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX;
-       } else {
-               /* Split remaining MSIX with RDMA after accounting for AEQ MSIX
-                */
-               pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 +
-                                   ICE_RDMA_NUM_AEQ_MSIX;
-               pf->num_lan_msix = v_remain - pf->num_rdma_msix;
-       }
-}
-
-/**
- * ice_ena_msix_range - Request a range of MSIX vectors from the OS
- * @pf: board private structure
- *
- * Compute the number of MSIX vectors wanted and request from the OS. Adjust
- * device usage if there are not enough vectors. Return the number of vectors
- * reserved or negative on failure.
- */
-static int ice_ena_msix_range(struct ice_pf *pf)
-{
-       int num_cpus, hw_num_msix, v_other, v_wanted, v_actual;
-       struct device *dev = ice_pf_to_dev(pf);
-       int err, i;
-
-       hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors;
-       num_cpus = num_online_cpus();
-
-       /* LAN miscellaneous handler */
-       v_other = ICE_MIN_LAN_OICR_MSIX;
-
-       /* Flow Director */
-       if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
-               v_other += ICE_FDIR_MSIX;
-
-       /* switchdev */
-       v_other += ICE_ESWITCH_MSIX;
-
-       v_wanted = v_other;
-
-       /* LAN traffic */
-       pf->num_lan_msix = num_cpus;
-       v_wanted += pf->num_lan_msix;
-
-       /* RDMA auxiliary driver */
-       if (ice_is_rdma_ena(pf)) {
-               pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX;
-               v_wanted += pf->num_rdma_msix;
-       }
-
-       if (v_wanted > hw_num_msix) {
-               int v_remain;
-
-               dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n",
-                        v_wanted, hw_num_msix);
-
-               if (hw_num_msix < ICE_MIN_MSIX) {
-                       err = -ERANGE;
-                       goto exit_err;
-               }
-
-               v_remain = hw_num_msix - v_other;
-               if (v_remain < ICE_MIN_LAN_TXRX_MSIX) {
-                       v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX;
-                       v_remain = ICE_MIN_LAN_TXRX_MSIX;
-               }
-
-               ice_reduce_msix_usage(pf, v_remain);
-               v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other;
-
-               dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n",
-                          pf->num_lan_msix);
-               if (ice_is_rdma_ena(pf))
-                       dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n",
-                                  pf->num_rdma_msix);
-       }
-
-       pf->msix_entries = devm_kcalloc(dev, v_wanted,
-                                       sizeof(*pf->msix_entries), GFP_KERNEL);
-       if (!pf->msix_entries) {
-               err = -ENOMEM;
-               goto exit_err;
-       }
-
-       for (i = 0; i < v_wanted; i++)
-               pf->msix_entries[i].entry = i;
-
-       /* actually reserve the vectors */
-       v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
-                                        ICE_MIN_MSIX, v_wanted);
-       if (v_actual < 0) {
-               dev_err(dev, "unable to reserve MSI-X vectors\n");
-               err = v_actual;
-               goto msix_err;
-       }
-
-       if (v_actual < v_wanted) {
-               dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
-                        v_wanted, v_actual);
-
-               if (v_actual < ICE_MIN_MSIX) {
-                       /* error if we can't get minimum vectors */
-                       pci_disable_msix(pf->pdev);
-                       err = -ERANGE;
-                       goto msix_err;
-               } else {
-                       int v_remain = v_actual - v_other;
-
-                       if (v_remain < ICE_MIN_LAN_TXRX_MSIX)
-                               v_remain = ICE_MIN_LAN_TXRX_MSIX;
-
-                       ice_reduce_msix_usage(pf, v_remain);
-
-                       dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
-                                  pf->num_lan_msix);
-
-                       if (ice_is_rdma_ena(pf))
-                               dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n",
-                                          pf->num_rdma_msix);
-               }
-       }
-
-       return v_actual;
-
-msix_err:
-       devm_kfree(dev, pf->msix_entries);
-
-exit_err:
-       pf->num_rdma_msix = 0;
-       pf->num_lan_msix = 0;
-       return err;
-}
-
-/**
- * ice_dis_msix - Disable MSI-X interrupt setup in OS
- * @pf: board private structure
- */
-static void ice_dis_msix(struct ice_pf *pf)
-{
-       pci_disable_msix(pf->pdev);
-       devm_kfree(ice_pf_to_dev(pf), pf->msix_entries);
-       pf->msix_entries = NULL;
-}
-
-/**
- * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
- * @pf: board private structure
- */
-static void ice_clear_interrupt_scheme(struct ice_pf *pf)
-{
-       ice_dis_msix(pf);
-
-       if (pf->irq_tracker) {
-               devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker);
-               pf->irq_tracker = NULL;
-       }
-}
-
-/**
- * ice_init_interrupt_scheme - Determine proper interrupt scheme
- * @pf: board private structure to initialize
- */
-static int ice_init_interrupt_scheme(struct ice_pf *pf)
-{
-       int vectors;
-
-       vectors = ice_ena_msix_range(pf);
-
-       if (vectors < 0)
-               return vectors;
-
-       /* set up vector assignment tracking */
-       pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf),
-                                      struct_size(pf->irq_tracker, list, vectors),
-                                      GFP_KERNEL);
-       if (!pf->irq_tracker) {
-               ice_dis_msix(pf);
-               return -ENOMEM;
-       }
-
-       /* populate SW interrupts pool with number of OS granted IRQs. */
-       pf->num_avail_sw_msix = (u16)vectors;
-       pf->irq_tracker->num_entries = (u16)vectors;
-       pf->irq_tracker->end = pf->irq_tracker->num_entries;
-
-       return 0;
-}
-
-/**
  * ice_is_wol_supported - check if WoL is supported
  * @hw: pointer to hardware info
  *
index 02a4e1c..6a93647 100644 (file)
@@ -47,6 +47,7 @@ enum ice_protocol_type {
        ICE_L2TPV3,
        ICE_VLAN_EX,
        ICE_VLAN_IN,
+       ICE_HW_METADATA,
        ICE_VXLAN_GPE,
        ICE_SCTP_IL,
        ICE_PROTOCOL_LAST
@@ -115,17 +116,7 @@ enum ice_prot_id {
 #define ICE_L2TPV3_HW          104
 
 #define ICE_UDP_OF_HW  52 /* UDP Tunnels */
-#define ICE_META_DATA_ID_HW 255 /* this is used for tunnel and VLAN type */
 
-#define ICE_MDID_SIZE 2
-
-#define ICE_TUN_FLAG_MDID 21
-#define ICE_TUN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_TUN_FLAG_MDID)
-#define ICE_TUN_FLAG_MASK 0xFF
-
-#define ICE_VLAN_FLAG_MDID 20
-#define ICE_VLAN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_VLAN_FLAG_MDID)
-#define ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK 0xD000
 
 #define ICE_TUN_FLAG_FV_IND 2
 
@@ -230,6 +221,191 @@ struct ice_nvgre_hdr {
        __be32 tni_flow;
 };
 
+/* Metadata information
+ *
+ * Not all MDIDs can be used by switch block. It depends on package version.
+ *
+ * MDID 16 (Rx offset)
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  A  |   B     |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = Source port where the transaction came from (3b).
+ *
+ * B = Destination TC of the packet. The TC is relative to a port (5b).
+ *
+ * MDID 17
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      PTYPE        | Reserved  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * PTYPE = Encodes the packet type (10b).
+ *
+ * MDID 18
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Packet length             | R |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Packet length = Length of the packet in bytes
+ *                (packet always carries CRC) (14b).
+ * R = Reserved (2b).
+ *
+ * MDID 19
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Source VSI      | Reserved  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Source VSI = Source VSI of packet loopbacked in switch (for egress) (10b).
+ */
+#define ICE_MDID_SOURCE_VSI_MASK GENMASK(9, 0)
+
+/*
+ * MDID 20
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |A|B|C|D|E|F|R|R|G|H|I|J|K|L|M|N|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = DSI - set for DSI RX pkts.
+ * B = ipsec_decrypted - invalid on NIC.
+ * C = marker - this is a marker packet.
+ * D = from_network - for TX sets to 0
+ *                   for RX:
+ *                     * 1 - packet is from external link
+ *                     * 0 - packet source is from internal
+ * E = source_interface_is_rx - reflect the physical interface from where the
+ *                             packet was received:
+ *                             * 1 - Rx
+ *                             * 0 - Tx
+ * F = from_mng - The bit signals that the packet's origin is the management.
+ * G = ucast - Outer L2 MAC address is unicast.
+ * H = mcast - Outer L2 MAC address is multicast.
+ * I = bcast - Outer L2 MAC address is broadcast.
+ * J = second_outer_mac_present - 2 outer MAC headers are present in the packet.
+ * K = STAG or BVLAN - Outer L2 header has STAG (ethernet type 0x88a8) or
+ *                    BVLAN (ethernet type 0x88a8).
+ * L = ITAG - Outer L2 header has ITAG (ethernet type 0x88e7)
+ * M = EVLAN (0x8100) - Outer L2 header has EVLAN (ethernet type 0x8100)
+ * N = EVLAN (0x9100) - Outer L2 header has EVLAN (ethernet type 0x9100)
+ */
+#define ICE_PKT_VLAN_STAG      BIT(12)
+#define ICE_PKT_VLAN_ITAG      BIT(13)
+#define ICE_PKT_VLAN_EVLAN     (BIT(14) | BIT(15))
+#define ICE_PKT_VLAN_MASK      (ICE_PKT_VLAN_STAG | ICE_PKT_VLAN_ITAG | \
+                               ICE_PKT_VLAN_EVLAN)
+/* MDID 21
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |A|B|C|D|E|F|G|H|I|J|R|R|K|L|M|N|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = VLAN (0x8100) - Outer L2 header has VLAN (ethernet type 0x8100)
+ * B = NSHoE - Outer L2 header has NSH (ethernet type 0x894f)
+ * C = MPLS (0x8847) - There is at least 1 MPLS tag in the outer header
+ *                    (ethernet type 0x8847)
+ * D = MPLS (0x8848) - There is at least 1 MPLS tag in the outer header
+ *                    (ethernet type 0x8848)
+ * E = multi MPLS - There is more than a single MPLS tag in the outer header
+ * F = inner MPLS - There is inner MPLS tag in the packet
+ * G = tunneled MAC - Set if the packet includes a tunneled MAC
+ * H = tunneled VLAN - Same as VLAN, but for a tunneled header
+ * I = pkt_is_frag - Packet is fragmented (ipv4 or ipv6)
+ * J = ipv6_ext - The packet has routing or destination ipv6 extension in inner
+ *               or outer ipv6 headers
+ * K = RoCE - UDP packet detected as RoCEv2
+ * L = UDP_XSUM_0 - Set to 1 if L4 checksum is 0 in a UDP packet
+ * M = ESP - This is an ESP packet
+ * N = NAT_ESP - This is an ESP packet encapsulated in UDP NAT
+ */
+#define ICE_PKT_TUNNEL_MAC     BIT(6)
+#define ICE_PKT_TUNNEL_VLAN    BIT(7)
+#define ICE_PKT_TUNNEL_MASK    (ICE_PKT_TUNNEL_MAC | ICE_PKT_TUNNEL_VLAN)
+
+/* MDID 22
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |A|B|C|D|E|F|  G  |H|I|J| K |L|M|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = fin - fin flag in tcp header
+ * B = sync - sync flag in tcp header
+ * C = rst - rst flag in tcp header
+ * D = psh - psh flag in tcp header
+ * E = ack - ack flag in tcp header
+ * F = urg - urg flag in tcp header
+ * G = tunnel type (3b) - Flags used to decode tunnel type:
+ *                       * b000 - not a VXLAN/Geneve/GRE tunnel
+ *                       * b001 - VXLAN-GPE
+ *                       * b010 - VXLAN (non-GPE)
+ *                       * b011 - Geneve
+ *                       * b100 - GRE (no key, no xsum)
+ *                       * b101 - GREK (key, no xsum)
+ *                       * b110 - GREC (no key, xsum)
+ *                       * b111 - GREKC (key, xsum)
+ * H = UDP_GRE - Packet is UDP (VXLAN or VLAN_GPE or Geneve or MPLSoUDP or GRE)
+ *              tunnel
+ * I = OAM - VXLAN/Geneve/tunneled NSH packet with the OAM bit set
+ * J = tunneled NSH - Packet has NSHoGRE or NSHoUDP
+ * K = switch (2b) - Direction on switch
+ *                  * b00 - normal
+ *                  * b01 - TX force only LAN
+ *                  * b10 - TX disable LAN
+ *                  * b11 - direct to VSI
+ * L = swpe - Represents SWPE bit in TX command
+ * M = sw_cmd - Switch command
+ *
+ * MDID 23
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |A|B|C|D|        R        |E|F|R|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = MAC error - Produced by MAC according to L2 error conditions
+ * B = PPRS no offload - FIFO overflow in PPRS or any problematic condition in
+ *                      PPRS ANA
+ * C = abort - Set when malicious packet is detected
+ * D = partial analysis - ANA's analysing got cut in the middle
+ *                      (header > 504B etc.)
+ * E = FLM - Flow director hit indication
+ * F = FDLONG - Flow director long bucket indication
+ *
+ */
+#define ICE_MDID_SIZE 2
+#define ICE_META_DATA_ID_HW 255
+
+enum ice_hw_metadata_id {
+       ICE_SOURCE_PORT_MDID = 16,
+       ICE_PTYPE_MDID = 17,
+       ICE_PACKET_LENGTH_MDID = 18,
+       ICE_SOURCE_VSI_MDID = 19,
+       ICE_PKT_VLAN_MDID = 20,
+       ICE_PKT_TUNNEL_MDID = 21,
+       ICE_PKT_TCP_MDID = 22,
+       ICE_PKT_ERROR_MDID = 23,
+};
+
+enum ice_hw_metadata_offset {
+       ICE_SOURCE_PORT_MDID_OFFSET = ICE_MDID_SIZE * ICE_SOURCE_PORT_MDID,
+       ICE_PTYPE_MDID_OFFSET = ICE_MDID_SIZE * ICE_PTYPE_MDID,
+       ICE_PACKET_LENGTH_MDID_OFFSET = ICE_MDID_SIZE * ICE_PACKET_LENGTH_MDID,
+       ICE_SOURCE_VSI_MDID_OFFSET = ICE_MDID_SIZE * ICE_SOURCE_VSI_MDID,
+       ICE_PKT_VLAN_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_VLAN_MDID,
+       ICE_PKT_TUNNEL_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_TUNNEL_MDID,
+       ICE_PKT_TCP_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_TCP_MDID,
+       ICE_PKT_ERROR_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_ERROR_MDID,
+};
+
+enum ice_pkt_flags {
+       ICE_PKT_FLAGS_VLAN = 0,
+       ICE_PKT_FLAGS_TUNNEL = 1,
+       ICE_PKT_FLAGS_TCP = 2,
+       ICE_PKT_FLAGS_ERROR = 3,
+};
+
+struct ice_hw_metadata {
+       __be16 source_port;
+       __be16 ptype;
+       __be16 packet_length;
+       __be16 source_vsi;
+       __be16 flags[4];
+};
+
 union ice_prot_hdr {
        struct ice_ether_hdr eth_hdr;
        struct ice_ethtype_hdr ethertype;
@@ -243,6 +419,7 @@ union ice_prot_hdr {
        struct ice_udp_gtp_hdr gtp_hdr;
        struct ice_pppoe_hdr pppoe_hdr;
        struct ice_l2tpv3_sess_hdr l2tpv3_sess_hdr;
+       struct ice_hw_metadata metadata;
 };
 
 /* This is mapping table entry that maps every word within a given protocol
index ac6f06f..d4b6c99 100644 (file)
@@ -911,7 +911,7 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
        spin_unlock(&tx->lock);
 
        /* wait for potentially outstanding interrupt to complete */
-       synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
+       synchronize_irq(pf->oicr_irq.virq);
 
        ice_ptp_flush_tx_tracker(pf, tx);
 
index fd1f8b0..e30e123 100644 (file)
@@ -298,14 +298,6 @@ static int ice_repr_add(struct ice_vf *vf)
        if (!repr)
                return -ENOMEM;
 
-#ifdef CONFIG_ICE_SWITCHDEV
-       repr->mac_rule = kzalloc(sizeof(*repr->mac_rule), GFP_KERNEL);
-       if (!repr->mac_rule) {
-               err = -ENOMEM;
-               goto err_alloc_rule;
-       }
-#endif
-
        repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv));
        if (!repr->netdev) {
                err =  -ENOMEM;
@@ -351,11 +343,6 @@ err_alloc_q_vector:
        free_netdev(repr->netdev);
        repr->netdev = NULL;
 err_alloc:
-#ifdef CONFIG_ICE_SWITCHDEV
-       kfree(repr->mac_rule);
-       repr->mac_rule = NULL;
-err_alloc_rule:
-#endif
        kfree(repr);
        vf->repr = NULL;
        return err;
@@ -376,10 +363,6 @@ static void ice_repr_rem(struct ice_vf *vf)
        ice_devlink_destroy_vf_port(vf);
        free_netdev(vf->repr->netdev);
        vf->repr->netdev = NULL;
-#ifdef CONFIG_ICE_SWITCHDEV
-       kfree(vf->repr->mac_rule);
-       vf->repr->mac_rule = NULL;
-#endif
        kfree(vf->repr);
        vf->repr = NULL;
 
index 378a45b..9c2a6f4 100644 (file)
@@ -13,9 +13,8 @@ struct ice_repr {
        struct net_device *netdev;
        struct metadata_dst *dst;
 #ifdef CONFIG_ICE_SWITCHDEV
-       /* info about slow path MAC rule  */
-       struct ice_rule_query_data *mac_rule;
-       u8 rule_added;
+       /* info about slow path rule */
+       struct ice_rule_query_data sp_rule;
 #endif
 };
 
index 588ad86..2ea6d24 100644 (file)
@@ -135,18 +135,9 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
  */
 static int ice_sriov_free_msix_res(struct ice_pf *pf)
 {
-       struct ice_res_tracker *res;
-
        if (!pf)
                return -EINVAL;
 
-       res = pf->irq_tracker;
-       if (!res)
-               return -EINVAL;
-
-       /* give back irq_tracker resources used */
-       WARN_ON(pf->sriov_base_vector < res->num_entries);
-
        pf->sriov_base_vector = 0;
 
        return 0;
@@ -410,29 +401,6 @@ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
 }
 
 /**
- * ice_get_max_valid_res_idx - Get the max valid resource index
- * @res: pointer to the resource to find the max valid index for
- *
- * Start from the end of the ice_res_tracker and return right when we find the
- * first res->list entry with the ICE_RES_VALID_BIT set. This function is only
- * valid for SR-IOV because it is the only consumer that manipulates the
- * res->end and this is always called when res->end is set to res->num_entries.
- */
-static int ice_get_max_valid_res_idx(struct ice_res_tracker *res)
-{
-       int i;
-
-       if (!res)
-               return -EINVAL;
-
-       for (i = res->num_entries - 1; i >= 0; i--)
-               if (res->list[i] & ICE_RES_VALID_BIT)
-                       return i;
-
-       return 0;
-}
-
-/**
  * ice_sriov_set_msix_res - Set any used MSIX resources
  * @pf: pointer to PF structure
  * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
@@ -450,7 +418,7 @@ static int ice_get_max_valid_res_idx(struct ice_res_tracker *res)
 static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
 {
        u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
-       int vectors_used = pf->irq_tracker->num_entries;
+       int vectors_used = ice_get_max_used_msix_vector(pf);
        int sriov_base_vector;
 
        sriov_base_vector = total_vectors - num_msix_needed;
@@ -490,7 +458,7 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
  */
 static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
 {
-       int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+       int vectors_used = ice_get_max_used_msix_vector(pf);
        u16 num_msix_per_vf, num_txq, num_rxq, avail_qs;
        int msix_avail_per_vf, msix_avail_for_sriov;
        struct device *dev = ice_pf_to_dev(pf);
@@ -501,12 +469,9 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
        if (!num_vfs)
                return -EINVAL;
 
-       if (max_valid_res_idx < 0)
-               return -ENOSPC;
-
        /* determine MSI-X resources per VF */
        msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors -
-               pf->irq_tracker->num_entries;
+               vectors_used;
        msix_avail_per_vf = msix_avail_for_sriov / num_vfs;
        if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) {
                num_msix_per_vf = ICE_NUM_VF_MSIX_MED;
@@ -871,7 +836,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
        int ret;
 
        /* Disable global interrupt 0 so we don't try to handle the VFLR. */
-       wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+       wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
             ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
        set_bit(ICE_OICR_INTR_DIS, pf->state);
        ice_flush(hw);
@@ -1014,8 +979,6 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
        if (!num_vfs) {
                if (!pci_vfs_assigned(pdev)) {
                        ice_free_vfs(pf);
-                       if (pf->lag)
-                               ice_enable_lag(pf->lag);
                        return 0;
                }
 
@@ -1027,8 +990,6 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
        if (err)
                return err;
 
-       if (pf->lag)
-               ice_disable_lag(pf->lag);
        return num_vfs;
 }
 
index 46b3685..2ea9e1a 100644 (file)
@@ -4540,6 +4540,11 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
        return status;
 }
 
+#define ICE_PROTOCOL_ENTRY(id, ...) {          \
+       .prot_type      = id,                   \
+       .offs           = {__VA_ARGS__},        \
+}
+
 /* This is mapping table entry that maps every word within a given protocol
  * structure to the real byte offset as per the specification of that
  * protocol header.
@@ -4550,29 +4555,38 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
  * structure is added to that union.
  */
 static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = {
-       { ICE_MAC_OFOS,         { 0, 2, 4, 6, 8, 10, 12 } },
-       { ICE_MAC_IL,           { 0, 2, 4, 6, 8, 10, 12 } },
-       { ICE_ETYPE_OL,         { 0 } },
-       { ICE_ETYPE_IL,         { 0 } },
-       { ICE_VLAN_OFOS,        { 2, 0 } },
-       { ICE_IPV4_OFOS,        { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } },
-       { ICE_IPV4_IL,          { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } },
-       { ICE_IPV6_OFOS,        { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24,
-                                26, 28, 30, 32, 34, 36, 38 } },
-       { ICE_IPV6_IL,          { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24,
-                                26, 28, 30, 32, 34, 36, 38 } },
-       { ICE_TCP_IL,           { 0, 2 } },
-       { ICE_UDP_OF,           { 0, 2 } },
-       { ICE_UDP_ILOS,         { 0, 2 } },
-       { ICE_VXLAN,            { 8, 10, 12, 14 } },
-       { ICE_GENEVE,           { 8, 10, 12, 14 } },
-       { ICE_NVGRE,            { 0, 2, 4, 6 } },
-       { ICE_GTP,              { 8, 10, 12, 14, 16, 18, 20, 22 } },
-       { ICE_GTP_NO_PAY,       { 8, 10, 12, 14 } },
-       { ICE_PPPOE,            { 0, 2, 4, 6 } },
-       { ICE_L2TPV3,           { 0, 2, 4, 6, 8, 10 } },
-       { ICE_VLAN_EX,          { 2, 0 } },
-       { ICE_VLAN_IN,          { 2, 0 } },
+       ICE_PROTOCOL_ENTRY(ICE_MAC_OFOS, 0, 2, 4, 6, 8, 10, 12),
+       ICE_PROTOCOL_ENTRY(ICE_MAC_IL, 0, 2, 4, 6, 8, 10, 12),
+       ICE_PROTOCOL_ENTRY(ICE_ETYPE_OL, 0),
+       ICE_PROTOCOL_ENTRY(ICE_ETYPE_IL, 0),
+       ICE_PROTOCOL_ENTRY(ICE_VLAN_OFOS, 2, 0),
+       ICE_PROTOCOL_ENTRY(ICE_IPV4_OFOS, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18),
+       ICE_PROTOCOL_ENTRY(ICE_IPV4_IL, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18),
+       ICE_PROTOCOL_ENTRY(ICE_IPV6_OFOS, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18,
+                          20, 22, 24, 26, 28, 30, 32, 34, 36, 38),
+       ICE_PROTOCOL_ENTRY(ICE_IPV6_IL, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
+                          22, 24, 26, 28, 30, 32, 34, 36, 38),
+       ICE_PROTOCOL_ENTRY(ICE_TCP_IL, 0, 2),
+       ICE_PROTOCOL_ENTRY(ICE_UDP_OF, 0, 2),
+       ICE_PROTOCOL_ENTRY(ICE_UDP_ILOS, 0, 2),
+       ICE_PROTOCOL_ENTRY(ICE_VXLAN, 8, 10, 12, 14),
+       ICE_PROTOCOL_ENTRY(ICE_GENEVE, 8, 10, 12, 14),
+       ICE_PROTOCOL_ENTRY(ICE_NVGRE, 0, 2, 4, 6),
+       ICE_PROTOCOL_ENTRY(ICE_GTP, 8, 10, 12, 14, 16, 18, 20, 22),
+       ICE_PROTOCOL_ENTRY(ICE_GTP_NO_PAY, 8, 10, 12, 14),
+       ICE_PROTOCOL_ENTRY(ICE_PPPOE, 0, 2, 4, 6),
+       ICE_PROTOCOL_ENTRY(ICE_L2TPV3, 0, 2, 4, 6, 8, 10),
+       ICE_PROTOCOL_ENTRY(ICE_VLAN_EX, 2, 0),
+       ICE_PROTOCOL_ENTRY(ICE_VLAN_IN, 2, 0),
+       ICE_PROTOCOL_ENTRY(ICE_HW_METADATA,
+                          ICE_SOURCE_PORT_MDID_OFFSET,
+                          ICE_PTYPE_MDID_OFFSET,
+                          ICE_PACKET_LENGTH_MDID_OFFSET,
+                          ICE_SOURCE_VSI_MDID_OFFSET,
+                          ICE_PKT_VLAN_MDID_OFFSET,
+                          ICE_PKT_TUNNEL_MDID_OFFSET,
+                          ICE_PKT_TCP_MDID_OFFSET,
+                          ICE_PKT_ERROR_MDID_OFFSET),
 };
 
 static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = {
@@ -4597,6 +4611,7 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = {
        { ICE_L2TPV3,           ICE_L2TPV3_HW },
        { ICE_VLAN_EX,          ICE_VLAN_OF_HW },
        { ICE_VLAN_IN,          ICE_VLAN_OL_HW },
+       { ICE_HW_METADATA,      ICE_META_DATA_ID_HW },
 };
 
 /**
@@ -5255,71 +5270,6 @@ ice_create_recipe_group(struct ice_hw *hw, struct ice_sw_recipe *rm,
        return status;
 }
 
-/**
- * ice_tun_type_match_word - determine if tun type needs a match mask
- * @tun_type: tunnel type
- * @mask: mask to be used for the tunnel
- */
-static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask)
-{
-       switch (tun_type) {
-       case ICE_SW_TUN_GENEVE:
-       case ICE_SW_TUN_VXLAN:
-       case ICE_SW_TUN_NVGRE:
-       case ICE_SW_TUN_GTPU:
-       case ICE_SW_TUN_GTPC:
-               *mask = ICE_TUN_FLAG_MASK;
-               return true;
-
-       default:
-               *mask = 0;
-               return false;
-       }
-}
-
-/**
- * ice_add_special_words - Add words that are not protocols, such as metadata
- * @rinfo: other information regarding the rule e.g. priority and action info
- * @lkup_exts: lookup word structure
- * @dvm_ena: is double VLAN mode enabled
- */
-static int
-ice_add_special_words(struct ice_adv_rule_info *rinfo,
-                     struct ice_prot_lkup_ext *lkup_exts, bool dvm_ena)
-{
-       u16 mask;
-
-       /* If this is a tunneled packet, then add recipe index to match the
-        * tunnel bit in the packet metadata flags.
-        */
-       if (ice_tun_type_match_word(rinfo->tun_type, &mask)) {
-               if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) {
-                       u8 word = lkup_exts->n_val_words++;
-
-                       lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW;
-                       lkup_exts->fv_words[word].off = ICE_TUN_FLAG_MDID_OFF;
-                       lkup_exts->field_mask[word] = mask;
-               } else {
-                       return -ENOSPC;
-               }
-       }
-
-       if (rinfo->vlan_type != 0 && dvm_ena) {
-               if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) {
-                       u8 word = lkup_exts->n_val_words++;
-
-                       lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW;
-                       lkup_exts->fv_words[word].off = ICE_VLAN_FLAG_MDID_OFF;
-                       lkup_exts->field_mask[word] =
-                                       ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK;
-               } else {
-                       return -ENOSPC;
-               }
-       }
-
-       return 0;
-}
-
 /* ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule
  * @hw: pointer to hardware structure
  * @rinfo: other information regarding the rule e.g. priority and action info
@@ -5433,13 +5383,6 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
        if (status)
                goto err_unroll;
 
-       /* Create any special protocol/offset pairs, such as looking at tunnel
-        * bits by extracting metadata
-        */
-       status = ice_add_special_words(rinfo, lkup_exts, ice_is_dvm_ena(hw));
-       if (status)
-               goto err_unroll;
-
        /* Group match words into recipes using preferred recipe grouping
         * criteria.
         */
@@ -5725,6 +5668,10 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
                 * was already checked when search for the dummy packet
                 */
                type = lkups[i].type;
+               /* metadata isn't present in the packet */
+               if (type == ICE_HW_METADATA)
+                       continue;
+
                for (j = 0; offsets[j].type != ICE_PROTOCOL_LAST; j++) {
                        if (type == offsets[j].type) {
                                offset = offsets[j].offset;
@@ -5860,16 +5807,21 @@ ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type,
 
 /**
  * ice_fill_adv_packet_vlan - fill dummy packet with VLAN tag type
+ * @hw: pointer to hw structure
  * @vlan_type: VLAN tag type
  * @pkt: dummy packet to fill in
  * @offsets: offset info for the dummy packet
  */
 static int
-ice_fill_adv_packet_vlan(u16 vlan_type, u8 *pkt,
+ice_fill_adv_packet_vlan(struct ice_hw *hw, u16 vlan_type, u8 *pkt,
                         const struct ice_dummy_pkt_offsets *offsets)
 {
        u16 i;
 
+       /* Check if there is something to do */
+       if (!vlan_type || !ice_is_dvm_ena(hw))
+               return 0;
+
        /* Find VLAN header and insert VLAN TPID */
        for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) {
                if (offsets[i].type == ICE_VLAN_OFOS ||
@@ -5888,6 +5840,15 @@ ice_fill_adv_packet_vlan(u16 vlan_type, u8 *pkt,
        return -EIO;
 }
 
+static bool ice_rules_equal(const struct ice_adv_rule_info *first,
+                           const struct ice_adv_rule_info *second)
+{
+       return first->sw_act.flag == second->sw_act.flag &&
+              first->tun_type == second->tun_type &&
+              first->vlan_type == second->vlan_type &&
+              first->src_vsi == second->src_vsi;
+}
+
 /**
  * ice_find_adv_rule_entry - Search a rule entry
  * @hw: pointer to the hardware structure
@@ -5921,9 +5882,7 @@ ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
                                lkups_matched = false;
                                break;
                        }
-               if (rinfo->sw_act.flag == list_itr->rule_info.sw_act.flag &&
-                   rinfo->tun_type == list_itr->rule_info.tun_type &&
-                   rinfo->vlan_type == list_itr->rule_info.vlan_type &&
+               if (ice_rules_equal(rinfo, &list_itr->rule_info) &&
                    lkups_matched)
                        return list_itr;
        }
@@ -6039,6 +5998,26 @@ ice_adv_add_update_vsi_list(struct ice_hw *hw,
        return status;
 }
 
+void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup)
+{
+       lkup->type = ICE_HW_METADATA;
+       lkup->m_u.metadata.flags[ICE_PKT_FLAGS_TUNNEL] =
+               cpu_to_be16(ICE_PKT_TUNNEL_MASK);
+}
+
+void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup)
+{
+       lkup->type = ICE_HW_METADATA;
+       lkup->m_u.metadata.flags[ICE_PKT_FLAGS_VLAN] =
+               cpu_to_be16(ICE_PKT_VLAN_MASK);
+}
+
+void ice_rule_add_src_vsi_metadata(struct ice_adv_lkup_elem *lkup)
+{
+       lkup->type = ICE_HW_METADATA;
+       lkup->m_u.metadata.source_vsi = cpu_to_be16(ICE_MDID_SOURCE_VSI_MASK);
+}
+
 /**
  * ice_add_adv_rule - helper function to create an advanced switch rule
  * @hw: pointer to the hardware structure
@@ -6120,7 +6099,10 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
        if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI)
                rinfo->sw_act.fwd_id.hw_vsi_id =
                        ice_get_hw_vsi_num(hw, vsi_handle);
-       if (rinfo->sw_act.flag & ICE_FLTR_TX)
+
+       if (rinfo->src_vsi)
+               rinfo->sw_act.src = ice_get_hw_vsi_num(hw, rinfo->src_vsi);
+       else
                rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle);
 
        status = ice_add_adv_recipe(hw, lkups, lkups_cnt, rinfo, &rid);
@@ -6189,19 +6171,20 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
                goto err_ice_add_adv_rule;
        }
 
-       /* set the rule LOOKUP type based on caller specified 'Rx'
-        * instead of hardcoding it to be either LOOKUP_TX/RX
+       /* If there is no matching criteria for direction there
+        * is only one difference between Rx and Tx:
+        * - get switch id based on VSI number from source field (Tx)
+        * - get switch id based on port number (Rx)
         *
-        * for 'Rx' set the source to be the port number
-        * for 'Tx' set the source to be the source HW VSI number (determined
-        * by caller)
+        * If matching on direction metadata is chosen, rule direction is
+        * extracted from the type value set here.
         */
-       if (rinfo->rx) {
-               s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
-               s_rule->src = cpu_to_le16(hw->port_info->lport);
-       } else {
+       if (rinfo->sw_act.flag & ICE_FLTR_TX) {
                s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
                s_rule->src = cpu_to_le16(rinfo->sw_act.src);
+       } else {
+               s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
+               s_rule->src = cpu_to_le16(hw->port_info->lport);
        }
 
        s_rule->recipe_id = cpu_to_le16(rid);
@@ -6211,22 +6194,16 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
        if (status)
                goto err_ice_add_adv_rule;
 
-       if (rinfo->tun_type != ICE_NON_TUN &&
-           rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) {
-               status = ice_fill_adv_packet_tun(hw, rinfo->tun_type,
-                                                s_rule->hdr_data,
-                                                profile->offsets);
-               if (status)
-                       goto err_ice_add_adv_rule;
-       }
+       status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, s_rule->hdr_data,
+                                        profile->offsets);
+       if (status)
+               goto err_ice_add_adv_rule;
 
-       if (rinfo->vlan_type != 0 && ice_is_dvm_ena(hw)) {
-               status = ice_fill_adv_packet_vlan(rinfo->vlan_type,
-                                                 s_rule->hdr_data,
-                                                 profile->offsets);
-               if (status)
-                       goto err_ice_add_adv_rule;
-       }
+       status = ice_fill_adv_packet_vlan(hw, rinfo->vlan_type,
+                                         s_rule->hdr_data,
+                                         profile->offsets);
+       if (status)
+               goto err_ice_add_adv_rule;
 
        status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule,
                                 rule_buf_sz, 1, ice_aqc_opc_add_sw_rules,
@@ -6469,13 +6446,6 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
                        return -EIO;
        }
 
-       /* Create any special protocol/offset pairs, such as looking at tunnel
-        * bits by extracting metadata
-        */
-       status = ice_add_special_words(rinfo, &lkup_exts, ice_is_dvm_ena(hw));
-       if (status)
-               return status;
-
        rid = ice_find_recp(hw, &lkup_exts, rinfo->tun_type);
        /* If did not find a recipe that match the existing criteria */
        if (rid == ICE_MAX_NUM_RECIPES)
index 68d8e8a..c84b56f 100644 (file)
@@ -10,7 +10,6 @@
 #define ICE_DFLT_VSI_INVAL 0xff
 #define ICE_FLTR_RX BIT(0)
 #define ICE_FLTR_TX BIT(1)
-#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX)
 #define ICE_VSI_INVAL_ID 0xffff
 #define ICE_INVAL_Q_HANDLE 0xFFFF
 
@@ -187,12 +186,13 @@ struct ice_adv_rule_flags_info {
 };
 
 struct ice_adv_rule_info {
+       /* Store metadata values in rule info */
        enum ice_sw_tunnel_type tun_type;
-       struct ice_sw_act_ctrl sw_act;
-       u32 priority;
-       u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */
-       u16 fltr_rule_id;
        u16 vlan_type;
+       u16 fltr_rule_id;
+       u32 priority;
+       u16 src_vsi;
+       struct ice_sw_act_ctrl sw_act;
        struct ice_adv_rule_flags_info flags_info;
 };
 
@@ -342,6 +342,9 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
                  u16 counter_id);
 
 /* Switch/bridge related commands */
+void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup);
+void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup);
+void ice_rule_add_src_vsi_metadata(struct ice_adv_lkup_elem *lkup);
 int
 ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
                 u16 lkups_cnt, struct ice_adv_rule_info *rinfo,
index d1a31f2..b54052e 100644 (file)
@@ -54,6 +54,10 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
        if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO))
                lkups_cnt++;
 
+       /* is VLAN TPID specified */
+       if (flags & ICE_TC_FLWR_FIELD_VLAN_TPID)
+               lkups_cnt++;
+
        /* is CVLAN specified? */
        if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO))
                lkups_cnt++;
@@ -80,6 +84,10 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
                     ICE_TC_FLWR_FIELD_SRC_L4_PORT))
                lkups_cnt++;
 
+       /* matching for tunneled packets in metadata */
+       if (fltr->tunnel_type != TNL_LAST)
+               lkups_cnt++;
+
        return lkups_cnt;
 }
 
@@ -320,6 +328,10 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr,
                i++;
        }
 
+       /* always fill matching on tunneled packets in metadata */
+       ice_rule_add_tunnel_metadata(&list[i]);
+       i++;
+
        return i;
 }
 
@@ -390,10 +402,6 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
 
        /* copy VLAN info */
        if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) {
-               vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid);
-               rule_info->vlan_type =
-                               ice_check_supported_vlan_tpid(vlan_tpid);
-
                if (flags & ICE_TC_FLWR_FIELD_CVLAN)
                        list[i].type = ICE_VLAN_EX;
                else
@@ -418,6 +426,15 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
                i++;
        }
 
+       if (flags & ICE_TC_FLWR_FIELD_VLAN_TPID) {
+               vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid);
+               rule_info->vlan_type =
+                               ice_check_supported_vlan_tpid(vlan_tpid);
+
+               ice_rule_add_vlan_metadata(&list[i]);
+               i++;
+       }
+
        if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) {
                list[i].type = ICE_VLAN_IN;
 
@@ -698,12 +715,10 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
        if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) {
                rule_info.sw_act.flag |= ICE_FLTR_RX;
                rule_info.sw_act.src = hw->pf_id;
-               rule_info.rx = true;
                rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE;
        } else {
                rule_info.sw_act.flag |= ICE_FLTR_TX;
                rule_info.sw_act.src = vsi->idx;
-               rule_info.rx = false;
                rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE;
        }
 
@@ -910,7 +925,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
                rule_info.sw_act.vsi_handle = dest_vsi->idx;
                rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI;
                rule_info.sw_act.src = hw->pf_id;
-               rule_info.rx = true;
                dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n",
                        tc_fltr->action.fwd.tc.tc_class,
                        rule_info.sw_act.vsi_handle, lkups_cnt);
@@ -921,7 +935,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
                rule_info.sw_act.vsi_handle = dest_vsi->idx;
                rule_info.priority = ICE_SWITCH_FLTR_PRIO_QUEUE;
                rule_info.sw_act.src = hw->pf_id;
-               rule_info.rx = true;
                dev_dbg(dev, "add switch rule action to forward to queue:%u (HW queue %u), lkups_cnt:%u\n",
                        tc_fltr->action.fwd.q.queue,
                        tc_fltr->action.fwd.q.hw_queue, lkups_cnt);
@@ -929,7 +942,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
        case ICE_DROP_PACKET:
                rule_info.sw_act.flag |= ICE_FLTR_RX;
                rule_info.sw_act.src = hw->pf_id;
-               rule_info.rx = true;
                rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI;
                break;
        default:
@@ -1460,8 +1472,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
                                                 VLAN_PRIO_MASK);
                }
 
-               if (match.mask->vlan_tpid)
+               if (match.mask->vlan_tpid) {
                        headers->vlan_hdr.vlan_tpid = match.key->vlan_tpid;
+                       fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_TPID;
+               }
        }
 
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
index 8d5e22a..8bbc1a6 100644 (file)
@@ -33,6 +33,7 @@
 #define ICE_TC_FLWR_FIELD_L2TPV3_SESSID                BIT(26)
 #define ICE_TC_FLWR_FIELD_VLAN_PRIO            BIT(27)
 #define ICE_TC_FLWR_FIELD_CVLAN_PRIO           BIT(28)
+#define ICE_TC_FLWR_FIELD_VLAN_TPID            BIT(29)
 
 #define ICE_TC_FLOWER_MASK_32   0xFFFFFFFF
 
index bf74a2f..b26ce44 100644 (file)
@@ -689,8 +689,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
         */
        ice_vf_clear_all_promisc_modes(vf, vsi);
 
-       ice_eswitch_del_vf_mac_rule(vf);
-
        ice_vf_fdir_exit(vf);
        ice_vf_fdir_init(vf);
        /* clean VF control VSI when resetting VF since it should be setup
@@ -716,7 +714,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
        }
 
        ice_eswitch_update_repr(vsi);
-       ice_eswitch_replay_vf_mac_rule(vf);
 
        /* if the VF has been reset allow it to come up again */
        ice_mbx_clear_malvf(&vf->mbx_info);
@@ -1329,3 +1326,35 @@ void ice_vf_set_initialized(struct ice_vf *vf)
        set_bit(ICE_VF_STATE_INIT, vf->vf_states);
        memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps));
 }
+
+/**
+ * ice_get_vf_ctrl_vsi - Get first VF control VSI pointer
+ * @pf: the PF private structure
+ * @vsi: pointer to the VSI
+ *
+ * Return the first VF control VSI found, other than the VSI
+ * passed as a parameter. This function is used to determine
+ * whether new resources have to be allocated for the control VSI
+ * or whether they can be shared with an existing one.
+ *
+ * Return the found VF control VSI pointer other than itself.
+ * Return NULL otherwise.
+ *
+ */
+struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+       struct ice_vsi *ctrl_vsi = NULL;
+       struct ice_vf *vf;
+       unsigned int bkt;
+
+       rcu_read_lock();
+       ice_for_each_vf_rcu(pf, bkt, vf) {
+               if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
+                       ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+                       break;
+               }
+       }
+
+       rcu_read_unlock();
+       return ctrl_vsi;
+}
index a38ef00..67172fd 100644 (file)
@@ -227,6 +227,7 @@ int
 ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m);
 int ice_reset_vf(struct ice_vf *vf, u32 flags);
 void ice_reset_all_vfs(struct ice_pf *pf);
+struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi);
 #else /* CONFIG_PCI_IOV */
 static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id)
 {
@@ -291,6 +292,12 @@ static inline int ice_reset_vf(struct ice_vf *vf, u32 flags)
 static inline void ice_reset_all_vfs(struct ice_pf *pf)
 {
 }
+
+static inline struct ice_vsi *
+ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+       return NULL;
+}
 #endif /* !CONFIG_PCI_IOV */
 
 #endif /* _ICE_VF_LIB_H_ */
index f4a524f..efbc296 100644 (file)
@@ -3730,7 +3730,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg)
 
        for (i = 0; i < al->num_elements; i++) {
                u8 *mac_addr = al->list[i].addr;
-               int result;
 
                if (!is_unicast_ether_addr(mac_addr) ||
                    ether_addr_equal(mac_addr, vf->hw_lan_addr))
@@ -3742,13 +3741,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg)
                        goto handle_mac_exit;
                }
 
-               result = ice_eswitch_add_vf_mac_rule(pf, vf, mac_addr);
-               if (result) {
-                       dev_err(ice_pf_to_dev(pf), "Failed to add MAC %pM for VF %d\n, error %d\n",
-                               mac_addr, vf->vf_id, result);
-                       goto handle_mac_exit;
-               }
-
                ice_vfhw_mac_add(vf, &al->list[i]);
                vf->num_mac++;
                break;
index bcda2e0..1279c1f 100644 (file)
@@ -219,7 +219,7 @@ static struct ice_update_recipe_lkup_idx_params ice_dvm_dflt_recipes[] = {
                .rid = ICE_SW_LKUP_VLAN,
                .fv_idx = ICE_PKT_FLAGS_0_TO_15_FV_IDX,
                .ignore_valid = false,
-               .mask = ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK,
+               .mask = ICE_PKT_VLAN_MASK,
                .mask_valid = true,
                .lkup_idx = ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX,
        },
index d1e489d..a7fe2b4 100644 (file)
@@ -90,7 +90,6 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
 {
        struct ice_pf *pf = vsi->back;
        struct ice_hw *hw = &pf->hw;
-       int base = vsi->base_vector;
        u16 reg;
        u32 val;
 
@@ -103,11 +102,9 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
        wr32(hw, QINT_RQCTL(reg), val);
 
        if (q_vector) {
-               u16 v_idx = q_vector->v_idx;
-
                wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
                ice_flush(hw);
-               synchronize_irq(pf->msix_entries[v_idx + base].vector);
+               synchronize_irq(q_vector->irq.virq);
        }
 }
 
index 58872a4..c5cdb88 100644 (file)
@@ -183,11 +183,13 @@ static int igb_resume(struct device *);
 static int igb_runtime_suspend(struct device *dev);
 static int igb_runtime_resume(struct device *dev);
 static int igb_runtime_idle(struct device *dev);
+#ifdef CONFIG_PM
 static const struct dev_pm_ops igb_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
        SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
                        igb_runtime_idle)
 };
+#endif
 static void igb_shutdown(struct pci_dev *);
 static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
 #ifdef CONFIG_IGB_DCA
index 34aebf0..18d4af9 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
 #include <linux/net_tstamp.h>
+#include <linux/bitfield.h>
 
 #include "igc_hw.h"
 
@@ -311,6 +312,33 @@ extern char igc_driver_name[];
 #define IGC_MRQC_RSS_FIELD_IPV4_UDP    0x00400000
 #define IGC_MRQC_RSS_FIELD_IPV6_UDP    0x00800000
 
+/* RX-desc Write-Back format RSS Type's */
+enum igc_rss_type_num {
+       IGC_RSS_TYPE_NO_HASH            = 0,
+       IGC_RSS_TYPE_HASH_TCP_IPV4      = 1,
+       IGC_RSS_TYPE_HASH_IPV4          = 2,
+       IGC_RSS_TYPE_HASH_TCP_IPV6      = 3,
+       IGC_RSS_TYPE_HASH_IPV6_EX       = 4,
+       IGC_RSS_TYPE_HASH_IPV6          = 5,
+       IGC_RSS_TYPE_HASH_TCP_IPV6_EX   = 6,
+       IGC_RSS_TYPE_HASH_UDP_IPV4      = 7,
+       IGC_RSS_TYPE_HASH_UDP_IPV6      = 8,
+       IGC_RSS_TYPE_HASH_UDP_IPV6_EX   = 9,
+       IGC_RSS_TYPE_MAX                = 10,
+};
+#define IGC_RSS_TYPE_MAX_TABLE         16
+#define IGC_RSS_TYPE_MASK              GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
+
+/* igc_rss_type - Rx descriptor RSS type field */
+static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
+{
+       /* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved)
+        * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info)
+        * is slightly slower than via u32 (wb.lower.lo_dword.data)
+        */
+       return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK);
+}
+
 /* Interrupt defines */
 #define IGC_START_ITR                  648 /* ~6000 ints/sec */
 #define IGC_4K_ITR                     980
@@ -471,6 +499,13 @@ struct igc_rx_buffer {
        };
 };
 
+/* context wrapper around xdp_buff to provide access to descriptor metadata */
+struct igc_xdp_buff {
+       struct xdp_buff xdp;
+       union igc_adv_rx_desc *rx_desc;
+       ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */
+};
+
 struct igc_q_vector {
        struct igc_adapter *adapter;    /* backlink */
        void __iomem *itr_register;
index 1c46768..88145c3 100644 (file)
@@ -1690,14 +1690,36 @@ static void igc_rx_checksum(struct igc_ring *ring,
                   le32_to_cpu(rx_desc->wb.upper.status_error));
 }
 
+/* Mapping HW RSS Type to enum pkt_hash_types */
+static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
+       [IGC_RSS_TYPE_NO_HASH]          = PKT_HASH_TYPE_L2,
+       [IGC_RSS_TYPE_HASH_TCP_IPV4]    = PKT_HASH_TYPE_L4,
+       [IGC_RSS_TYPE_HASH_IPV4]        = PKT_HASH_TYPE_L3,
+       [IGC_RSS_TYPE_HASH_TCP_IPV6]    = PKT_HASH_TYPE_L4,
+       [IGC_RSS_TYPE_HASH_IPV6_EX]     = PKT_HASH_TYPE_L3,
+       [IGC_RSS_TYPE_HASH_IPV6]        = PKT_HASH_TYPE_L3,
+       [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
+       [IGC_RSS_TYPE_HASH_UDP_IPV4]    = PKT_HASH_TYPE_L4,
+       [IGC_RSS_TYPE_HASH_UDP_IPV6]    = PKT_HASH_TYPE_L4,
+       [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
+       [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
+       [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask   */
+       [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisions      */
+       [13] = PKT_HASH_TYPE_NONE,
+       [14] = PKT_HASH_TYPE_NONE,
+       [15] = PKT_HASH_TYPE_NONE,
+};
+
 static inline void igc_rx_hash(struct igc_ring *ring,
                               union igc_adv_rx_desc *rx_desc,
                               struct sk_buff *skb)
 {
-       if (ring->netdev->features & NETIF_F_RXHASH)
-               skb_set_hash(skb,
-                            le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
-                            PKT_HASH_TYPE_L3);
+       if (ring->netdev->features & NETIF_F_RXHASH) {
+               u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+               u32 rss_type = igc_rss_type(rx_desc);
+
+               skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
+       }
 }
 
 static void igc_rx_vlan(struct igc_ring *rx_ring,
@@ -2214,6 +2236,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
        if (!count)
                return ok;
 
+       XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff);
+
        desc = IGC_RX_DESC(ring, i);
        bi = &ring->rx_buffer_info[i];
        i -= ring->count;
@@ -2387,6 +2411,8 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
        nq = txring_txq(ring);
 
        __netif_tx_lock(nq, cpu);
+       /* Avoid transmit queue timeout since we share it with the slow path */
+       txq_trans_cond_update(nq);
        res = igc_xdp_init_tx_descriptor(ring, xdpf);
        __netif_tx_unlock(nq);
        return res;
@@ -2498,8 +2524,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
                union igc_adv_rx_desc *rx_desc;
                struct igc_rx_buffer *rx_buffer;
                unsigned int size, truesize;
+               struct igc_xdp_buff ctx;
                ktime_t timestamp = 0;
-               struct xdp_buff xdp;
                int pkt_offset = 0;
                void *pktbuf;
 
@@ -2528,18 +2554,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
                if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
                        timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
                                                        pktbuf);
+                       ctx.rx_ts = timestamp;
                        pkt_offset = IGC_TS_HDR_LEN;
                        size -= IGC_TS_HDR_LEN;
                }
 
                if (!skb) {
-                       xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
-                       xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
+                       xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq);
+                       xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring),
                                         igc_rx_offset(rx_ring) + pkt_offset,
                                         size, true);
-                       xdp_buff_clear_frags_flag(&xdp);
+                       xdp_buff_clear_frags_flag(&ctx.xdp);
+                       ctx.rx_desc = rx_desc;
 
-                       skb = igc_xdp_run_prog(adapter, &xdp);
+                       skb = igc_xdp_run_prog(adapter, &ctx.xdp);
                }
 
                if (IS_ERR(skb)) {
@@ -2561,9 +2589,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
                } else if (skb)
                        igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
                else if (ring_uses_build_skb(rx_ring))
-                       skb = igc_build_skb(rx_ring, rx_buffer, &xdp);
+                       skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp);
                else
-                       skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
+                       skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp,
                                                timestamp);
 
                /* exit if we failed to retrieve a buffer */
@@ -2664,6 +2692,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
        napi_gro_receive(&q_vector->napi, skb);
 }
 
+static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp)
+{
+       /* xdp_buff pointer used by ZC code path is allocated as xdp_buff_xsk. The
+        * igc_xdp_buff shares its layout with xdp_buff_xsk and private
+        * igc_xdp_buff fields fall into xdp_buff_xsk->cb
+        */
+       return (struct igc_xdp_buff *)xdp;
+}
+
 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
 {
        struct igc_adapter *adapter = q_vector->adapter;
@@ -2682,6 +2719,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
        while (likely(total_packets < budget)) {
                union igc_adv_rx_desc *desc;
                struct igc_rx_buffer *bi;
+               struct igc_xdp_buff *ctx;
                ktime_t timestamp = 0;
                unsigned int size;
                int res;
@@ -2699,9 +2737,13 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
 
                bi = &ring->rx_buffer_info[ntc];
 
+               ctx = xsk_buff_to_igc_ctx(bi->xdp);
+               ctx->rx_desc = desc;
+
                if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
                        timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
                                                        bi->xdp->data);
+                       ctx->rx_ts = timestamp;
 
                        bi->xdp->data += IGC_TS_HDR_LEN;
 
@@ -2789,6 +2831,9 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
 
        __netif_tx_lock(nq, cpu);
 
+       /* Avoid transmit queue timeout since we share it with the slow path */
+       txq_trans_cond_update(nq);
+
        budget = igc_desc_unused(ring);
 
        while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
@@ -6068,9 +6113,18 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
        size_t n;
        int i;
 
-       adapter->qbv_enable = qopt->enable;
+       switch (qopt->cmd) {
+       case TAPRIO_CMD_REPLACE:
+               adapter->qbv_enable = true;
+               break;
+       case TAPRIO_CMD_DESTROY:
+               adapter->qbv_enable = false;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
 
-       if (!qopt->enable)
+       if (!adapter->qbv_enable)
                return igc_tsn_clear_schedule(adapter);
 
        if (qopt->base_time < 0)
@@ -6314,6 +6368,9 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
 
        __netif_tx_lock(nq, cpu);
 
+       /* Avoid transmit queue timeout since we share it with the slow path */
+       txq_trans_cond_update(nq);
+
        drops = 0;
        for (i = 0; i < num_frames; i++) {
                int err;
@@ -6454,6 +6511,58 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
        return value;
 }
 
+/* Mapping HW RSS Type to enum xdp_rss_hash_type */
+static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = {
+       [IGC_RSS_TYPE_NO_HASH]          = XDP_RSS_TYPE_L2,
+       [IGC_RSS_TYPE_HASH_TCP_IPV4]    = XDP_RSS_TYPE_L4_IPV4_TCP,
+       [IGC_RSS_TYPE_HASH_IPV4]        = XDP_RSS_TYPE_L3_IPV4,
+       [IGC_RSS_TYPE_HASH_TCP_IPV6]    = XDP_RSS_TYPE_L4_IPV6_TCP,
+       [IGC_RSS_TYPE_HASH_IPV6_EX]     = XDP_RSS_TYPE_L3_IPV6_EX,
+       [IGC_RSS_TYPE_HASH_IPV6]        = XDP_RSS_TYPE_L3_IPV6,
+       [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
+       [IGC_RSS_TYPE_HASH_UDP_IPV4]    = XDP_RSS_TYPE_L4_IPV4_UDP,
+       [IGC_RSS_TYPE_HASH_UDP_IPV6]    = XDP_RSS_TYPE_L4_IPV6_UDP,
+       [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX,
+       [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
+       [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask   */
+       [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisions      */
+       [13] = XDP_RSS_TYPE_NONE,
+       [14] = XDP_RSS_TYPE_NONE,
+       [15] = XDP_RSS_TYPE_NONE,
+};
+
+static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
+                          enum xdp_rss_hash_type *rss_type)
+{
+       const struct igc_xdp_buff *ctx = (void *)_ctx;
+
+       if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))
+               return -ENODATA;
+
+       *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss);
+       *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)];
+
+       return 0;
+}
+
+static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp)
+{
+       const struct igc_xdp_buff *ctx = (void *)_ctx;
+
+       if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) {
+               *timestamp = ctx->rx_ts;
+
+               return 0;
+       }
+
+       return -ENODATA;
+}
+
+static const struct xdp_metadata_ops igc_xdp_metadata_ops = {
+       .xmo_rx_hash                    = igc_xdp_rx_hash,
+       .xmo_rx_timestamp               = igc_xdp_rx_timestamp,
+};
+
 /**
  * igc_probe - Device Initialization Routine
  * @pdev: PCI device information struct
@@ -6527,6 +6636,7 @@ static int igc_probe(struct pci_dev *pdev,
        hw->hw_addr = adapter->io_addr;
 
        netdev->netdev_ops = &igc_netdev_ops;
+       netdev->xdp_metadata_ops = &igc_xdp_metadata_ops;
        igc_ethtool_set_ops(netdev);
        netdev->watchdog_timeo = 5 * HZ;
 
@@ -6554,6 +6664,7 @@ static int igc_probe(struct pci_dev *pdev,
        netdev->features |= NETIF_F_TSO;
        netdev->features |= NETIF_F_TSO6;
        netdev->features |= NETIF_F_TSO_ECN;
+       netdev->features |= NETIF_F_RXHASH;
        netdev->features |= NETIF_F_RXCSUM;
        netdev->features |= NETIF_F_HW_CSUM;
        netdev->features |= NETIF_F_SCTP_CRC;
index 2cad76d..e2abc00 100644 (file)
 
 #define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
 
+/* The size of a TSO header page */
+#define MVNETA_TSO_PAGE_SIZE (2 * PAGE_SIZE)
+
+/* Number of TSO headers per page. This should be a power of 2 */
+#define MVNETA_TSO_PER_PAGE (MVNETA_TSO_PAGE_SIZE / TSO_HEADER_SIZE)
+
+/* Maximum number of TSO header pages */
+#define MVNETA_MAX_TSO_PAGES (MVNETA_MAX_TXD / MVNETA_TSO_PER_PAGE)
+
 /* descriptor aligned size */
 #define MVNETA_DESC_ALIGNED_SIZE       32
 
                         MVNETA_SKB_HEADROOM))
 #define MVNETA_MAX_RX_BUF_SIZE (PAGE_SIZE - MVNETA_SKB_PAD)
 
-#define IS_TSO_HEADER(txq, addr) \
-       ((addr >= txq->tso_hdrs_phys) && \
-        (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE))
-
 #define MVNETA_RX_GET_BM_POOL_ID(rxd) \
        (((rxd)->status & MVNETA_RXD_BM_POOL_MASK) >> MVNETA_RXD_BM_POOL_SHIFT)
 
@@ -638,6 +643,7 @@ struct mvneta_rx_desc {
 #endif
 
 enum mvneta_tx_buf_type {
+       MVNETA_TYPE_TSO,
        MVNETA_TYPE_SKB,
        MVNETA_TYPE_XDP_TX,
        MVNETA_TYPE_XDP_NDO,
@@ -690,10 +696,10 @@ struct mvneta_tx_queue {
        int next_desc_to_proc;
 
        /* DMA buffers for TSO headers */
-       char *tso_hdrs;
+       char *tso_hdrs[MVNETA_MAX_TSO_PAGES];
 
        /* DMA address of TSO headers */
-       dma_addr_t tso_hdrs_phys;
+       dma_addr_t tso_hdrs_phys[MVNETA_MAX_TSO_PAGES];
 
        /* Affinity mask for CPUs*/
        cpumask_t affinity_mask;
@@ -1878,12 +1884,13 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 
                mvneta_txq_inc_get(txq);
 
-               if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr) &&
-                   buf->type != MVNETA_TYPE_XDP_TX)
+               if (buf->type == MVNETA_TYPE_XDP_NDO ||
+                   buf->type == MVNETA_TYPE_SKB)
                        dma_unmap_single(pp->dev->dev.parent,
                                         tx_desc->buf_phys_addr,
                                         tx_desc->data_size, DMA_TO_DEVICE);
-               if (buf->type == MVNETA_TYPE_SKB && buf->skb) {
+               if ((buf->type == MVNETA_TYPE_TSO ||
+                    buf->type == MVNETA_TYPE_SKB) && buf->skb) {
                        bytes_compl += buf->skb->len;
                        pkts_compl++;
                        dev_kfree_skb_any(buf->skb);
@@ -2369,9 +2376,8 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
        if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
                skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];
 
-               skb_frag_off_set(frag, pp->rx_offset_correction);
-               skb_frag_size_set(frag, data_len);
-               __skb_frag_set_page(frag, page);
+               skb_frag_fill_page_desc(frag, page,
+                                       pp->rx_offset_correction, data_len);
 
                if (!xdp_buff_has_frags(xdp)) {
                        sinfo->xdp_frags_size = *size;
@@ -2661,20 +2667,72 @@ err_drop_frame:
        return rx_done;
 }
 
-static inline void
-mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq)
+static void mvneta_free_tso_hdrs(struct mvneta_port *pp,
+                                struct mvneta_tx_queue *txq)
+{
+       struct device *dev = pp->dev->dev.parent;
+       int i;
+
+       for (i = 0; i < MVNETA_MAX_TSO_PAGES; i++) {
+               if (txq->tso_hdrs[i]) {
+                       dma_free_coherent(dev, MVNETA_TSO_PAGE_SIZE,
+                                         txq->tso_hdrs[i],
+                                         txq->tso_hdrs_phys[i]);
+                       txq->tso_hdrs[i] = NULL;
+               }
+       }
+}
+
+static int mvneta_alloc_tso_hdrs(struct mvneta_port *pp,
+                                struct mvneta_tx_queue *txq)
+{
+       struct device *dev = pp->dev->dev.parent;
+       int i, num;
+
+       num = DIV_ROUND_UP(txq->size, MVNETA_TSO_PER_PAGE);
+       for (i = 0; i < num; i++) {
+               txq->tso_hdrs[i] = dma_alloc_coherent(dev, MVNETA_TSO_PAGE_SIZE,
+                                                     &txq->tso_hdrs_phys[i],
+                                                     GFP_KERNEL);
+               if (!txq->tso_hdrs[i]) {
+                       mvneta_free_tso_hdrs(pp, txq);
+                       return -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+
+static char *mvneta_get_tso_hdr(struct mvneta_tx_queue *txq, dma_addr_t *dma)
+{
+       int index, offset;
+
+       index = txq->txq_put_index / MVNETA_TSO_PER_PAGE;
+       offset = (txq->txq_put_index % MVNETA_TSO_PER_PAGE) * TSO_HEADER_SIZE;
+
+       *dma = txq->tso_hdrs_phys[index] + offset;
+
+       return txq->tso_hdrs[index] + offset;
+}
+
+static void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq,
+                              struct tso_t *tso, int size, bool is_last)
 {
        struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
        int hdr_len = skb_tcp_all_headers(skb);
        struct mvneta_tx_desc *tx_desc;
+       dma_addr_t hdr_phys;
+       char *hdr;
+
+       hdr = mvneta_get_tso_hdr(txq, &hdr_phys);
+       tso_build_hdr(skb, hdr, tso, size, is_last);
 
        tx_desc = mvneta_txq_next_desc_get(txq);
        tx_desc->data_size = hdr_len;
        tx_desc->command = mvneta_skb_tx_csum(skb);
        tx_desc->command |= MVNETA_TXD_F_DESC;
-       tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
-                                txq->txq_put_index * TSO_HEADER_SIZE;
-       buf->type = MVNETA_TYPE_SKB;
+       tx_desc->buf_phys_addr = hdr_phys;
+       buf->type = MVNETA_TYPE_TSO;
        buf->skb = NULL;
 
        mvneta_txq_inc_put(txq);
@@ -2714,14 +2772,41 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
        return 0;
 }
 
+static void mvneta_release_descs(struct mvneta_port *pp,
+                                struct mvneta_tx_queue *txq,
+                                int first, int num)
+{
+       int desc_idx, i;
+
+       desc_idx = first + num;
+       if (desc_idx >= txq->size)
+               desc_idx -= txq->size;
+
+       for (i = num; i >= 0; i--) {
+               struct mvneta_tx_desc *tx_desc = txq->descs + desc_idx;
+               struct mvneta_tx_buf *buf = &txq->buf[desc_idx];
+
+               if (buf->type == MVNETA_TYPE_SKB)
+                       dma_unmap_single(pp->dev->dev.parent,
+                                        tx_desc->buf_phys_addr,
+                                        tx_desc->data_size,
+                                        DMA_TO_DEVICE);
+
+               mvneta_txq_desc_put(txq);
+
+               if (desc_idx == 0)
+                       desc_idx = txq->size;
+               desc_idx -= 1;
+       }
+}
+
 static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
                         struct mvneta_tx_queue *txq)
 {
        int hdr_len, total_len, data_left;
-       int desc_count = 0;
+       int first_desc, desc_count = 0;
        struct mvneta_port *pp = netdev_priv(dev);
        struct tso_t tso;
-       int i;
 
        /* Count needed descriptors */
        if ((txq->count + tso_count_descs(skb)) >= txq->size)
@@ -2732,22 +2817,19 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
                return 0;
        }
 
+       first_desc = txq->txq_put_index;
+
        /* Initialize the TSO handler, and prepare the first payload */
        hdr_len = tso_start(skb, &tso);
 
        total_len = skb->len - hdr_len;
        while (total_len > 0) {
-               char *hdr;
-
                data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
                total_len -= data_left;
                desc_count++;
 
                /* prepare packet headers: MAC + IP + TCP */
-               hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE;
-               tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
-
-               mvneta_tso_put_hdr(skb, txq);
+               mvneta_tso_put_hdr(skb, txq, &tso, data_left, total_len == 0);
 
                while (data_left > 0) {
                        int size;
@@ -2772,15 +2854,7 @@ err_release:
        /* Release all used data descriptors; header descriptors must not
         * be DMA-unmapped.
         */
-       for (i = desc_count - 1; i >= 0; i--) {
-               struct mvneta_tx_desc *tx_desc = txq->descs + i;
-               if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
-                       dma_unmap_single(pp->dev->dev.parent,
-                                        tx_desc->buf_phys_addr,
-                                        tx_desc->data_size,
-                                        DMA_TO_DEVICE);
-               mvneta_txq_desc_put(txq);
-       }
+       mvneta_release_descs(pp, txq, first_desc, desc_count - 1);
        return 0;
 }
 
@@ -2790,6 +2864,7 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 {
        struct mvneta_tx_desc *tx_desc;
        int i, nr_frags = skb_shinfo(skb)->nr_frags;
+       int first_desc = txq->txq_put_index;
 
        for (i = 0; i < nr_frags; i++) {
                struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
@@ -2828,15 +2903,7 @@ error:
        /* Release all descriptors that were used to map fragments of
         * this packet, as well as the corresponding DMA mappings
         */
-       for (i = i - 1; i >= 0; i--) {
-               tx_desc = txq->descs + i;
-               dma_unmap_single(pp->dev->dev.parent,
-                                tx_desc->buf_phys_addr,
-                                tx_desc->data_size,
-                                DMA_TO_DEVICE);
-               mvneta_txq_desc_put(txq);
-       }
-
+       mvneta_release_descs(pp, txq, first_desc, i - 1);
        return -ENOMEM;
 }
 
@@ -3457,7 +3524,7 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
 static int mvneta_txq_sw_init(struct mvneta_port *pp,
                              struct mvneta_tx_queue *txq)
 {
-       int cpu;
+       int cpu, err;
 
        txq->size = pp->tx_ring_size;
 
@@ -3482,11 +3549,9 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
                return -ENOMEM;
 
        /* Allocate DMA buffers for TSO MAC/IP/TCP headers */
-       txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
-                                          txq->size * TSO_HEADER_SIZE,
-                                          &txq->tso_hdrs_phys, GFP_KERNEL);
-       if (!txq->tso_hdrs)
-               return -ENOMEM;
+       err = mvneta_alloc_tso_hdrs(pp, txq);
+       if (err)
+               return err;
 
        /* Setup XPS mapping */
        if (pp->neta_armada3700)
@@ -3538,10 +3603,7 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
 
        kfree(txq->buf);
 
-       if (txq->tso_hdrs)
-               dma_free_coherent(pp->dev->dev.parent,
-                                 txq->size * TSO_HEADER_SIZE,
-                                 txq->tso_hdrs, txq->tso_hdrs_phys);
+       mvneta_free_tso_hdrs(pp, txq);
        if (txq->descs)
                dma_free_coherent(pp->dev->dev.parent,
                                  txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -3550,7 +3612,6 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
        netdev_tx_reset_queue(nq);
 
        txq->buf               = NULL;
-       txq->tso_hdrs          = NULL;
        txq->descs             = NULL;
        txq->last_desc         = 0;
        txq->next_desc_to_proc = 0;
@@ -5821,6 +5882,8 @@ static int __init mvneta_driver_init(void)
 {
        int ret;
 
+       BUILD_BUG_ON_NOT_POWER_OF_2(MVNETA_TSO_PER_PAGE);
+
        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online",
                                      mvneta_cpu_online,
                                      mvneta_cpu_down_prepare);
index 993ac18..a32d85d 100644 (file)
@@ -32,6 +32,7 @@ config OCTEONTX2_PF
        tristate "Marvell OcteonTX2 NIC Physical Function driver"
        select OCTEONTX2_MBOX
        select NET_DEVLINK
+       select PAGE_POOL
        depends on (64BIT && COMPILE_TEST) || ARM64
        select DIMLIB
        depends on PCI
index 8931864..f5bf719 100644 (file)
@@ -142,7 +142,7 @@ enum nix_scheduler {
 
 #define TXSCH_RR_QTM_MAX               ((1 << 24) - 1)
 #define TXSCH_TL1_DFLT_RR_QTM          TXSCH_RR_QTM_MAX
-#define TXSCH_TL1_DFLT_RR_PRIO         (0x1ull)
+#define TXSCH_TL1_DFLT_RR_PRIO         (0x7ull)
 #define CN10K_MAX_DWRR_WEIGHT          16384 /* Weight is 14bit on CN10K */
 
 /* Min/Max packet sizes, excluding FCS */
index 9533b1d..3b26893 100644 (file)
@@ -1222,6 +1222,11 @@ static int rvu_dbg_npa_ctx_display(struct seq_file *m, void *unused, int ctype)
 
        for (aura = id; aura < max_id; aura++) {
                aq_req.aura_id = aura;
+
+               /* Skip if queue is uninitialized */
+               if (ctype == NPA_AQ_CTYPE_POOL && !test_bit(aura, pfvf->pool_bmap))
+                       continue;
+
                seq_printf(m, "======%s : %d=======\n",
                           (ctype == NPA_AQ_CTYPE_AURA) ? "AURA" : "POOL",
                        aq_req.aura_id);
index 4ad707e..79ed7af 100644 (file)
@@ -1691,6 +1691,42 @@ exit:
        return true;
 }
 
+static void nix_reset_tx_schedule(struct rvu *rvu, int blkaddr,
+                                 int lvl, int schq)
+{
+       u64 tlx_parent = 0, tlx_schedule = 0;
+
+       switch (lvl) {
+       case NIX_TXSCH_LVL_TL2:
+               tlx_parent   = NIX_AF_TL2X_PARENT(schq);
+               tlx_schedule = NIX_AF_TL2X_SCHEDULE(schq);
+               break;
+       case NIX_TXSCH_LVL_TL3:
+               tlx_parent   = NIX_AF_TL3X_PARENT(schq);
+               tlx_schedule = NIX_AF_TL3X_SCHEDULE(schq);
+               break;
+       case NIX_TXSCH_LVL_TL4:
+               tlx_parent   = NIX_AF_TL4X_PARENT(schq);
+               tlx_schedule = NIX_AF_TL4X_SCHEDULE(schq);
+               break;
+       case NIX_TXSCH_LVL_MDQ:
+               /* no need to reset SMQ_CFG as HW clears this CSR
+                * on SMQ flush
+                */
+               tlx_parent   = NIX_AF_MDQX_PARENT(schq);
+               tlx_schedule = NIX_AF_MDQX_SCHEDULE(schq);
+               break;
+       default:
+               return;
+       }
+
+       if (tlx_parent)
+               rvu_write64(rvu, blkaddr, tlx_parent, 0x0);
+
+       if (tlx_schedule)
+               rvu_write64(rvu, blkaddr, tlx_schedule, 0x0);
+}
+
 /* Disable shaping of pkts by a scheduler queue
  * at a given scheduler level.
  */
@@ -2039,6 +2075,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
                                pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
                        nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
                        nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
+                       nix_reset_tx_schedule(rvu, blkaddr, lvl, schq);
                }
 
                for (idx = 0; idx < req->schq[lvl]; idx++) {
@@ -2048,6 +2085,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
                                pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
                        nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
                        nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
+                       nix_reset_tx_schedule(rvu, blkaddr, lvl, schq);
                }
        }
 
@@ -2143,6 +2181,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
                                continue;
                        nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
                        nix_clear_tx_xoff(rvu, blkaddr, lvl, schq);
+                       nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
                }
        }
        nix_clear_tx_xoff(rvu, blkaddr, NIX_TXSCH_LVL_TL1,
@@ -2181,6 +2220,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
                for (schq = 0; schq < txsch->schq.max; schq++) {
                        if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
                                continue;
+                       nix_reset_tx_schedule(rvu, blkaddr, lvl, schq);
                        rvu_free_rsrc(&txsch->schq, schq);
                        txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
                }
@@ -2240,6 +2280,9 @@ static int nix_txschq_free_one(struct rvu *rvu,
         */
        nix_clear_tx_xoff(rvu, blkaddr, lvl, schq);
 
+       nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
+       nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
+
        /* Flush if it is a SMQ. Onus of disabling
         * TL2/3 queue links before SMQ flush is on user
         */
@@ -2249,6 +2292,8 @@ static int nix_txschq_free_one(struct rvu *rvu,
                goto err;
        }
 
+       nix_reset_tx_schedule(rvu, blkaddr, lvl, schq);
+
        /* Free the resource */
        rvu_free_rsrc(&txsch->schq, schq);
        txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
index 73fdb87..5664f76 100644 (file)
@@ -8,7 +8,7 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o
 
 rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
                otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \
-               otx2_devlink.o
+               otx2_devlink.o qos_sq.o qos.o
 rvu_nicvf-y := otx2_vf.o otx2_devlink.o
 
 rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o
index a487a98..6e2fb24 100644 (file)
@@ -6,7 +6,6 @@
 
 #include <linux/rtnetlink.h>
 #include <linux/bitfield.h>
-#include <net/macsec.h>
 #include "otx2_common.h"
 
 #define MCS_TCAM0_MAC_DA_MASK          GENMASK_ULL(47, 0)
@@ -212,6 +211,7 @@ static int cn10k_mcs_write_rx_secy(struct otx2_nic *pfvf,
        struct mcs_secy_plcy_write_req *req;
        struct mbox *mbox = &pfvf->mbox;
        u64 policy;
+       u8 cipher;
        int ret;
 
        mutex_lock(&mbox->lock);
@@ -227,7 +227,21 @@ static int cn10k_mcs_write_rx_secy(struct otx2_nic *pfvf,
                policy |= MCS_RX_SECY_PLCY_RP;
 
        policy |= MCS_RX_SECY_PLCY_AUTH_ENA;
-       policy |= FIELD_PREP(MCS_RX_SECY_PLCY_CIP, MCS_GCM_AES_128);
+
+       switch (secy->key_len) {
+       case 16:
+               cipher = secy->xpn ? MCS_GCM_AES_XPN_128 : MCS_GCM_AES_128;
+               break;
+       case 32:
+               cipher = secy->xpn ? MCS_GCM_AES_XPN_256 : MCS_GCM_AES_256;
+               break;
+       default:
+               cipher = MCS_GCM_AES_128;
+               dev_warn(pfvf->dev, "Unsupported key length\n");
+               break;
+       }
+
+       policy |= FIELD_PREP(MCS_RX_SECY_PLCY_CIP, cipher);
        policy |= FIELD_PREP(MCS_RX_SECY_PLCY_VAL, secy->validate_frames);
 
        policy |= MCS_RX_SECY_PLCY_ENA;
@@ -323,9 +337,12 @@ static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf,
 {
        unsigned char *src = rxsc->sa_key[assoc_num];
        struct mcs_sa_plcy_write_req *plcy_req;
+       u8 *salt_p = rxsc->salt[assoc_num];
        struct mcs_rx_sc_sa_map *map_req;
        struct mbox *mbox = &pfvf->mbox;
+       u64 ssci_salt_95_64 = 0;
        u8 reg, key_len;
+       u64 salt_63_0;
        int ret;
 
        mutex_lock(&mbox->lock);
@@ -349,6 +366,15 @@ static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf,
                reg++;
        }
 
+       if (secy->xpn) {
+               memcpy((u8 *)&salt_63_0, salt_p, 8);
+               memcpy((u8 *)&ssci_salt_95_64, salt_p + 8, 4);
+               ssci_salt_95_64 |= (__force u64)rxsc->ssci[assoc_num] << 32;
+
+               plcy_req->plcy[0][6] = salt_63_0;
+               plcy_req->plcy[0][7] = ssci_salt_95_64;
+       }
+
        plcy_req->sa_index[0] = rxsc->hw_sa_id[assoc_num];
        plcy_req->sa_cnt = 1;
        plcy_req->dir = MCS_RX;
@@ -400,12 +426,16 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf,
        struct mcs_secy_plcy_write_req *req;
        struct mbox *mbox = &pfvf->mbox;
        struct macsec_tx_sc *sw_tx_sc;
-       /* Insert SecTag after 12 bytes (DA+SA)*/
-       u8 tag_offset = 12;
        u8 sectag_tci = 0;
+       u8 tag_offset;
        u64 policy;
+       u8 cipher;
        int ret;
 
+       /* Insert SecTag after 12 bytes (DA+SA) or 16 bytes
+        * if VLAN tag needs to be sent in clear text.
+        */
+       tag_offset = txsc->vlan_dev ? 16 : 12;
        sw_tx_sc = &secy->tx_sc;
 
        mutex_lock(&mbox->lock);
@@ -434,7 +464,21 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf,
        policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_OFFSET, tag_offset);
        policy |= MCS_TX_SECY_PLCY_INS_MODE;
        policy |= MCS_TX_SECY_PLCY_AUTH_ENA;
-       policy |= FIELD_PREP(MCS_TX_SECY_PLCY_CIP, MCS_GCM_AES_128);
+
+       switch (secy->key_len) {
+       case 16:
+               cipher = secy->xpn ? MCS_GCM_AES_XPN_128 : MCS_GCM_AES_128;
+               break;
+       case 32:
+               cipher = secy->xpn ? MCS_GCM_AES_XPN_256 : MCS_GCM_AES_256;
+               break;
+       default:
+               cipher = MCS_GCM_AES_128;
+               dev_warn(pfvf->dev, "Unsupported key length\n");
+               break;
+       }
+
+       policy |= FIELD_PREP(MCS_TX_SECY_PLCY_CIP, cipher);
 
        if (secy->protect_frames)
                policy |= MCS_TX_SECY_PLCY_PROTECT;
@@ -544,8 +588,11 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf,
 {
        unsigned char *src = txsc->sa_key[assoc_num];
        struct mcs_sa_plcy_write_req *plcy_req;
+       u8 *salt_p = txsc->salt[assoc_num];
        struct mbox *mbox = &pfvf->mbox;
+       u64 ssci_salt_95_64 = 0;
        u8 reg, key_len;
+       u64 salt_63_0;
        int ret;
 
        mutex_lock(&mbox->lock);
@@ -561,6 +608,15 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf,
                reg++;
        }
 
+       if (secy->xpn) {
+               memcpy((u8 *)&salt_63_0, salt_p, 8);
+               memcpy((u8 *)&ssci_salt_95_64, salt_p + 8, 4);
+               ssci_salt_95_64 |= (__force u64)txsc->ssci[assoc_num] << 32;
+
+               plcy_req->plcy[0][6] = salt_63_0;
+               plcy_req->plcy[0][7] = ssci_salt_95_64;
+       }
+
        plcy_req->plcy[0][8] = assoc_num;
        plcy_req->sa_index[0] = txsc->hw_sa_id[assoc_num];
        plcy_req->sa_cnt = 1;
@@ -922,8 +978,7 @@ static int cn10k_mcs_secy_tx_cfg(struct otx2_nic *pfvf, struct macsec_secy *secy
 {
        if (sw_tx_sa) {
                cn10k_mcs_write_tx_sa_plcy(pfvf, secy, txsc, sa_num);
-               cn10k_write_tx_sa_pn(pfvf, txsc, sa_num,
-                                    sw_tx_sa->next_pn_halves.lower);
+               cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, sw_tx_sa->next_pn);
                cn10k_mcs_link_tx_sa2sc(pfvf, secy, txsc, sa_num,
                                        sw_tx_sa->active);
        }
@@ -959,7 +1014,7 @@ static int cn10k_mcs_secy_rx_cfg(struct otx2_nic *pfvf,
                        cn10k_mcs_write_rx_sa_plcy(pfvf, secy, mcs_rx_sc,
                                                   sa_num, sw_rx_sa->active);
                        cn10k_mcs_write_rx_sa_pn(pfvf, mcs_rx_sc, sa_num,
-                                                sw_rx_sa->next_pn_halves.lower);
+                                                sw_rx_sa->next_pn);
                }
 
                cn10k_mcs_write_rx_flowid(pfvf, mcs_rx_sc, hw_secy_id);
@@ -1053,7 +1108,7 @@ static void cn10k_mcs_sync_stats(struct otx2_nic *pfvf, struct macsec_secy *secy
 
 static int cn10k_mdo_open(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
        struct macsec_tx_sa *sw_tx_sa;
@@ -1077,7 +1132,7 @@ static int cn10k_mdo_open(struct macsec_context *ctx)
 
 static int cn10k_mdo_stop(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct cn10k_mcs_txsc *txsc;
        int err;
@@ -1095,7 +1150,7 @@ static int cn10k_mdo_stop(struct macsec_context *ctx)
 
 static int cn10k_mdo_add_secy(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
        struct cn10k_mcs_txsc *txsc;
@@ -1103,13 +1158,6 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx)
        if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN)
                return -EOPNOTSUPP;
 
-       /* Stick to 16 bytes key len until XPN support is added */
-       if (secy->key_len != 16)
-               return -EOPNOTSUPP;
-
-       if (secy->xpn)
-               return -EOPNOTSUPP;
-
        txsc = cn10k_mcs_create_txsc(pfvf);
        if (IS_ERR(txsc))
                return -ENOSPC;
@@ -1118,6 +1166,7 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx)
        txsc->encoding_sa = secy->tx_sc.encoding_sa;
        txsc->last_validate_frames = secy->validate_frames;
        txsc->last_replay_protect = secy->replay_protect;
+       txsc->vlan_dev = is_vlan_dev(ctx->netdev);
 
        list_add(&txsc->entry, &cfg->txsc_list);
 
@@ -1129,7 +1178,7 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx)
 
 static int cn10k_mdo_upd_secy(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
        struct macsec_tx_sa *sw_tx_sa;
@@ -1164,7 +1213,7 @@ static int cn10k_mdo_upd_secy(struct macsec_context *ctx)
 
 static int cn10k_mdo_del_secy(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct cn10k_mcs_txsc *txsc;
 
@@ -1183,7 +1232,7 @@ static int cn10k_mdo_del_secy(struct macsec_context *ctx)
 
 static int cn10k_mdo_add_txsa(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct macsec_tx_sa *sw_tx_sa = ctx->sa.tx_sa;
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
@@ -1202,6 +1251,9 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx)
                return -ENOSPC;
 
        memcpy(&txsc->sa_key[sa_num], ctx->sa.key, secy->key_len);
+       memcpy(&txsc->salt[sa_num], sw_tx_sa->key.salt.bytes, MACSEC_SALT_LEN);
+       txsc->ssci[sa_num] = sw_tx_sa->ssci;
+
        txsc->sa_bmap |= 1 << sa_num;
 
        if (netif_running(secy->netdev)) {
@@ -1210,7 +1262,7 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx)
                        return err;
 
                err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num,
-                                          sw_tx_sa->next_pn_halves.lower);
+                                          sw_tx_sa->next_pn);
                if (err)
                        return err;
 
@@ -1225,7 +1277,7 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx)
 
 static int cn10k_mdo_upd_txsa(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct macsec_tx_sa *sw_tx_sa = ctx->sa.tx_sa;
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
@@ -1243,7 +1295,7 @@ static int cn10k_mdo_upd_txsa(struct macsec_context *ctx)
        if (netif_running(secy->netdev)) {
                /* Keys cannot be changed after creation */
                err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num,
-                                          sw_tx_sa->next_pn_halves.lower);
+                                          sw_tx_sa->next_pn);
                if (err)
                        return err;
 
@@ -1258,7 +1310,7 @@ static int cn10k_mdo_upd_txsa(struct macsec_context *ctx)
 
 static int cn10k_mdo_del_txsa(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        u8 sa_num = ctx->sa.assoc_num;
        struct cn10k_mcs_txsc *txsc;
@@ -1278,7 +1330,7 @@ static int cn10k_mdo_del_txsa(struct macsec_context *ctx)
 
 static int cn10k_mdo_add_rxsc(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
        struct cn10k_mcs_rxsc *rxsc;
@@ -1312,7 +1364,7 @@ static int cn10k_mdo_add_rxsc(struct macsec_context *ctx)
 
 static int cn10k_mdo_upd_rxsc(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
        bool enable = ctx->rx_sc->active;
@@ -1331,7 +1383,7 @@ static int cn10k_mdo_upd_rxsc(struct macsec_context *ctx)
 
 static int cn10k_mdo_del_rxsc(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct cn10k_mcs_rxsc *rxsc;
 
@@ -1349,11 +1401,10 @@ static int cn10k_mdo_del_rxsc(struct macsec_context *ctx)
 
 static int cn10k_mdo_add_rxsa(struct macsec_context *ctx)
 {
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc;
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa;
-       u64 next_pn = rx_sa->next_pn_halves.lower;
        struct macsec_secy *secy = ctx->secy;
        bool sa_in_use = rx_sa->active;
        u8 sa_num = ctx->sa.assoc_num;
@@ -1371,6 +1422,9 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx)
                return -ENOSPC;
 
        memcpy(&rxsc->sa_key[sa_num], ctx->sa.key, ctx->secy->key_len);
+       memcpy(&rxsc->salt[sa_num], rx_sa->key.salt.bytes, MACSEC_SALT_LEN);
+       rxsc->ssci[sa_num] = rx_sa->ssci;
+
        rxsc->sa_bmap |= 1 << sa_num;
 
        if (netif_running(secy->netdev)) {
@@ -1379,7 +1433,8 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx)
                if (err)
                        return err;
 
-               err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, next_pn);
+               err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num,
+                                              rx_sa->next_pn);
                if (err)
                        return err;
        }
@@ -1389,11 +1444,10 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx)
 
 static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx)
 {
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc;
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa;
-       u64 next_pn = rx_sa->next_pn_halves.lower;
        struct macsec_secy *secy = ctx->secy;
        bool sa_in_use = rx_sa->active;
        u8 sa_num = ctx->sa.assoc_num;
@@ -1412,7 +1466,8 @@ static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx)
                if (err)
                        return err;
 
-               err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, next_pn);
+               err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num,
+                                              rx_sa->next_pn);
                if (err)
                        return err;
        }
@@ -1422,8 +1477,8 @@ static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx)
 
 static int cn10k_mdo_del_rxsa(struct macsec_context *ctx)
 {
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc;
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        u8 sa_num = ctx->sa.assoc_num;
        struct cn10k_mcs_rxsc *rxsc;
@@ -1445,8 +1500,8 @@ static int cn10k_mdo_del_rxsa(struct macsec_context *ctx)
 
 static int cn10k_mdo_get_dev_stats(struct macsec_context *ctx)
 {
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct mcs_secy_stats tx_rsp = { 0 }, rx_rsp = { 0 };
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
        struct cn10k_mcs_txsc *txsc;
@@ -1481,7 +1536,7 @@ static int cn10k_mdo_get_dev_stats(struct macsec_context *ctx)
 
 static int cn10k_mdo_get_tx_sc_stats(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct mcs_sc_stats rsp = { 0 };
        struct cn10k_mcs_txsc *txsc;
@@ -1502,7 +1557,7 @@ static int cn10k_mdo_get_tx_sc_stats(struct macsec_context *ctx)
 
 static int cn10k_mdo_get_tx_sa_stats(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct mcs_sa_stats rsp = { 0 };
        u8 sa_num = ctx->sa.assoc_num;
@@ -1525,7 +1580,7 @@ static int cn10k_mdo_get_tx_sa_stats(struct macsec_context *ctx)
 
 static int cn10k_mdo_get_rx_sc_stats(struct macsec_context *ctx)
 {
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct macsec_secy *secy = ctx->secy;
        struct mcs_sc_stats rsp = { 0 };
@@ -1567,8 +1622,8 @@ static int cn10k_mdo_get_rx_sc_stats(struct macsec_context *ctx)
 
 static int cn10k_mdo_get_rx_sa_stats(struct macsec_context *ctx)
 {
+       struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev);
        struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc;
-       struct otx2_nic *pfvf = netdev_priv(ctx->netdev);
        struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg;
        struct mcs_sa_stats rsp = { 0 };
        u8 sa_num = ctx->sa.assoc_num;
index 8a41ad8..a79cb68 100644 (file)
@@ -89,6 +89,11 @@ int otx2_update_sq_stats(struct otx2_nic *pfvf, int qidx)
        if (!pfvf->qset.sq)
                return 0;
 
+       if (qidx >= pfvf->hw.non_qos_queues) {
+               if (!test_bit(qidx - pfvf->hw.non_qos_queues, pfvf->qos.qos_sq_bmap))
+                       return 0;
+       }
+
        otx2_nix_sq_op_stats(&sq->stats, pfvf, qidx);
        return 1;
 }
@@ -513,11 +518,32 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx)
                     (pfvf->hw.cq_ecount_wait - 1));
 }
 
-int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
-                     dma_addr_t *dma)
+static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+                              dma_addr_t *dma)
+{
+       unsigned int offset = 0;
+       struct page *page;
+       size_t sz;
+
+       sz = SKB_DATA_ALIGN(pool->rbsize);
+       sz = ALIGN(sz, OTX2_ALIGN);
+
+       page = page_pool_alloc_frag(pool->page_pool, &offset, sz, GFP_ATOMIC);
+       if (unlikely(!page))
+               return -ENOMEM;
+
+       *dma = page_pool_get_dma_addr(page) + offset;
+       return 0;
+}
+
+static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+                            dma_addr_t *dma)
 {
        u8 *buf;
 
+       if (pool->page_pool)
+               return otx2_alloc_pool_buf(pfvf, pool, dma);
+
        buf = napi_alloc_frag_align(pool->rbsize, OTX2_ALIGN);
        if (unlikely(!buf))
                return -ENOMEM;
@@ -532,8 +558,8 @@ int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
        return 0;
 }
 
-static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
-                          dma_addr_t *dma)
+int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+                   dma_addr_t *dma)
 {
        int ret;
 
@@ -716,7 +742,8 @@ EXPORT_SYMBOL(otx2_smq_flush);
 int otx2_txsch_alloc(struct otx2_nic *pfvf)
 {
        struct nix_txsch_alloc_req *req;
-       int lvl;
+       struct nix_txsch_alloc_rsp *rsp;
+       int lvl, schq, rc;
 
        /* Get memory to put this msg */
        req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox);
@@ -726,43 +753,83 @@ int otx2_txsch_alloc(struct otx2_nic *pfvf)
        /* Request one schq per level */
        for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++)
                req->schq[lvl] = 1;
+       rc = otx2_sync_mbox_msg(&pfvf->mbox);
+       if (rc)
+               return rc;
 
-       return otx2_sync_mbox_msg(&pfvf->mbox);
+       rsp = (struct nix_txsch_alloc_rsp *)
+             otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr);
+       if (IS_ERR(rsp))
+               return PTR_ERR(rsp);
+
+       /* Setup transmit scheduler list */
+       for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++)
+               for (schq = 0; schq < rsp->schq[lvl]; schq++)
+                       pfvf->hw.txschq_list[lvl][schq] =
+                               rsp->schq_list[lvl][schq];
+
+       pfvf->hw.txschq_link_cfg_lvl = rsp->link_cfg_lvl;
+
+       return 0;
 }
 
-int otx2_txschq_stop(struct otx2_nic *pfvf)
+void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq)
 {
        struct nix_txsch_free_req *free_req;
-       int lvl, schq, err;
+       int err;
 
        mutex_lock(&pfvf->mbox.lock);
-       /* Free the transmit schedulers */
+
        free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox);
        if (!free_req) {
                mutex_unlock(&pfvf->mbox.lock);
-               return -ENOMEM;
+               netdev_err(pfvf->netdev,
+                          "Failed alloc txschq free req\n");
+               return;
        }
 
-       free_req->flags = TXSCHQ_FREE_ALL;
+       free_req->schq_lvl = lvl;
+       free_req->schq = schq;
+
        err = otx2_sync_mbox_msg(&pfvf->mbox);
+       if (err) {
+               netdev_err(pfvf->netdev,
+                          "Failed stop txschq %d at level %d\n", schq, lvl);
+       }
+
        mutex_unlock(&pfvf->mbox.lock);
+}
+
+void otx2_txschq_stop(struct otx2_nic *pfvf)
+{
+       int lvl, schq;
+
+       /* free non QOS TLx nodes */
+       for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++)
+               otx2_txschq_free_one(pfvf, lvl,
+                                    pfvf->hw.txschq_list[lvl][0]);
 
        /* Clear the txschq list */
        for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
                for (schq = 0; schq < MAX_TXSCHQ_PER_FUNC; schq++)
                        pfvf->hw.txschq_list[lvl][schq] = 0;
        }
-       return err;
+
 }
 
 void otx2_sqb_flush(struct otx2_nic *pfvf)
 {
        int qidx, sqe_tail, sqe_head;
+       struct otx2_snd_queue *sq;
        u64 incr, *ptr, val;
        int timeout = 1000;
 
        ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS);
-       for (qidx = 0; qidx < pfvf->hw.tot_tx_queues; qidx++) {
+       for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) {
+               sq = &pfvf->qset.sq[qidx];
+               if (!sq->sqb_ptrs)
+                       continue;
+
                incr = (u64)qidx << 32;
                while (timeout) {
                        val = otx2_atomic64_add(incr, ptr);
@@ -862,7 +929,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
        return otx2_sync_mbox_msg(&pfvf->mbox);
 }
 
-static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
+int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
 {
        struct otx2_qset *qset = &pfvf->qset;
        struct otx2_snd_queue *sq;
@@ -935,9 +1002,17 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
                cq->cint_idx = qidx - pfvf->hw.rx_queues;
                cq->cqe_cnt = qset->sqe_cnt;
        } else {
-               cq->cq_type = CQ_XDP;
-               cq->cint_idx = qidx - non_xdp_queues;
-               cq->cqe_cnt = qset->sqe_cnt;
+               if (pfvf->hw.xdp_queues &&
+                   qidx < non_xdp_queues + pfvf->hw.xdp_queues) {
+                       cq->cq_type = CQ_XDP;
+                       cq->cint_idx = qidx - non_xdp_queues;
+                       cq->cqe_cnt = qset->sqe_cnt;
+               } else {
+                       cq->cq_type = CQ_QOS;
+                       cq->cint_idx = qidx - non_xdp_queues -
+                                      pfvf->hw.xdp_queues;
+                       cq->cqe_cnt = qset->sqe_cnt;
+               }
        }
        cq->cqe_size = pfvf->qset.xqe_size;
 
@@ -1048,7 +1123,7 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf)
        }
 
        /* Initialize TX queues */
-       for (qidx = 0; qidx < pfvf->hw.tot_tx_queues; qidx++) {
+       for (qidx = 0; qidx < pfvf->hw.non_qos_queues; qidx++) {
                u16 sqb_aura = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx);
 
                err = otx2_sq_init(pfvf, qidx, sqb_aura);
@@ -1095,7 +1170,7 @@ int otx2_config_nix(struct otx2_nic *pfvf)
 
        /* Set RQ/SQ/CQ counts */
        nixlf->rq_cnt = pfvf->hw.rx_queues;
-       nixlf->sq_cnt = pfvf->hw.tot_tx_queues;
+       nixlf->sq_cnt = otx2_get_total_tx_queues(pfvf);
        nixlf->cq_cnt = pfvf->qset.cq_cnt;
        nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE;
        nixlf->rss_grps = MAX_RSS_GROUPS;
@@ -1133,7 +1208,7 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf)
        int sqb, qidx;
        u64 iova, pa;
 
-       for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) {
+       for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) {
                sq = &qset->sq[qidx];
                if (!sq->sqb_ptrs)
                        continue;
@@ -1151,10 +1226,31 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf)
        }
 }
 
+void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool,
+                   u64 iova, int size)
+{
+       struct page *page;
+       u64 pa;
+
+       pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
+       page = virt_to_head_page(phys_to_virt(pa));
+
+       if (pool->page_pool) {
+               page_pool_put_full_page(pool->page_pool, page, true);
+       } else {
+               dma_unmap_page_attrs(pfvf->dev, iova, size,
+                                    DMA_FROM_DEVICE,
+                                    DMA_ATTR_SKIP_CPU_SYNC);
+
+               put_page(page);
+       }
+}
+
 void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
 {
        int pool_id, pool_start = 0, pool_end = 0, size = 0;
-       u64 iova, pa;
+       struct otx2_pool *pool;
+       u64 iova;
 
        if (type == AURA_NIX_SQ) {
                pool_start = otx2_get_pool_idx(pfvf, type, 0);
@@ -1170,15 +1266,13 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
        /* Free SQB and RQB pointers from the aura pool */
        for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
                iova = otx2_aura_allocptr(pfvf, pool_id);
+               pool = &pfvf->qset.pool[pool_id];
                while (iova) {
                        if (type == AURA_NIX_RQ)
                                iova -= OTX2_HEAD_ROOM;
 
-                       pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
-                       dma_unmap_page_attrs(pfvf->dev, iova, size,
-                                            DMA_FROM_DEVICE,
-                                            DMA_ATTR_SKIP_CPU_SYNC);
-                       put_page(virt_to_page(phys_to_virt(pa)));
+                       otx2_free_bufs(pfvf, pool, iova, size);
+
                        iova = otx2_aura_allocptr(pfvf, pool_id);
                }
        }
@@ -1196,13 +1290,15 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf)
                pool = &pfvf->qset.pool[pool_id];
                qmem_free(pfvf->dev, pool->stack);
                qmem_free(pfvf->dev, pool->fc_addr);
+               page_pool_destroy(pool->page_pool);
+               pool->page_pool = NULL;
        }
        devm_kfree(pfvf->dev, pfvf->qset.pool);
        pfvf->qset.pool = NULL;
 }
 
-static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
-                         int pool_id, int numptrs)
+int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
+                  int pool_id, int numptrs)
 {
        struct npa_aq_enq_req *aq;
        struct otx2_pool *pool;
@@ -1278,9 +1374,10 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
        return 0;
 }
 
-static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
-                         int stack_pages, int numptrs, int buf_size)
+int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
+                  int stack_pages, int numptrs, int buf_size, int type)
 {
+       struct page_pool_params pp_params = { 0 };
        struct npa_aq_enq_req *aq;
        struct otx2_pool *pool;
        int err;
@@ -1324,6 +1421,22 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
        aq->ctype = NPA_AQ_CTYPE_POOL;
        aq->op = NPA_AQ_INSTOP_INIT;
 
+       if (type != AURA_NIX_RQ) {
+               pool->page_pool = NULL;
+               return 0;
+       }
+
+       pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP;
+       pp_params.pool_size = numptrs;
+       pp_params.nid = NUMA_NO_NODE;
+       pp_params.dev = pfvf->dev;
+       pp_params.dma_dir = DMA_FROM_DEVICE;
+       pool->page_pool = page_pool_create(&pp_params);
+       if (IS_ERR(pool->page_pool)) {
+               netdev_err(pfvf->netdev, "Creation of page pool failed\n");
+               return PTR_ERR(pool->page_pool);
+       }
+
        return 0;
 }
 
@@ -1349,7 +1462,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
        stack_pages =
                (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs;
 
-       for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) {
+       for (qidx = 0; qidx < hw->non_qos_queues; qidx++) {
                pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx);
                /* Initialize aura context */
                err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs);
@@ -1358,7 +1471,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
 
                /* Initialize pool context */
                err = otx2_pool_init(pfvf, pool_id, stack_pages,
-                                    num_sqbs, hw->sqb_size);
+                                    num_sqbs, hw->sqb_size, AURA_NIX_SQ);
                if (err)
                        goto fail;
        }
@@ -1369,7 +1482,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
                goto fail;
 
        /* Allocate pointers and free them to aura/pool */
-       for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) {
+       for (qidx = 0; qidx < hw->non_qos_queues; qidx++) {
                pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx);
                pool = &pfvf->qset.pool[pool_id];
 
@@ -1421,7 +1534,7 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
        }
        for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
                err = otx2_pool_init(pfvf, pool_id, stack_pages,
-                                    num_ptrs, pfvf->rbsize);
+                                    num_ptrs, pfvf->rbsize, AURA_NIX_RQ);
                if (err)
                        goto fail;
        }
@@ -1605,7 +1718,6 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable)
        req->bpid_per_chan = 0;
 #endif
 
-
        return otx2_sync_mbox_msg(&pfvf->mbox);
 }
 EXPORT_SYMBOL(otx2_nix_config_bp);
@@ -1629,21 +1741,6 @@ void mbox_handler_cgx_fec_stats(struct otx2_nic *pfvf,
        pfvf->hw.cgx_fec_uncorr_blks += rsp->fec_uncorr_blks;
 }
 
-void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf,
-                                 struct nix_txsch_alloc_rsp *rsp)
-{
-       int lvl, schq;
-
-       /* Setup transmit scheduler list */
-       for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++)
-               for (schq = 0; schq < rsp->schq[lvl]; schq++)
-                       pf->hw.txschq_list[lvl][schq] =
-                               rsp->schq_list[lvl][schq];
-
-       pf->hw.txschq_link_cfg_lvl = rsp->link_cfg_lvl;
-}
-EXPORT_SYMBOL(mbox_handler_nix_txsch_alloc);
-
 void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf,
                               struct npa_lf_alloc_rsp *rsp)
 {
index 0c8fc66..a9ed15d 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
 #include <linux/soc/marvell/octeontx2/asm.h>
+#include <net/macsec.h>
 #include <net/pkt_cls.h>
 #include <net/devlink.h>
 #include <linux/time64.h>
@@ -27,6 +28,7 @@
 #include "otx2_txrx.h"
 #include "otx2_devlink.h"
 #include <rvu_trace.h>
+#include "qos.h"
 
 /* IPv4 flag more fragment bit */
 #define IPV4_FLAG_MORE                         0x20
@@ -183,13 +185,29 @@ struct mbox {
        int                     up_num_msgs; /* mbox_up number of messages */
 };
 
+/* Egress rate limiting definitions */
+#define MAX_BURST_EXPONENT             0x0FULL
+#define MAX_BURST_MANTISSA             0xFFULL
+#define MAX_BURST_SIZE                 130816ULL
+#define MAX_RATE_DIVIDER_EXPONENT      12ULL
+#define MAX_RATE_EXPONENT              0x0FULL
+#define MAX_RATE_MANTISSA              0xFFULL
+
+/* Bitfields in NIX_TLX_PIR register */
+#define TLX_RATE_MANTISSA              GENMASK_ULL(8, 1)
+#define TLX_RATE_EXPONENT              GENMASK_ULL(12, 9)
+#define TLX_RATE_DIVIDER_EXPONENT      GENMASK_ULL(16, 13)
+#define TLX_BURST_MANTISSA             GENMASK_ULL(36, 29)
+#define TLX_BURST_EXPONENT             GENMASK_ULL(40, 37)
+
 struct otx2_hw {
        struct pci_dev          *pdev;
        struct otx2_rss_info    rss_info;
        u16                     rx_queues;
        u16                     tx_queues;
        u16                     xdp_queues;
-       u16                     tot_tx_queues;
+       u16                     tc_tx_queues;
+       u16                     non_qos_queues; /* tx queues plus xdp queues */
        u16                     max_queues;
        u16                     pool_cnt;
        u16                     rqpool_cnt;
@@ -250,6 +268,7 @@ struct otx2_hw {
 #define CN10K_RPM              3
 #define CN10K_PTP_ONESTEP      4
 #define CN10K_HW_MACSEC                5
+#define QOS_CIR_PIR_SUPPORT    6
        unsigned long           cap_flag;
 
 #define LMT_LINE_SIZE          128
@@ -398,6 +417,9 @@ struct cn10k_mcs_txsc {
        u8 sa_bmap;
        u8 sa_key[CN10K_MCS_SA_PER_SC][MACSEC_MAX_KEY_LEN];
        u8 encoding_sa;
+       u8 salt[CN10K_MCS_SA_PER_SC][MACSEC_SALT_LEN];
+       ssci_t ssci[CN10K_MCS_SA_PER_SC];
+       bool vlan_dev; /* macsec running on VLAN ? */
 };
 
 struct cn10k_mcs_rxsc {
@@ -410,6 +432,8 @@ struct cn10k_mcs_rxsc {
        u16 hw_sa_id[CN10K_MCS_SA_PER_SC];
        u8 sa_bmap;
        u8 sa_key[CN10K_MCS_SA_PER_SC][MACSEC_MAX_KEY_LEN];
+       u8 salt[CN10K_MCS_SA_PER_SC][MACSEC_SALT_LEN];
+       ssci_t ssci[CN10K_MCS_SA_PER_SC];
 };
 
 struct cn10k_mcs_cfg {
@@ -501,6 +525,8 @@ struct otx2_nic {
        u16                     pfc_schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
        bool                    pfc_alloc_status[NIX_PF_PFC_PRIO_MAX];
 #endif
+       /* qos */
+       struct otx2_qos         qos;
 
        /* napi event count. It is needed for adaptive irq coalescing. */
        u32 napi_events;
@@ -582,6 +608,7 @@ static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf)
                __set_bit(CN10K_LMTST, &hw->cap_flag);
                __set_bit(CN10K_RPM, &hw->cap_flag);
                __set_bit(CN10K_PTP_ONESTEP, &hw->cap_flag);
+               __set_bit(QOS_CIR_PIR_SUPPORT, &hw->cap_flag);
        }
 
        if (is_dev_cn10kb(pfvf->pdev))
@@ -745,8 +772,7 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
 /* Alloc pointer from pool/aura */
 static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura)
 {
-       u64 *ptr = (u64 *)otx2_get_regaddr(pfvf,
-                          NPA_LF_AURA_OP_ALLOCX(0));
+       u64 *ptr = (__force u64 *)otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_ALLOCX(0));
        u64 incr = (u64)aura | BIT_ULL(63);
 
        return otx2_atomic64_add(incr, ptr);
@@ -888,12 +914,34 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf,
 
 static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx)
 {
+       u16 smq;
 #ifdef CONFIG_DCB
        if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx])
                return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx];
 #endif
+       /* check if qidx falls under QOS queues */
+       if (qidx >= pfvf->hw.non_qos_queues)
+               smq = pfvf->qos.qid_to_sqmap[qidx - pfvf->hw.non_qos_queues];
+       else
+               smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
+
+       return smq;
+}
+
+static inline u16 otx2_get_total_tx_queues(struct otx2_nic *pfvf)
+{
+       return pfvf->hw.non_qos_queues + pfvf->hw.tc_tx_queues;
+}
+
+static inline u64 otx2_convert_rate(u64 rate)
+{
+       u64 converted_rate;
+
+       /* Convert bytes per second to Mbps */
+       converted_rate = rate * 8;
+       converted_rate = max_t(u64, converted_rate / 1000000, 1);
 
-       return pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
+       return converted_rate;
 }
 
 /* MSI-X APIs */
@@ -920,19 +968,25 @@ int otx2_config_nix(struct otx2_nic *pfvf);
 int otx2_config_nix_queues(struct otx2_nic *pfvf);
 int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool pfc_en);
 int otx2_txsch_alloc(struct otx2_nic *pfvf);
-int otx2_txschq_stop(struct otx2_nic *pfvf);
+void otx2_txschq_stop(struct otx2_nic *pfvf);
+void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq);
 void otx2_sqb_flush(struct otx2_nic *pfvf);
-int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
-                     dma_addr_t *dma);
+int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+                   dma_addr_t *dma);
 int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
 void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
 int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
-void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
+void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx);
 void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
+int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura);
 int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
 int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
 int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
                      dma_addr_t *dma);
+int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
+                  int stack_pages, int numptrs, int buf_size, int type);
+int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
+                  int pool_id, int numptrs);
 
 /* RSS configuration APIs*/
 int otx2_rss_init(struct otx2_nic *pfvf);
@@ -1000,6 +1054,8 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf);
 int otx2_handle_ntuple_tc_features(struct net_device *netdev,
                                   netdev_features_t features);
 int otx2_smq_flush(struct otx2_nic *pfvf, int smq);
+void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool,
+                   u64 iova, int size);
 
 /* tc support */
 int otx2_init_tc(struct otx2_nic *nic);
@@ -1040,4 +1096,24 @@ static inline void cn10k_handle_mcs_event(struct otx2_nic *pfvf,
 {}
 #endif /* CONFIG_MACSEC */
 
+/* qos support */
+static inline void otx2_qos_init(struct otx2_nic *pfvf, int qos_txqs)
+{
+       struct otx2_hw *hw = &pfvf->hw;
+
+       hw->tc_tx_queues = qos_txqs;
+       INIT_LIST_HEAD(&pfvf->qos.qos_tree);
+       mutex_init(&pfvf->qos.qos_lock);
+}
+
+static inline void otx2_shutdown_qos(struct otx2_nic *pfvf)
+{
+       mutex_destroy(&pfvf->qos.qos_lock);
+}
+
+u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb,
+                     struct net_device *sb_dev);
+int otx2_get_txq_by_classid(struct otx2_nic *pfvf, u16 classid);
+void otx2_qos_config_txschq(struct otx2_nic *pfvf);
+void otx2_clean_qos_queues(struct otx2_nic *pfvf);
 #endif /* OTX2_COMMON_H */
index 0f8d1a6..c47d91d 100644 (file)
@@ -92,10 +92,16 @@ static void otx2_get_qset_strings(struct otx2_nic *pfvf, u8 **data, int qset)
                        *data += ETH_GSTRING_LEN;
                }
        }
-       for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) {
+
+       for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) {
                for (stats = 0; stats < otx2_n_queue_stats; stats++) {
-                       sprintf(*data, "txq%d: %s", qidx + start_qidx,
-                               otx2_queue_stats[stats].name);
+                       if (qidx >= pfvf->hw.non_qos_queues)
+                               sprintf(*data, "txq_qos%d: %s",
+                                       qidx + start_qidx - pfvf->hw.non_qos_queues,
+                                       otx2_queue_stats[stats].name);
+                       else
+                               sprintf(*data, "txq%d: %s", qidx + start_qidx,
+                                       otx2_queue_stats[stats].name);
                        *data += ETH_GSTRING_LEN;
                }
        }
@@ -159,7 +165,7 @@ static void otx2_get_qset_stats(struct otx2_nic *pfvf,
                                [otx2_queue_stats[stat].index];
        }
 
-       for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) {
+       for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) {
                if (!otx2_update_sq_stats(pfvf, qidx)) {
                        for (stat = 0; stat < otx2_n_queue_stats; stat++)
                                *((*data)++) = 0;
@@ -254,7 +260,7 @@ static int otx2_get_sset_count(struct net_device *netdev, int sset)
                return -EINVAL;
 
        qstats_count = otx2_n_queue_stats *
-                      (pfvf->hw.rx_queues + pfvf->hw.tx_queues);
+                      (pfvf->hw.rx_queues + otx2_get_total_tx_queues(pfvf));
        if (!test_bit(CN10K_RPM, &pfvf->hw.cap_flag))
                mac_stats = CGX_RX_STATS_COUNT + CGX_TX_STATS_COUNT;
        otx2_update_lmac_fec_stats(pfvf);
@@ -282,7 +288,7 @@ static int otx2_set_channels(struct net_device *dev,
 {
        struct otx2_nic *pfvf = netdev_priv(dev);
        bool if_up = netif_running(dev);
-       int err = 0;
+       int err, qos_txqs;
 
        if (!channel->rx_count || !channel->tx_count)
                return -EINVAL;
@@ -296,14 +302,19 @@ static int otx2_set_channels(struct net_device *dev,
        if (if_up)
                dev->netdev_ops->ndo_stop(dev);
 
-       err = otx2_set_real_num_queues(dev, channel->tx_count,
+       qos_txqs = bitmap_weight(pfvf->qos.qos_sq_bmap,
+                                OTX2_QOS_MAX_LEAF_NODES);
+
+       err = otx2_set_real_num_queues(dev, channel->tx_count + qos_txqs,
                                       channel->rx_count);
        if (err)
                return err;
 
        pfvf->hw.rx_queues = channel->rx_count;
        pfvf->hw.tx_queues = channel->tx_count;
-       pfvf->qset.cq_cnt = pfvf->hw.tx_queues +  pfvf->hw.rx_queues;
+       if (pfvf->xdp_prog)
+               pfvf->hw.xdp_queues = channel->rx_count;
+       pfvf->hw.non_qos_queues =  pfvf->hw.tx_queues + pfvf->hw.xdp_queues;
 
        if (if_up)
                err = dev->netdev_ops->ndo_open(dev);
@@ -1405,7 +1416,7 @@ static int otx2vf_get_sset_count(struct net_device *netdev, int sset)
                return -EINVAL;
 
        qstats_count = otx2_n_queue_stats *
-                      (vf->hw.rx_queues + vf->hw.tx_queues);
+                      (vf->hw.rx_queues + otx2_get_total_tx_queues(vf));
 
        return otx2_n_dev_stats + otx2_n_drv_stats + qstats_count + 1;
 }
index 18284ad..db3fcab 100644 (file)
@@ -23,6 +23,7 @@
 #include "otx2_struct.h"
 #include "otx2_ptp.h"
 #include "cn10k.h"
+#include "qos.h"
 #include <rvu_trace.h>
 
 #define DRV_NAME       "rvu_nicpf"
@@ -791,10 +792,6 @@ static void otx2_process_pfaf_mbox_msg(struct otx2_nic *pf,
        case MBOX_MSG_NIX_LF_ALLOC:
                mbox_handler_nix_lf_alloc(pf, (struct nix_lf_alloc_rsp *)msg);
                break;
-       case MBOX_MSG_NIX_TXSCH_ALLOC:
-               mbox_handler_nix_txsch_alloc(pf,
-                                            (struct nix_txsch_alloc_rsp *)msg);
-               break;
        case MBOX_MSG_NIX_BP_ENABLE:
                mbox_handler_nix_bp_enable(pf, (struct nix_bp_cfg_rsp *)msg);
                break;
@@ -1228,6 +1225,7 @@ static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] =  {
 static irqreturn_t otx2_q_intr_handler(int irq, void *data)
 {
        struct otx2_nic *pf = data;
+       struct otx2_snd_queue *sq;
        u64 val, *ptr;
        u64 qidx = 0;
 
@@ -1257,10 +1255,14 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data)
        }
 
        /* SQ */
-       for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) {
+       for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) {
                u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg;
                u8 sq_op_err_code, mnq_err_code, snd_err_code;
 
+               sq = &pf->qset.sq[qidx];
+               if (!sq->sqb_ptrs)
+                       continue;
+
                /* Below debug registers captures first errors corresponding to
                 * those registers. We don't have to check against SQ qid as
                 * these are fatal errors.
@@ -1383,8 +1385,11 @@ static void otx2_free_sq_res(struct otx2_nic *pf)
        otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_SQ, false);
        /* Free SQB pointers */
        otx2_sq_free_sqbs(pf);
-       for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) {
+       for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) {
                sq = &qset->sq[qidx];
+               /* Skip freeing Qos queues if they are not initialized */
+               if (!sq->sqe)
+                       continue;
                qmem_free(pf->dev, sq->sqe);
                qmem_free(pf->dev, sq->tso_hdrs);
                kfree(sq->sg);
@@ -1433,7 +1438,7 @@ static int otx2_init_hw_resources(struct otx2_nic *pf)
         * so, aura count = pool count.
         */
        hw->rqpool_cnt = hw->rx_queues;
-       hw->sqpool_cnt = hw->tot_tx_queues;
+       hw->sqpool_cnt = otx2_get_total_tx_queues(pf);
        hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt;
 
        /* Maximum hardware supported transmit length */
@@ -1516,8 +1521,7 @@ err_free_nix_queues:
        otx2_free_cq_res(pf);
        otx2_ctx_disable(mbox, NIX_AQ_CTYPE_RQ, false);
 err_free_txsch:
-       if (otx2_txschq_stop(pf))
-               dev_err(pf->dev, "%s failed to stop TX schedulers\n", __func__);
+       otx2_txschq_stop(pf);
 err_free_sq_ptrs:
        otx2_sq_free_sqbs(pf);
 err_free_rq_ptrs:
@@ -1551,22 +1555,24 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
        struct nix_lf_free_req *free_req;
        struct mbox *mbox = &pf->mbox;
        struct otx2_cq_queue *cq;
+       struct otx2_pool *pool;
        struct msg_req *req;
-       int qidx, err;
+       int pool_id;
+       int qidx;
 
        /* Ensure all SQE are processed */
        otx2_sqb_flush(pf);
 
        /* Stop transmission */
-       err = otx2_txschq_stop(pf);
-       if (err)
-               dev_err(pf->dev, "RVUPF: Failed to stop/free TX schedulers\n");
+       otx2_txschq_stop(pf);
 
 #ifdef CONFIG_DCB
        if (pf->pfc_en)
                otx2_pfc_txschq_stop(pf);
 #endif
 
+       otx2_clean_qos_queues(pf);
+
        mutex_lock(&mbox->lock);
        /* Disable backpressure */
        if (!(pf->pcifunc & RVU_PFVF_FUNC_MASK))
@@ -1580,7 +1586,7 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
        for (qidx = 0; qidx < qset->cq_cnt; qidx++) {
                cq = &qset->cq[qidx];
                if (cq->cq_type == CQ_RX)
-                       otx2_cleanup_rx_cqes(pf, cq);
+                       otx2_cleanup_rx_cqes(pf, cq, qidx);
                else
                        otx2_cleanup_tx_cqes(pf, cq);
        }
@@ -1590,6 +1596,13 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
        /* Free RQ buffer pointers*/
        otx2_free_aura_ptr(pf, AURA_NIX_RQ);
 
+       for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) {
+               pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx);
+               pool = &pf->qset.pool[pool_id];
+               page_pool_destroy(pool->page_pool);
+               pool->page_pool = NULL;
+       }
+
        otx2_free_cq_res(pf);
 
        /* Free all ingress bandwidth profiles allocated */
@@ -1688,11 +1701,14 @@ int otx2_open(struct net_device *netdev)
 
        netif_carrier_off(netdev);
 
-       pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.tot_tx_queues;
        /* RQ and SQs are mapped to different CQs,
         * so find out max CQ IRQs (i.e CINTs) needed.
         */
-       pf->hw.cint_cnt = max(pf->hw.rx_queues, pf->hw.tx_queues);
+       pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues,
+                              pf->hw.tc_tx_queues);
+
+       pf->qset.cq_cnt = pf->hw.rx_queues + otx2_get_total_tx_queues(pf);
+
        qset->napi = kcalloc(pf->hw.cint_cnt, sizeof(*cq_poll), GFP_KERNEL);
        if (!qset->napi)
                return -ENOMEM;
@@ -1708,7 +1724,7 @@ int otx2_open(struct net_device *netdev)
        if (!qset->cq)
                goto err_free_mem;
 
-       qset->sq = kcalloc(pf->hw.tot_tx_queues,
+       qset->sq = kcalloc(otx2_get_total_tx_queues(pf),
                           sizeof(struct otx2_snd_queue), GFP_KERNEL);
        if (!qset->sq)
                goto err_free_mem;
@@ -1743,6 +1759,11 @@ int otx2_open(struct net_device *netdev)
                else
                        cq_poll->cq_ids[CQ_XDP] = CINT_INVALID_CQ;
 
+               cq_poll->cq_ids[CQ_QOS] = (qidx < pf->hw.tc_tx_queues) ?
+                                         (qidx + pf->hw.rx_queues +
+                                          pf->hw.non_qos_queues) :
+                                         CINT_INVALID_CQ;
+
                cq_poll->dev = (void *)pf;
                cq_poll->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
                INIT_WORK(&cq_poll->dim.work, otx2_dim_work);
@@ -1826,6 +1847,9 @@ int otx2_open(struct net_device *netdev)
        /* 'intf_down' may be checked on any cpu */
        smp_wmb();
 
+       /* Enable QoS configuration before starting tx queues */
+       otx2_qos_config_txschq(pf);
+
        /* we have already received link status notification */
        if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK))
                otx2_handle_link_event(pf);
@@ -1947,6 +1971,12 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev)
        int qidx = skb_get_queue_mapping(skb);
        struct otx2_snd_queue *sq;
        struct netdev_queue *txq;
+       int sq_idx;
+
+       /* XDP SQs are not mapped with TXQs
+        * advance qid to derive correct sq mapped with QOS
+        */
+       sq_idx = (qidx >= pf->hw.tx_queues) ? (qidx + pf->hw.xdp_queues) : qidx;
 
        /* Check for minimum and maximum packet length */
        if (skb->len <= ETH_HLEN ||
@@ -1955,7 +1985,7 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev)
                return NETDEV_TX_OK;
        }
 
-       sq = &pf->qset.sq[qidx];
+       sq = &pf->qset.sq[sq_idx];
        txq = netdev_get_tx_queue(netdev, qidx);
 
        if (!otx2_sq_append_skb(netdev, sq, skb, qidx)) {
@@ -1973,14 +2003,48 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev)
        return NETDEV_TX_OK;
 }
 
-static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb,
-                            struct net_device *sb_dev)
+static int otx2_qos_select_htb_queue(struct otx2_nic *pf, struct sk_buff *skb,
+                                    u16 htb_maj_id)
+{
+       u16 classid;
+
+       if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id)
+               classid = TC_H_MIN(skb->priority);
+       else
+               classid = READ_ONCE(pf->qos.defcls);
+
+       if (!classid)
+               return 0;
+
+       return otx2_get_txq_by_classid(pf, classid);
+}
+
+u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb,
+                     struct net_device *sb_dev)
 {
-#ifdef CONFIG_DCB
        struct otx2_nic *pf = netdev_priv(netdev);
+       bool qos_enabled;
+#ifdef CONFIG_DCB
        u8 vlan_prio;
 #endif
+       int txq;
+
+       qos_enabled = (netdev->real_num_tx_queues > pf->hw.tx_queues) ? true : false;
+       if (unlikely(qos_enabled)) {
+               /* This smp_load_acquire() pairs with smp_store_release() in
+                * otx2_qos_root_add() called from htb offload root creation
+                */
+               u16 htb_maj_id = smp_load_acquire(&pf->qos.maj_id);
+
+               if (unlikely(htb_maj_id)) {
+                       txq = otx2_qos_select_htb_queue(pf, skb, htb_maj_id);
+                       if (txq > 0)
+                               return txq;
+                       goto process_pfc;
+               }
+       }
 
+process_pfc:
 #ifdef CONFIG_DCB
        if (!skb_vlan_tag_present(skb))
                goto pick_tx;
@@ -1994,8 +2058,13 @@ static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb,
 
 pick_tx:
 #endif
-       return netdev_pick_tx(netdev, skb, NULL);
+       txq = netdev_pick_tx(netdev, skb, NULL);
+       if (unlikely(qos_enabled))
+               return txq % pf->hw.tx_queues;
+
+       return txq;
 }
+EXPORT_SYMBOL(otx2_select_queue);
 
 static netdev_features_t otx2_fix_features(struct net_device *dev,
                                           netdev_features_t features)
@@ -2529,7 +2598,7 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog)
                xdp_features_clear_redirect_target(dev);
        }
 
-       pf->hw.tot_tx_queues += pf->hw.xdp_queues;
+       pf->hw.non_qos_queues += pf->hw.xdp_queues;
 
        if (if_up)
                otx2_open(pf->netdev);
@@ -2712,10 +2781,10 @@ static void otx2_sriov_vfcfg_cleanup(struct otx2_nic *pf)
 static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        struct device *dev = &pdev->dev;
+       int err, qcount, qos_txqs;
        struct net_device *netdev;
        struct otx2_nic *pf;
        struct otx2_hw *hw;
-       int err, qcount;
        int num_vec;
 
        err = pcim_enable_device(pdev);
@@ -2740,8 +2809,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        /* Set number of queues */
        qcount = min_t(int, num_online_cpus(), OTX2_MAX_CQ_CNT);
+       qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES);
 
-       netdev = alloc_etherdev_mqs(sizeof(*pf), qcount, qcount);
+       netdev = alloc_etherdev_mqs(sizeof(*pf), qcount + qos_txqs, qcount);
        if (!netdev) {
                err = -ENOMEM;
                goto err_release_regions;
@@ -2760,7 +2830,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        hw->pdev = pdev;
        hw->rx_queues = qcount;
        hw->tx_queues = qcount;
-       hw->tot_tx_queues = qcount;
+       hw->non_qos_queues = qcount;
        hw->max_queues = qcount;
        hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN;
        /* Use CQE of 128 byte descriptor size by default */
@@ -2929,6 +2999,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_pf_sriov_init;
 #endif
 
+       otx2_qos_init(pf, qos_txqs);
+
        return 0;
 
 err_pf_sriov_init:
@@ -3104,6 +3176,7 @@ static void otx2_remove(struct pci_dev *pdev)
        otx2_ptp_destroy(pf);
        otx2_mcam_flow_del(pf);
        otx2_shutdown_tc(pf);
+       otx2_shutdown_qos(pf);
        otx2_detach_resources(&pf->mbox);
        if (pf->hw.lmt_info)
                free_percpu(pf->hw.lmt_info);
index 1b967ea..45a32e4 100644 (file)
 #define NIX_AF_TL1X_TOPOLOGY(a)                (0xC80 | (a) << 16)
 #define NIX_AF_TL2X_PARENT(a)          (0xE88 | (a) << 16)
 #define NIX_AF_TL2X_SCHEDULE(a)                (0xE00 | (a) << 16)
+#define NIX_AF_TL2X_TOPOLOGY(a)                (0xE80 | (a) << 16)
+#define NIX_AF_TL2X_CIR(a)              (0xE20 | (a) << 16)
+#define NIX_AF_TL2X_PIR(a)              (0xE30 | (a) << 16)
 #define NIX_AF_TL3X_PARENT(a)          (0x1088 | (a) << 16)
 #define NIX_AF_TL3X_SCHEDULE(a)                (0x1000 | (a) << 16)
+#define NIX_AF_TL3X_SHAPE(a)           (0x1010 | (a) << 16)
+#define NIX_AF_TL3X_CIR(a)             (0x1020 | (a) << 16)
+#define NIX_AF_TL3X_PIR(a)             (0x1030 | (a) << 16)
+#define NIX_AF_TL3X_TOPOLOGY(a)                (0x1080 | (a) << 16)
 #define NIX_AF_TL4X_PARENT(a)          (0x1288 | (a) << 16)
 #define NIX_AF_TL4X_SCHEDULE(a)                (0x1200 | (a) << 16)
+#define NIX_AF_TL4X_SHAPE(a)           (0x1210 | (a) << 16)
+#define NIX_AF_TL4X_CIR(a)             (0x1220 | (a) << 16)
 #define NIX_AF_TL4X_PIR(a)             (0x1230 | (a) << 16)
+#define NIX_AF_TL4X_TOPOLOGY(a)                (0x1280 | (a) << 16)
 #define NIX_AF_MDQX_SCHEDULE(a)                (0x1400 | (a) << 16)
+#define NIX_AF_MDQX_SHAPE(a)           (0x1410 | (a) << 16)
+#define NIX_AF_MDQX_CIR(a)             (0x1420 | (a) << 16)
+#define NIX_AF_MDQX_PIR(a)             (0x1430 | (a) << 16)
 #define NIX_AF_MDQX_PARENT(a)          (0x1480 | (a) << 16)
 #define NIX_AF_TL3_TL2X_LINKX_CFG(a, b)        (0x1700 | (a) << 16 | (b) << 3)
 
index 8392f63..231c3f0 100644 (file)
 
 #include "cn10k.h"
 #include "otx2_common.h"
-
-/* Egress rate limiting definitions */
-#define MAX_BURST_EXPONENT             0x0FULL
-#define MAX_BURST_MANTISSA             0xFFULL
-#define MAX_BURST_SIZE                 130816ULL
-#define MAX_RATE_DIVIDER_EXPONENT      12ULL
-#define MAX_RATE_EXPONENT              0x0FULL
-#define MAX_RATE_MANTISSA              0xFFULL
+#include "qos.h"
 
 #define CN10K_MAX_BURST_MANTISSA       0x7FFFULL
 #define CN10K_MAX_BURST_SIZE           8453888ULL
 
-/* Bitfields in NIX_TLX_PIR register */
-#define TLX_RATE_MANTISSA              GENMASK_ULL(8, 1)
-#define TLX_RATE_EXPONENT              GENMASK_ULL(12, 9)
-#define TLX_RATE_DIVIDER_EXPONENT      GENMASK_ULL(16, 13)
-#define TLX_BURST_MANTISSA             GENMASK_ULL(36, 29)
-#define TLX_BURST_EXPONENT             GENMASK_ULL(40, 37)
-
 #define CN10K_TLX_BURST_MANTISSA       GENMASK_ULL(43, 29)
 #define CN10K_TLX_BURST_EXPONENT       GENMASK_ULL(47, 44)
 
@@ -147,8 +133,8 @@ static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp,
        }
 }
 
-static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
-                                      u64 maxrate, u32 burst)
+u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
+                               u64 maxrate, u32 burst)
 {
        u32 burst_exp, burst_mantissa;
        u32 exp, mantissa, div_exp;
@@ -264,7 +250,6 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
        struct netlink_ext_ack *extack = cls->common.extack;
        struct flow_action *actions = &cls->rule->action;
        struct flow_action_entry *entry;
-       u64 rate;
        int err;
 
        err = otx2_tc_validate_flow(nic, actions, extack);
@@ -288,10 +273,8 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
                        NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
                        return -EOPNOTSUPP;
                }
-               /* Convert bytes per second to Mbps */
-               rate = entry->police.rate_bytes_ps * 8;
-               rate = max_t(u64, rate / 1000000, 1);
-               err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
+               err = otx2_set_matchall_egress_rate(nic, entry->police.burst,
+                                                   otx2_convert_rate(entry->police.rate_bytes_ps));
                if (err)
                        return err;
                nic->flags |= OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED;
@@ -1127,6 +1110,8 @@ int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
        switch (type) {
        case TC_SETUP_BLOCK:
                return otx2_setup_tc_block(netdev, type_data);
+       case TC_SETUP_QDISC_HTB:
+               return otx2_setup_tc_htb(netdev, type_data);
        default:
                return -EOPNOTSUPP;
        }
index 7af223b..e369baf 100644 (file)
@@ -217,9 +217,6 @@ static bool otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb,
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
                                va - page_address(page) + off,
                                len - off, pfvf->rbsize);
-
-               otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM,
-                                   pfvf->rbsize, DMA_FROM_DEVICE);
                return true;
        }
 
@@ -382,6 +379,8 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf,
        if (pfvf->netdev->features & NETIF_F_RXCSUM)
                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+       skb_mark_for_recycle(skb);
+
        napi_gro_frags(napi);
 }
 
@@ -464,12 +463,13 @@ process_cqe:
                        break;
                }
 
-               if (cq->cq_type == CQ_XDP) {
+               qidx = cq->cq_idx - pfvf->hw.rx_queues;
+
+               if (cq->cq_type == CQ_XDP)
                        otx2_xdp_snd_pkt_handler(pfvf, sq, cqe);
-               } else {
-                       otx2_snd_pkt_handler(pfvf, cq, sq, cqe, budget,
-                                            &tx_pkts, &tx_bytes);
-               }
+               else
+                       otx2_snd_pkt_handler(pfvf, cq, &pfvf->qset.sq[qidx],
+                                            cqe, budget, &tx_pkts, &tx_bytes);
 
                cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID;
                processed_cqe++;
@@ -486,7 +486,11 @@ process_cqe:
        if (likely(tx_pkts)) {
                struct netdev_queue *txq;
 
-               txq = netdev_get_tx_queue(pfvf->netdev, cq->cint_idx);
+               qidx = cq->cq_idx - pfvf->hw.rx_queues;
+
+               if (qidx >= pfvf->hw.tx_queues)
+                       qidx -= pfvf->hw.xdp_queues;
+               txq = netdev_get_tx_queue(pfvf->netdev, qidx);
                netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
                /* Check if queue was stopped earlier due to ring full */
                smp_mb();
@@ -734,7 +738,8 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
                sqe_hdr->aura = sq->aura_id;
                /* Post a CQE Tx after pkt transmission */
                sqe_hdr->pnc = 1;
-               sqe_hdr->sq = qidx;
+               sqe_hdr->sq = (qidx >=  pfvf->hw.tx_queues) ?
+                              qidx + pfvf->hw.xdp_queues : qidx;
        }
        sqe_hdr->total = skb->len;
        /* Set SQE identifier which will be used later for freeing SKB */
@@ -1178,11 +1183,13 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
 }
 EXPORT_SYMBOL(otx2_sq_append_skb);
 
-void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
+void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx)
 {
        struct nix_cqe_rx_s *cqe;
+       struct otx2_pool *pool;
        int processed_cqe = 0;
-       u64 iova, pa;
+       u16 pool_id;
+       u64 iova;
 
        if (pfvf->xdp_prog)
                xdp_rxq_info_unreg(&cq->xdp_rxq);
@@ -1190,6 +1197,9 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
        if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
                return;
 
+       pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx);
+       pool = &pfvf->qset.pool[pool_id];
+
        while (cq->pend_cqe) {
                cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq);
                processed_cqe++;
@@ -1202,9 +1212,8 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
                        continue;
                }
                iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM;
-               pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
-               otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE);
-               put_page(virt_to_page(phys_to_virt(pa)));
+
+               otx2_free_bufs(pfvf, pool, iova, pfvf->rbsize);
        }
 
        /* Free CQEs to HW */
@@ -1219,8 +1228,10 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
        struct nix_cqe_tx_s *cqe;
        int processed_cqe = 0;
        struct sg_list *sg;
+       int qidx;
 
-       sq = &pfvf->qset.sq[cq->cint_idx];
+       qidx = cq->cq_idx - pfvf->hw.rx_queues;
+       sq = &pfvf->qset.sq[qidx];
 
        if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
                return;
index 93cac2c..b5d689e 100644 (file)
@@ -102,7 +102,8 @@ enum cq_type {
        CQ_RX,
        CQ_TX,
        CQ_XDP,
-       CQS_PER_CINT = 3, /* RQ + SQ + XDP */
+       CQ_QOS,
+       CQS_PER_CINT = 4, /* RQ + SQ + XDP + QOS_SQ */
 };
 
 struct otx2_cq_poll {
@@ -117,6 +118,7 @@ struct otx2_cq_poll {
 struct otx2_pool {
        struct qmem             *stack;
        struct qmem             *fc_addr;
+       struct page_pool        *page_pool;
        u16                     rbsize;
 };
 
index 53366db..3734c79 100644 (file)
@@ -70,10 +70,6 @@ static void otx2vf_process_vfaf_mbox_msg(struct otx2_nic *vf,
        case MBOX_MSG_NIX_LF_ALLOC:
                mbox_handler_nix_lf_alloc(vf, (struct nix_lf_alloc_rsp *)msg);
                break;
-       case MBOX_MSG_NIX_TXSCH_ALLOC:
-               mbox_handler_nix_txsch_alloc(vf,
-                                            (struct nix_txsch_alloc_rsp *)msg);
-               break;
        case MBOX_MSG_NIX_BP_ENABLE:
                mbox_handler_nix_bp_enable(vf, (struct nix_bp_cfg_rsp *)msg);
                break;
@@ -479,6 +475,7 @@ static const struct net_device_ops otx2vf_netdev_ops = {
        .ndo_open = otx2vf_open,
        .ndo_stop = otx2vf_stop,
        .ndo_start_xmit = otx2vf_xmit,
+       .ndo_select_queue = otx2_select_queue,
        .ndo_set_rx_mode = otx2vf_set_rx_mode,
        .ndo_set_mac_address = otx2_set_mac_address,
        .ndo_change_mtu = otx2vf_change_mtu,
@@ -524,10 +521,10 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        int num_vec = pci_msix_vec_count(pdev);
        struct device *dev = &pdev->dev;
+       int err, qcount, qos_txqs;
        struct net_device *netdev;
        struct otx2_nic *vf;
        struct otx2_hw *hw;
-       int err, qcount;
 
        err = pcim_enable_device(pdev);
        if (err) {
@@ -550,7 +547,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        pci_set_master(pdev);
 
        qcount = num_online_cpus();
-       netdev = alloc_etherdev_mqs(sizeof(*vf), qcount, qcount);
+       qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES);
+       netdev = alloc_etherdev_mqs(sizeof(*vf), qcount + qos_txqs, qcount);
        if (!netdev) {
                err = -ENOMEM;
                goto err_release_regions;
@@ -570,7 +568,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        hw->rx_queues = qcount;
        hw->tx_queues = qcount;
        hw->max_queues = qcount;
-       hw->tot_tx_queues = qcount;
+       hw->non_qos_queues = qcount;
        hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN;
        /* Use CQE of 128 byte descriptor size by default */
        hw->xqe_size = 128;
@@ -699,6 +697,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (err)
                goto err_shutdown_tc;
 #endif
+       otx2_qos_init(vf, qos_txqs);
 
        return 0;
 
@@ -761,6 +760,7 @@ static void otx2vf_remove(struct pci_dev *pdev)
        otx2_ptp_destroy(vf);
        otx2_mcam_flow_del(vf);
        otx2_shutdown_tc(vf);
+       otx2_shutdown_qos(vf);
        otx2vf_disable_mbox_intr(vf);
        otx2_detach_resources(&vf->mbox);
        free_percpu(vf->hw.lmt_info);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
new file mode 100644 (file)
index 0000000..d3a76c5
--- /dev/null
@@ -0,0 +1,1363 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2023 Marvell.
+ *
+ */
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/bitfield.h>
+
+#include "otx2_common.h"
+#include "cn10k.h"
+#include "qos.h"
+
+#define OTX2_QOS_QID_INNER             0xFFFFU
+#define OTX2_QOS_QID_NONE              0xFFFEU
+#define OTX2_QOS_ROOT_CLASSID          0xFFFFFFFF
+#define OTX2_QOS_CLASS_NONE            0
+#define OTX2_QOS_DEFAULT_PRIO          0xF
+#define OTX2_QOS_INVALID_SQ            0xFFFF
+
+static void otx2_qos_update_tx_netdev_queues(struct otx2_nic *pfvf)
+{
+       struct otx2_hw *hw = &pfvf->hw;
+       int tx_queues, qos_txqs, err;
+
+       qos_txqs = bitmap_weight(pfvf->qos.qos_sq_bmap,
+                                OTX2_QOS_MAX_LEAF_NODES);
+
+       tx_queues = hw->tx_queues + qos_txqs;
+
+       err = netif_set_real_num_tx_queues(pfvf->netdev, tx_queues);
+       if (err) {
+               netdev_err(pfvf->netdev,
+                          "Failed to set no of Tx queues: %d\n", tx_queues);
+               return;
+       }
+}
+
+static void otx2_qos_get_regaddr(struct otx2_qos_node *node,
+                                struct nix_txschq_config *cfg,
+                                int index)
+{
+       if (node->level == NIX_TXSCH_LVL_SMQ) {
+               cfg->reg[index++] = NIX_AF_MDQX_PARENT(node->schq);
+               cfg->reg[index++] = NIX_AF_MDQX_SCHEDULE(node->schq);
+               cfg->reg[index++] = NIX_AF_MDQX_PIR(node->schq);
+               cfg->reg[index]   = NIX_AF_MDQX_CIR(node->schq);
+       } else if (node->level == NIX_TXSCH_LVL_TL4) {
+               cfg->reg[index++] = NIX_AF_TL4X_PARENT(node->schq);
+               cfg->reg[index++] = NIX_AF_TL4X_SCHEDULE(node->schq);
+               cfg->reg[index++] = NIX_AF_TL4X_PIR(node->schq);
+               cfg->reg[index]   = NIX_AF_TL4X_CIR(node->schq);
+       } else if (node->level == NIX_TXSCH_LVL_TL3) {
+               cfg->reg[index++] = NIX_AF_TL3X_PARENT(node->schq);
+               cfg->reg[index++] = NIX_AF_TL3X_SCHEDULE(node->schq);
+               cfg->reg[index++] = NIX_AF_TL3X_PIR(node->schq);
+               cfg->reg[index]   = NIX_AF_TL3X_CIR(node->schq);
+       } else if (node->level == NIX_TXSCH_LVL_TL2) {
+               cfg->reg[index++] = NIX_AF_TL2X_PARENT(node->schq);
+               cfg->reg[index++] = NIX_AF_TL2X_SCHEDULE(node->schq);
+               cfg->reg[index++] = NIX_AF_TL2X_PIR(node->schq);
+               cfg->reg[index]   = NIX_AF_TL2X_CIR(node->schq);
+       }
+}
+
+static void otx2_config_sched_shaping(struct otx2_nic *pfvf,
+                                     struct otx2_qos_node *node,
+                                     struct nix_txschq_config *cfg,
+                                     int *num_regs)
+{
+       u64 maxrate;
+
+       otx2_qos_get_regaddr(node, cfg, *num_regs);
+
+       /* configure parent txschq */
+       cfg->regval[*num_regs] = node->parent->schq << 16;
+       (*num_regs)++;
+
+       /* configure prio/quantum */
+       if (node->qid == OTX2_QOS_QID_NONE) {
+               cfg->regval[*num_regs] =  node->prio << 24 |
+                                         mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen);
+               (*num_regs)++;
+               return;
+       }
+
+       /* configure priority  */
+       cfg->regval[*num_regs] = (node->schq - node->parent->prio_anchor) << 24;
+       (*num_regs)++;
+
+       /* configure PIR */
+       maxrate = (node->rate > node->ceil) ? node->rate : node->ceil;
+
+       cfg->regval[*num_regs] =
+               otx2_get_txschq_rate_regval(pfvf, maxrate, 65536);
+       (*num_regs)++;
+
+       /* Don't configure CIR when both CIR+PIR not supported
+        * On 96xx, CIR + PIR + RED_ALGO=STALL causes deadlock
+        */
+       if (!test_bit(QOS_CIR_PIR_SUPPORT, &pfvf->hw.cap_flag))
+               return;
+
+       cfg->regval[*num_regs] =
+               otx2_get_txschq_rate_regval(pfvf, node->rate, 65536);
+       (*num_regs)++;
+}
+
+static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf,
+                                 struct otx2_qos_node *node,
+                                 struct nix_txschq_config *cfg)
+{
+       struct otx2_hw *hw = &pfvf->hw;
+       int num_regs = 0;
+       u8 level;
+
+       level = node->level;
+
+       /* program txschq registers */
+       if (level == NIX_TXSCH_LVL_SMQ) {
+               cfg->reg[num_regs] = NIX_AF_SMQX_CFG(node->schq);
+               cfg->regval[num_regs] = ((u64)pfvf->tx_max_pktlen << 8) |
+                                       OTX2_MIN_MTU;
+               cfg->regval[num_regs] |= (0x20ULL << 51) | (0x80ULL << 39) |
+                                        (0x2ULL << 36);
+               num_regs++;
+
+               otx2_config_sched_shaping(pfvf, node, cfg, &num_regs);
+
+       } else if (level == NIX_TXSCH_LVL_TL4) {
+               otx2_config_sched_shaping(pfvf, node, cfg, &num_regs);
+       } else if (level == NIX_TXSCH_LVL_TL3) {
+               /* configure link cfg */
+               if (level == pfvf->qos.link_cfg_lvl) {
+                       cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link);
+                       cfg->regval[num_regs] = BIT_ULL(13) | BIT_ULL(12);
+                       num_regs++;
+               }
+
+               otx2_config_sched_shaping(pfvf, node, cfg, &num_regs);
+       } else if (level == NIX_TXSCH_LVL_TL2) {
+               /* configure link cfg */
+               if (level == pfvf->qos.link_cfg_lvl) {
+                       cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link);
+                       cfg->regval[num_regs] = BIT_ULL(13) | BIT_ULL(12);
+                       num_regs++;
+               }
+
+               /* check if node is root */
+               if (node->qid == OTX2_QOS_QID_INNER && !node->parent) {
+                       cfg->reg[num_regs] = NIX_AF_TL2X_SCHEDULE(node->schq);
+                       cfg->regval[num_regs] =  TXSCH_TL1_DFLT_RR_PRIO << 24 |
+                                                mtu_to_dwrr_weight(pfvf,
+                                                                   pfvf->tx_max_pktlen);
+                       num_regs++;
+                       goto txschq_cfg_out;
+               }
+
+               otx2_config_sched_shaping(pfvf, node, cfg, &num_regs);
+       }
+
+txschq_cfg_out:
+       cfg->num_regs = num_regs;
+}
+
+static int otx2_qos_txschq_set_parent_topology(struct otx2_nic *pfvf,
+                                              struct otx2_qos_node *parent)
+{
+       struct mbox *mbox = &pfvf->mbox;
+       struct nix_txschq_config *cfg;
+       int rc;
+
+       if (parent->level == NIX_TXSCH_LVL_MDQ)
+               return 0;
+
+       mutex_lock(&mbox->lock);
+
+       cfg = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
+       if (!cfg) {
+               mutex_unlock(&mbox->lock);
+               return -ENOMEM;
+       }
+
+       cfg->lvl = parent->level;
+
+       if (parent->level == NIX_TXSCH_LVL_TL4)
+               cfg->reg[0] = NIX_AF_TL4X_TOPOLOGY(parent->schq);
+       else if (parent->level == NIX_TXSCH_LVL_TL3)
+               cfg->reg[0] = NIX_AF_TL3X_TOPOLOGY(parent->schq);
+       else if (parent->level == NIX_TXSCH_LVL_TL2)
+               cfg->reg[0] = NIX_AF_TL2X_TOPOLOGY(parent->schq);
+       else if (parent->level == NIX_TXSCH_LVL_TL1)
+               cfg->reg[0] = NIX_AF_TL1X_TOPOLOGY(parent->schq);
+
+       cfg->regval[0] = (u64)parent->prio_anchor << 32;
+       if (parent->level == NIX_TXSCH_LVL_TL1)
+               cfg->regval[0] |= (u64)TXSCH_TL1_DFLT_RR_PRIO << 1;
+
+       cfg->num_regs++;
+
+       rc = otx2_sync_mbox_msg(&pfvf->mbox);
+
+       mutex_unlock(&mbox->lock);
+
+       return rc;
+}
+
+static void otx2_qos_free_hw_node_schq(struct otx2_nic *pfvf,
+                                      struct otx2_qos_node *parent)
+{
+       struct otx2_qos_node *node;
+
+       list_for_each_entry_reverse(node, &parent->child_schq_list, list)
+               otx2_txschq_free_one(pfvf, node->level, node->schq);
+}
+
+static void otx2_qos_free_hw_node(struct otx2_nic *pfvf,
+                                 struct otx2_qos_node *parent)
+{
+       struct otx2_qos_node *node, *tmp;
+
+       list_for_each_entry_safe(node, tmp, &parent->child_list, list) {
+               otx2_qos_free_hw_node(pfvf, node);
+               otx2_qos_free_hw_node_schq(pfvf, node);
+               otx2_txschq_free_one(pfvf, node->level, node->schq);
+       }
+}
+
+static void otx2_qos_free_hw_cfg(struct otx2_nic *pfvf,
+                                struct otx2_qos_node *node)
+{
+       mutex_lock(&pfvf->qos.qos_lock);
+
+       /* free child node hw mappings */
+       otx2_qos_free_hw_node(pfvf, node);
+       otx2_qos_free_hw_node_schq(pfvf, node);
+
+       /* free node hw mappings */
+       otx2_txschq_free_one(pfvf, node->level, node->schq);
+
+       mutex_unlock(&pfvf->qos.qos_lock);
+}
+
+static void otx2_qos_sw_node_delete(struct otx2_nic *pfvf,
+                                   struct otx2_qos_node *node)
+{
+       hash_del_rcu(&node->hlist);
+
+       if (node->qid != OTX2_QOS_QID_INNER && node->qid != OTX2_QOS_QID_NONE) {
+               __clear_bit(node->qid, pfvf->qos.qos_sq_bmap);
+               otx2_qos_update_tx_netdev_queues(pfvf);
+       }
+
+       list_del(&node->list);
+       kfree(node);
+}
+
+static void otx2_qos_free_sw_node_schq(struct otx2_nic *pfvf,
+                                      struct otx2_qos_node *parent)
+{
+       struct otx2_qos_node *node, *tmp;
+
+       list_for_each_entry_safe(node, tmp, &parent->child_schq_list, list) {
+               list_del(&node->list);
+               kfree(node);
+       }
+}
+
+static void __otx2_qos_free_sw_node(struct otx2_nic *pfvf,
+                                   struct otx2_qos_node *parent)
+{
+       struct otx2_qos_node *node, *tmp;
+
+       list_for_each_entry_safe(node, tmp, &parent->child_list, list) {
+               __otx2_qos_free_sw_node(pfvf, node);
+               otx2_qos_free_sw_node_schq(pfvf, node);
+               otx2_qos_sw_node_delete(pfvf, node);
+       }
+}
+
+static void otx2_qos_free_sw_node(struct otx2_nic *pfvf,
+                                 struct otx2_qos_node *node)
+{
+       mutex_lock(&pfvf->qos.qos_lock);
+
+       __otx2_qos_free_sw_node(pfvf, node);
+       otx2_qos_free_sw_node_schq(pfvf, node);
+       otx2_qos_sw_node_delete(pfvf, node);
+
+       mutex_unlock(&pfvf->qos.qos_lock);
+}
+
+static void otx2_qos_destroy_node(struct otx2_nic *pfvf,
+                                 struct otx2_qos_node *node)
+{
+       otx2_qos_free_hw_cfg(pfvf, node);
+       otx2_qos_free_sw_node(pfvf, node);
+}
+
+static void otx2_qos_fill_cfg_schq(struct otx2_qos_node *parent,
+                                  struct otx2_qos_cfg *cfg)
+{
+       struct otx2_qos_node *node;
+
+       list_for_each_entry(node, &parent->child_schq_list, list)
+               cfg->schq[node->level]++;
+}
+
+static void otx2_qos_fill_cfg_tl(struct otx2_qos_node *parent,
+                                struct otx2_qos_cfg *cfg)
+{
+       struct otx2_qos_node *node;
+
+       list_for_each_entry(node, &parent->child_list, list) {
+               otx2_qos_fill_cfg_tl(node, cfg);
+               cfg->schq_contig[node->level]++;
+               otx2_qos_fill_cfg_schq(node, cfg);
+       }
+}
+
+static void otx2_qos_prepare_txschq_cfg(struct otx2_nic *pfvf,
+                                       struct otx2_qos_node *parent,
+                                       struct otx2_qos_cfg *cfg)
+{
+       mutex_lock(&pfvf->qos.qos_lock);
+       otx2_qos_fill_cfg_tl(parent, cfg);
+       mutex_unlock(&pfvf->qos.qos_lock);
+}
+
+static void otx2_qos_read_txschq_cfg_schq(struct otx2_qos_node *parent,
+                                         struct otx2_qos_cfg *cfg)
+{
+       struct otx2_qos_node *node;
+       int cnt;
+
+       list_for_each_entry(node, &parent->child_schq_list, list) {
+               cnt = cfg->dwrr_node_pos[node->level];
+               cfg->schq_list[node->level][cnt] = node->schq;
+               cfg->schq[node->level]++;
+               cfg->dwrr_node_pos[node->level]++;
+       }
+}
+
+static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent,
+                                       struct otx2_qos_cfg *cfg)
+{
+       struct otx2_qos_node *node;
+       int cnt;
+
+       list_for_each_entry(node, &parent->child_list, list) {
+               otx2_qos_read_txschq_cfg_tl(node, cfg);
+               cnt = cfg->static_node_pos[node->level];
+               cfg->schq_contig_list[node->level][cnt] = node->schq;
+               cfg->schq_contig[node->level]++;
+               cfg->static_node_pos[node->level]++;
+               otx2_qos_read_txschq_cfg_schq(node, cfg);
+       }
+}
+
+static void otx2_qos_read_txschq_cfg(struct otx2_nic *pfvf,
+                                    struct otx2_qos_node *node,
+                                    struct otx2_qos_cfg *cfg)
+{
+       mutex_lock(&pfvf->qos.qos_lock);
+       otx2_qos_read_txschq_cfg_tl(node, cfg);
+       mutex_unlock(&pfvf->qos.qos_lock);
+}
+
+static struct otx2_qos_node *
+otx2_qos_alloc_root(struct otx2_nic *pfvf)
+{
+       struct otx2_qos_node *node;
+
+       node = kzalloc(sizeof(*node), GFP_KERNEL);
+       if (!node)
+               return ERR_PTR(-ENOMEM);
+
+       node->parent = NULL;
+       if (!is_otx2_vf(pfvf->pcifunc))
+               node->level = NIX_TXSCH_LVL_TL1;
+       else
+               node->level = NIX_TXSCH_LVL_TL2;
+
+       WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER);
+       node->classid = OTX2_QOS_ROOT_CLASSID;
+
+       hash_add_rcu(pfvf->qos.qos_hlist, &node->hlist, node->classid);
+       list_add_tail(&node->list, &pfvf->qos.qos_tree);
+       INIT_LIST_HEAD(&node->child_list);
+       INIT_LIST_HEAD(&node->child_schq_list);
+
+       return node;
+}
+
+static int otx2_qos_add_child_node(struct otx2_qos_node *parent,
+                                  struct otx2_qos_node *node)
+{
+       struct list_head *head = &parent->child_list;
+       struct otx2_qos_node *tmp_node;
+       struct list_head *tmp;
+
+       for (tmp = head->next; tmp != head; tmp = tmp->next) {
+               tmp_node = list_entry(tmp, struct otx2_qos_node, list);
+               if (tmp_node->prio == node->prio)
+                       return -EEXIST;
+               if (tmp_node->prio > node->prio) {
+                       list_add_tail(&node->list, tmp);
+                       return 0;
+               }
+       }
+
+       list_add_tail(&node->list, head);
+       return 0;
+}
+
+static int otx2_qos_alloc_txschq_node(struct otx2_nic *pfvf,
+                                     struct otx2_qos_node *node)
+{
+       struct otx2_qos_node *txschq_node, *parent, *tmp;
+       int lvl;
+
+       parent = node;
+       for (lvl = node->level - 1; lvl >= NIX_TXSCH_LVL_MDQ; lvl--) {
+               txschq_node = kzalloc(sizeof(*txschq_node), GFP_KERNEL);
+               if (!txschq_node)
+                       goto err_out;
+
+               txschq_node->parent = parent;
+               txschq_node->level = lvl;
+               txschq_node->classid = OTX2_QOS_CLASS_NONE;
+               WRITE_ONCE(txschq_node->qid, OTX2_QOS_QID_NONE);
+               txschq_node->rate = 0;
+               txschq_node->ceil = 0;
+               txschq_node->prio = 0;
+
+               mutex_lock(&pfvf->qos.qos_lock);
+               list_add_tail(&txschq_node->list, &node->child_schq_list);
+               mutex_unlock(&pfvf->qos.qos_lock);
+
+               INIT_LIST_HEAD(&txschq_node->child_list);
+               INIT_LIST_HEAD(&txschq_node->child_schq_list);
+               parent = txschq_node;
+       }
+
+       return 0;
+
+err_out:
+       list_for_each_entry_safe(txschq_node, tmp, &node->child_schq_list,
+                                list) {
+               list_del(&txschq_node->list);
+               kfree(txschq_node);
+       }
+       return -ENOMEM;
+}
+
+static struct otx2_qos_node *
+otx2_qos_sw_create_leaf_node(struct otx2_nic *pfvf,
+                            struct otx2_qos_node *parent,
+                            u16 classid, u32 prio, u64 rate, u64 ceil,
+                            u16 qid)
+{
+       struct otx2_qos_node *node;
+       int err;
+
+       node = kzalloc(sizeof(*node), GFP_KERNEL);
+       if (!node)
+               return ERR_PTR(-ENOMEM);
+
+       node->parent = parent;
+       node->level = parent->level - 1;
+       node->classid = classid;
+       WRITE_ONCE(node->qid, qid);
+
+       node->rate = otx2_convert_rate(rate);
+       node->ceil = otx2_convert_rate(ceil);
+       node->prio = prio;
+
+       __set_bit(qid, pfvf->qos.qos_sq_bmap);
+
+       hash_add_rcu(pfvf->qos.qos_hlist, &node->hlist, classid);
+
+       mutex_lock(&pfvf->qos.qos_lock);
+       err = otx2_qos_add_child_node(parent, node);
+       if (err) {
+               mutex_unlock(&pfvf->qos.qos_lock);
+               return ERR_PTR(err);
+       }
+       mutex_unlock(&pfvf->qos.qos_lock);
+
+       INIT_LIST_HEAD(&node->child_list);
+       INIT_LIST_HEAD(&node->child_schq_list);
+
+       err = otx2_qos_alloc_txschq_node(pfvf, node);
+       if (err) {
+               otx2_qos_sw_node_delete(pfvf, node);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       return node;
+}
+
+static struct otx2_qos_node *
+otx2_sw_node_find(struct otx2_nic *pfvf, u32 classid)
+{
+       struct otx2_qos_node *node = NULL;
+
+       hash_for_each_possible(pfvf->qos.qos_hlist, node, hlist, classid) {
+               if (node->classid == classid)
+                       break;
+       }
+
+       return node;
+}
+
+static struct otx2_qos_node *
+otx2_sw_node_find_rcu(struct otx2_nic *pfvf, u32 classid)
+{
+       struct otx2_qos_node *node = NULL;
+
+       hash_for_each_possible_rcu(pfvf->qos.qos_hlist, node, hlist, classid) {
+               if (node->classid == classid)
+                       break;
+       }
+
+       return node;
+}
+
+int otx2_get_txq_by_classid(struct otx2_nic *pfvf, u16 classid)
+{
+       struct otx2_qos_node *node;
+       u16 qid;
+       int res;
+
+       node = otx2_sw_node_find_rcu(pfvf, classid);
+       if (!node) {
+               res = -ENOENT;
+               goto out;
+       }
+       qid = READ_ONCE(node->qid);
+       if (qid == OTX2_QOS_QID_INNER) {
+               res = -EINVAL;
+               goto out;
+       }
+       res = pfvf->hw.tx_queues + qid;
+out:
+       return res;
+}
+
+static int
+otx2_qos_txschq_config(struct otx2_nic *pfvf, struct otx2_qos_node *node)
+{
+       struct mbox *mbox = &pfvf->mbox;
+       struct nix_txschq_config *req;
+       int rc;
+
+       mutex_lock(&mbox->lock);
+
+       req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
+       if (!req) {
+               mutex_unlock(&mbox->lock);
+               return -ENOMEM;
+       }
+
+       req->lvl = node->level;
+       __otx2_qos_txschq_cfg(pfvf, node, req);
+
+       rc = otx2_sync_mbox_msg(&pfvf->mbox);
+
+       mutex_unlock(&mbox->lock);
+
+       return rc;
+}
+
+static int otx2_qos_txschq_alloc(struct otx2_nic *pfvf,
+                                struct otx2_qos_cfg *cfg)
+{
+       struct nix_txsch_alloc_req *req;
+       struct nix_txsch_alloc_rsp *rsp;
+       struct mbox *mbox = &pfvf->mbox;
+       int lvl, rc, schq;
+
+       mutex_lock(&mbox->lock);
+       req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox);
+       if (!req) {
+               mutex_unlock(&mbox->lock);
+               return -ENOMEM;
+       }
+
+       for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+               req->schq[lvl] = cfg->schq[lvl];
+               req->schq_contig[lvl] = cfg->schq_contig[lvl];
+       }
+
+       rc = otx2_sync_mbox_msg(&pfvf->mbox);
+       if (rc) {
+               mutex_unlock(&mbox->lock);
+               return rc;
+       }
+
+       rsp = (struct nix_txsch_alloc_rsp *)
+             otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr);
+
+       if (IS_ERR(rsp)) {
+               rc = PTR_ERR(rsp);
+               goto out;
+       }
+
+       for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+               for (schq = 0; schq < rsp->schq_contig[lvl]; schq++) {
+                       cfg->schq_contig_list[lvl][schq] =
+                               rsp->schq_contig_list[lvl][schq];
+               }
+       }
+
+       for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+               for (schq = 0; schq < rsp->schq[lvl]; schq++) {
+                       cfg->schq_list[lvl][schq] =
+                               rsp->schq_list[lvl][schq];
+               }
+       }
+
+       pfvf->qos.link_cfg_lvl = rsp->link_cfg_lvl;
+
+out:
+       mutex_unlock(&mbox->lock);
+       return rc;
+}
+
+static void otx2_qos_txschq_fill_cfg_schq(struct otx2_nic *pfvf,
+                                         struct otx2_qos_node *node,
+                                         struct otx2_qos_cfg *cfg)
+{
+       struct otx2_qos_node *tmp;
+       int cnt;
+
+       list_for_each_entry(tmp, &node->child_schq_list, list) {
+               cnt = cfg->dwrr_node_pos[tmp->level];
+               tmp->schq = cfg->schq_list[tmp->level][cnt];
+               cfg->dwrr_node_pos[tmp->level]++;
+       }
+}
+
+static void otx2_qos_txschq_fill_cfg_tl(struct otx2_nic *pfvf,
+                                       struct otx2_qos_node *node,
+                                       struct otx2_qos_cfg *cfg)
+{
+       struct otx2_qos_node *tmp;
+       int cnt;
+
+       list_for_each_entry(tmp, &node->child_list, list) {
+               otx2_qos_txschq_fill_cfg_tl(pfvf, tmp, cfg);
+               cnt = cfg->static_node_pos[tmp->level];
+               tmp->schq = cfg->schq_contig_list[tmp->level][cnt];
+               if (cnt == 0)
+                       node->prio_anchor = tmp->schq;
+               cfg->static_node_pos[tmp->level]++;
+               otx2_qos_txschq_fill_cfg_schq(pfvf, tmp, cfg);
+       }
+}
+
+static void otx2_qos_txschq_fill_cfg(struct otx2_nic *pfvf,
+                                    struct otx2_qos_node *node,
+                                    struct otx2_qos_cfg *cfg)
+{
+       mutex_lock(&pfvf->qos.qos_lock);
+       otx2_qos_txschq_fill_cfg_tl(pfvf, node, cfg);
+       otx2_qos_txschq_fill_cfg_schq(pfvf, node, cfg);
+       mutex_unlock(&pfvf->qos.qos_lock);
+}
+
+static int otx2_qos_txschq_push_cfg_schq(struct otx2_nic *pfvf,
+                                        struct otx2_qos_node *node,
+                                        struct otx2_qos_cfg *cfg)
+{
+       struct otx2_qos_node *tmp;
+       int ret;
+
+       list_for_each_entry(tmp, &node->child_schq_list, list) {
+               ret = otx2_qos_txschq_config(pfvf, tmp);
+               if (ret)
+                       return -EIO;
+               ret = otx2_qos_txschq_set_parent_topology(pfvf, tmp->parent);
+               if (ret)
+                       return -EIO;
+       }
+
+       return 0;
+}
+
+static int otx2_qos_txschq_push_cfg_tl(struct otx2_nic *pfvf,
+                                      struct otx2_qos_node *node,
+                                      struct otx2_qos_cfg *cfg)
+{
+       struct otx2_qos_node *tmp;
+       int ret;
+
+       list_for_each_entry(tmp, &node->child_list, list) {
+               ret = otx2_qos_txschq_push_cfg_tl(pfvf, tmp, cfg);
+               if (ret)
+                       return -EIO;
+               ret = otx2_qos_txschq_config(pfvf, tmp);
+               if (ret)
+                       return -EIO;
+               ret = otx2_qos_txschq_push_cfg_schq(pfvf, tmp, cfg);
+               if (ret)
+                       return -EIO;
+       }
+
+       ret = otx2_qos_txschq_set_parent_topology(pfvf, node);
+       if (ret)
+               return -EIO;
+
+       return 0;
+}
+
+static int otx2_qos_txschq_push_cfg(struct otx2_nic *pfvf,
+                                   struct otx2_qos_node *node,
+                                   struct otx2_qos_cfg *cfg)
+{
+       int ret;
+
+       mutex_lock(&pfvf->qos.qos_lock);
+       ret = otx2_qos_txschq_push_cfg_tl(pfvf, node, cfg);
+       if (ret)
+               goto out;
+       ret = otx2_qos_txschq_push_cfg_schq(pfvf, node, cfg);
+out:
+       mutex_unlock(&pfvf->qos.qos_lock);
+       return ret;
+}
+
+static int otx2_qos_txschq_update_config(struct otx2_nic *pfvf,
+                                        struct otx2_qos_node *node,
+                                        struct otx2_qos_cfg *cfg)
+{
+       otx2_qos_txschq_fill_cfg(pfvf, node, cfg);
+
+       return otx2_qos_txschq_push_cfg(pfvf, node, cfg);
+}
+
+static int otx2_qos_txschq_update_root_cfg(struct otx2_nic *pfvf,
+                                          struct otx2_qos_node *root,
+                                          struct otx2_qos_cfg *cfg)
+{
+       root->schq = cfg->schq_list[root->level][0];
+       return otx2_qos_txschq_config(pfvf, root);
+}
+
+static void otx2_qos_free_cfg(struct otx2_nic *pfvf, struct otx2_qos_cfg *cfg)
+{
+       int lvl, idx, schq;
+
+       for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+               for (idx = 0; idx < cfg->schq[lvl]; idx++) {
+                       schq = cfg->schq_list[lvl][idx];
+                       otx2_txschq_free_one(pfvf, lvl, schq);
+               }
+       }
+
+       for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+               for (idx = 0; idx < cfg->schq_contig[lvl]; idx++) {
+                       schq = cfg->schq_contig_list[lvl][idx];
+                       otx2_txschq_free_one(pfvf, lvl, schq);
+               }
+       }
+}
+
+static void otx2_qos_enadis_sq(struct otx2_nic *pfvf,
+                              struct otx2_qos_node *node,
+                              u16 qid)
+{
+       if (pfvf->qos.qid_to_sqmap[qid] != OTX2_QOS_INVALID_SQ)
+               otx2_qos_disable_sq(pfvf, qid);
+
+       pfvf->qos.qid_to_sqmap[qid] = node->schq;
+       otx2_qos_enable_sq(pfvf, qid);
+}
+
+static void otx2_qos_update_smq_schq(struct otx2_nic *pfvf,
+                                    struct otx2_qos_node *node,
+                                    bool action)
+{
+       struct otx2_qos_node *tmp;
+
+       if (node->qid == OTX2_QOS_QID_INNER)
+               return;
+
+       list_for_each_entry(tmp, &node->child_schq_list, list) {
+               if (tmp->level == NIX_TXSCH_LVL_MDQ) {
+                       if (action == QOS_SMQ_FLUSH)
+                               otx2_smq_flush(pfvf, tmp->schq);
+                       else
+                               otx2_qos_enadis_sq(pfvf, tmp, node->qid);
+               }
+       }
+}
+
+static void __otx2_qos_update_smq(struct otx2_nic *pfvf,
+                                 struct otx2_qos_node *node,
+                                 bool action)
+{
+       struct otx2_qos_node *tmp;
+
+       list_for_each_entry(tmp, &node->child_list, list) {
+               __otx2_qos_update_smq(pfvf, tmp, action);
+               if (tmp->qid == OTX2_QOS_QID_INNER)
+                       continue;
+               if (tmp->level == NIX_TXSCH_LVL_MDQ) {
+                       if (action == QOS_SMQ_FLUSH)
+                               otx2_smq_flush(pfvf, tmp->schq);
+                       else
+                               otx2_qos_enadis_sq(pfvf, tmp, tmp->qid);
+               } else {
+                       otx2_qos_update_smq_schq(pfvf, tmp, action);
+               }
+       }
+}
+
+static void otx2_qos_update_smq(struct otx2_nic *pfvf,
+                               struct otx2_qos_node *node,
+                               bool action)
+{
+       mutex_lock(&pfvf->qos.qos_lock);
+       __otx2_qos_update_smq(pfvf, node, action);
+       otx2_qos_update_smq_schq(pfvf, node, action);
+       mutex_unlock(&pfvf->qos.qos_lock);
+}
+
+static int otx2_qos_push_txschq_cfg(struct otx2_nic *pfvf,
+                                   struct otx2_qos_node *node,
+                                   struct otx2_qos_cfg *cfg)
+{
+       int ret;
+
+       ret = otx2_qos_txschq_alloc(pfvf, cfg);
+       if (ret)
+               return -ENOSPC;
+
+       if (!(pfvf->netdev->flags & IFF_UP)) {
+               otx2_qos_txschq_fill_cfg(pfvf, node, cfg);
+               return 0;
+       }
+
+       ret = otx2_qos_txschq_update_config(pfvf, node, cfg);
+       if (ret) {
+               otx2_qos_free_cfg(pfvf, cfg);
+               return -EIO;
+       }
+
+       otx2_qos_update_smq(pfvf, node, QOS_CFG_SQ);
+
+       return 0;
+}
+
+static int otx2_qos_update_tree(struct otx2_nic *pfvf,
+                               struct otx2_qos_node *node,
+                               struct otx2_qos_cfg *cfg)
+{
+       otx2_qos_prepare_txschq_cfg(pfvf, node->parent, cfg);
+       return otx2_qos_push_txschq_cfg(pfvf, node->parent, cfg);
+}
+
+static int otx2_qos_root_add(struct otx2_nic *pfvf, u16 htb_maj_id, u16 htb_defcls,
+                            struct netlink_ext_ack *extack)
+{
+       struct otx2_qos_cfg *new_cfg;
+       struct otx2_qos_node *root;
+       int err;
+
+       netdev_dbg(pfvf->netdev,
+                  "TC_HTB_CREATE: handle=0x%x defcls=0x%x\n",
+                  htb_maj_id, htb_defcls);
+
+       root = otx2_qos_alloc_root(pfvf);
+       if (IS_ERR(root)) {
+               err = PTR_ERR(root);
+               return err;
+       }
+
+       /* allocate txschq queue */
+       new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL);
+       if (!new_cfg) {
+               NL_SET_ERR_MSG_MOD(extack, "Memory allocation error");
+               err = -ENOMEM;
+               goto free_root_node;
+       }
+       /* allocate htb root node */
+       new_cfg->schq[root->level] = 1;
+       err = otx2_qos_txschq_alloc(pfvf, new_cfg);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Error allocating txschq");
+               goto free_root_node;
+       }
+
+       if (!(pfvf->netdev->flags & IFF_UP) ||
+           root->level == NIX_TXSCH_LVL_TL1) {
+               root->schq = new_cfg->schq_list[root->level][0];
+               goto out;
+       }
+
+       /* update the txschq configuration in hw */
+       err = otx2_qos_txschq_update_root_cfg(pfvf, root, new_cfg);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Error updating txschq configuration");
+               goto txschq_free;
+       }
+
+out:
+       WRITE_ONCE(pfvf->qos.defcls, htb_defcls);
+       /* Pairs with smp_load_acquire() in ndo_select_queue */
+       smp_store_release(&pfvf->qos.maj_id, htb_maj_id);
+       kfree(new_cfg);
+       return 0;
+
+txschq_free:
+       otx2_qos_free_cfg(pfvf, new_cfg);
+free_root_node:
+       kfree(new_cfg);
+       otx2_qos_sw_node_delete(pfvf, root);
+       return err;
+}
+
+static int otx2_qos_root_destroy(struct otx2_nic *pfvf)
+{
+       struct otx2_qos_node *root;
+
+       netdev_dbg(pfvf->netdev, "TC_HTB_DESTROY\n");
+
+       /* find root node */
+       root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID);
+       if (!root)
+               return -ENOENT;
+
+       /* free the hw mappings */
+       otx2_qos_destroy_node(pfvf, root);
+
+       return 0;
+}
+
+static int otx2_qos_validate_configuration(struct otx2_qos_node *parent,
+                                          struct netlink_ext_ack *extack,
+                                          struct otx2_nic *pfvf,
+                                          u64 prio)
+{
+       if (test_bit(prio, parent->prio_bmap)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Static priority child with same priority exists");
+               return -EEXIST;
+       }
+
+       if (prio == TXSCH_TL1_DFLT_RR_PRIO) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Priority is reserved for Round Robin");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int otx2_qos_leaf_alloc_queue(struct otx2_nic *pfvf, u16 classid,
+                                    u32 parent_classid, u64 rate, u64 ceil,
+                                    u64 prio, struct netlink_ext_ack *extack)
+{
+       struct otx2_qos_cfg *old_cfg, *new_cfg;
+       struct otx2_qos_node *node, *parent;
+       int qid, ret, err;
+
+       netdev_dbg(pfvf->netdev,
+                  "TC_HTB_LEAF_ALLOC_QUEUE: classid=0x%x parent_classid=0x%x rate=%lld ceil=%lld prio=%lld\n",
+                  classid, parent_classid, rate, ceil, prio);
+
+       if (prio > OTX2_QOS_MAX_PRIO) {
+               NL_SET_ERR_MSG_MOD(extack, "Valid priority range 0 to 7");
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       /* get parent node */
+       parent = otx2_sw_node_find(pfvf, parent_classid);
+       if (!parent) {
+               NL_SET_ERR_MSG_MOD(extack, "parent node not found");
+               ret = -ENOENT;
+               goto out;
+       }
+       if (parent->level == NIX_TXSCH_LVL_MDQ) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB qos max levels reached");
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       ret = otx2_qos_validate_configuration(parent, extack, pfvf, prio);
+       if (ret)
+               goto out;
+
+       set_bit(prio, parent->prio_bmap);
+
+       /* read current txschq configuration */
+       old_cfg = kzalloc(sizeof(*old_cfg), GFP_KERNEL);
+       if (!old_cfg) {
+               NL_SET_ERR_MSG_MOD(extack, "Memory allocation error");
+               ret = -ENOMEM;
+               goto reset_prio;
+       }
+       otx2_qos_read_txschq_cfg(pfvf, parent, old_cfg);
+
+       /* allocate a new sq */
+       qid = otx2_qos_get_qid(pfvf);
+       if (qid < 0) {
+               NL_SET_ERR_MSG_MOD(extack, "Reached max supported QOS SQ's");
+               ret = -ENOMEM;
+               goto free_old_cfg;
+       }
+
+       /* Actual SQ mapping will be updated after SMQ alloc */
+       pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
+
+       /* allocate and initialize a new child node */
+       node = otx2_qos_sw_create_leaf_node(pfvf, parent, classid, prio, rate,
+                                           ceil, qid);
+       if (IS_ERR(node)) {
+               NL_SET_ERR_MSG_MOD(extack, "Unable to allocate leaf node");
+               ret = PTR_ERR(node);
+               goto free_old_cfg;
+       }
+
+       /* push new txschq config to hw */
+       new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL);
+       if (!new_cfg) {
+               NL_SET_ERR_MSG_MOD(extack, "Memory allocation error");
+               ret = -ENOMEM;
+               goto free_node;
+       }
+       ret = otx2_qos_update_tree(pfvf, node, new_cfg);
+       if (ret) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error");
+               kfree(new_cfg);
+               otx2_qos_sw_node_delete(pfvf, node);
+               /* restore the old qos tree */
+               err = otx2_qos_txschq_update_config(pfvf, parent, old_cfg);
+               if (err) {
+                       netdev_err(pfvf->netdev,
+                                  "Failed to restore txcshq configuration");
+                       goto free_old_cfg;
+               }
+
+               otx2_qos_update_smq(pfvf, parent, QOS_CFG_SQ);
+               goto free_old_cfg;
+       }
+
+       /* update tx_real_queues */
+       otx2_qos_update_tx_netdev_queues(pfvf);
+
+       /* free new txschq config */
+       kfree(new_cfg);
+
+       /* free old txschq config */
+       otx2_qos_free_cfg(pfvf, old_cfg);
+       kfree(old_cfg);
+
+       return pfvf->hw.tx_queues + qid;
+
+free_node:
+       otx2_qos_sw_node_delete(pfvf, node);
+free_old_cfg:
+       kfree(old_cfg);
+reset_prio:
+       clear_bit(prio, parent->prio_bmap);
+out:
+       return ret;
+}
+
+static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid,
+                                 u16 child_classid, u64 rate, u64 ceil, u64 prio,
+                                 struct netlink_ext_ack *extack)
+{
+       struct otx2_qos_cfg *old_cfg, *new_cfg;
+       struct otx2_qos_node *node, *child;
+       int ret, err;
+       u16 qid;
+
+       netdev_dbg(pfvf->netdev,
+                  "TC_HTB_LEAF_TO_INNER classid %04x, child %04x, rate %llu, ceil %llu\n",
+                  classid, child_classid, rate, ceil);
+
+       if (prio > OTX2_QOS_MAX_PRIO) {
+               NL_SET_ERR_MSG_MOD(extack, "Valid priority range 0 to 7");
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       /* find node related to classid */
+       node = otx2_sw_node_find(pfvf, classid);
+       if (!node) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB node not found");
+               ret = -ENOENT;
+               goto out;
+       }
+       /* check max qos txschq level */
+       if (node->level == NIX_TXSCH_LVL_MDQ) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB qos level not supported");
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       set_bit(prio, node->prio_bmap);
+
+       /* store the qid to assign to leaf node */
+       qid = node->qid;
+
+       /* read current txschq configuration */
+       old_cfg = kzalloc(sizeof(*old_cfg), GFP_KERNEL);
+       if (!old_cfg) {
+               NL_SET_ERR_MSG_MOD(extack, "Memory allocation error");
+               ret = -ENOMEM;
+               goto reset_prio;
+       }
+       otx2_qos_read_txschq_cfg(pfvf, node, old_cfg);
+
+       /* delete the txschq nodes allocated for this node */
+       otx2_qos_free_sw_node_schq(pfvf, node);
+
+       /* mark this node as htb inner node */
+       WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER);
+
+       /* allocate and initialize a new child node */
+       child = otx2_qos_sw_create_leaf_node(pfvf, node, child_classid,
+                                            prio, rate, ceil, qid);
+       if (IS_ERR(child)) {
+               NL_SET_ERR_MSG_MOD(extack, "Unable to allocate leaf node");
+               ret = PTR_ERR(child);
+               goto free_old_cfg;
+       }
+
+       /* push new txschq config to hw */
+       new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL);
+       if (!new_cfg) {
+               NL_SET_ERR_MSG_MOD(extack, "Memory allocation error");
+               ret = -ENOMEM;
+               goto free_node;
+       }
+       ret = otx2_qos_update_tree(pfvf, child, new_cfg);
+       if (ret) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error");
+               kfree(new_cfg);
+               otx2_qos_sw_node_delete(pfvf, child);
+               /* restore the old qos tree */
+               WRITE_ONCE(node->qid, qid);
+               err = otx2_qos_alloc_txschq_node(pfvf, node);
+               if (err) {
+                       netdev_err(pfvf->netdev,
+                                  "Failed to restore old leaf node");
+                       goto free_old_cfg;
+               }
+               err = otx2_qos_txschq_update_config(pfvf, node, old_cfg);
+               if (err) {
+                       netdev_err(pfvf->netdev,
+                                  "Failed to restore txcshq configuration");
+                       goto free_old_cfg;
+               }
+               otx2_qos_update_smq(pfvf, node, QOS_CFG_SQ);
+               goto free_old_cfg;
+       }
+
+       /* free new txschq config */
+       kfree(new_cfg);
+
+       /* free old txschq config */
+       otx2_qos_free_cfg(pfvf, old_cfg);
+       kfree(old_cfg);
+
+       return 0;
+
+free_node:
+       otx2_qos_sw_node_delete(pfvf, child);
+free_old_cfg:
+       kfree(old_cfg);
+reset_prio:
+       clear_bit(prio, node->prio_bmap);
+out:
+       return ret;
+}
+
+static int otx2_qos_leaf_del(struct otx2_nic *pfvf, u16 *classid,
+                            struct netlink_ext_ack *extack)
+{
+       struct otx2_qos_node *node, *parent;
+       u64 prio;
+       u16 qid;
+
+       netdev_dbg(pfvf->netdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
+
+       /* find node related to classid */
+       node = otx2_sw_node_find(pfvf, *classid);
+       if (!node) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB node not found");
+               return -ENOENT;
+       }
+       parent = node->parent;
+       prio   = node->prio;
+       qid    = node->qid;
+
+       otx2_qos_disable_sq(pfvf, node->qid);
+
+       otx2_qos_destroy_node(pfvf, node);
+       pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
+
+       clear_bit(prio, parent->prio_bmap);
+
+       return 0;
+}
+
+static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force,
+                                 struct netlink_ext_ack *extack)
+{
+       struct otx2_qos_node *node, *parent;
+       struct otx2_qos_cfg *new_cfg;
+       u64 prio;
+       int err;
+       u16 qid;
+
+       netdev_dbg(pfvf->netdev,
+                  "TC_HTB_LEAF_DEL_LAST classid %04x\n", classid);
+
+       /* find node related to classid */
+       node = otx2_sw_node_find(pfvf, classid);
+       if (!node) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB node not found");
+               return -ENOENT;
+       }
+
+       /* save qid for use by parent */
+       qid = node->qid;
+       prio = node->prio;
+
+       parent = otx2_sw_node_find(pfvf, node->parent->classid);
+       if (!parent) {
+               NL_SET_ERR_MSG_MOD(extack, "parent node not found");
+               return -ENOENT;
+       }
+
+       /* destroy the leaf node */
+       otx2_qos_destroy_node(pfvf, node);
+       pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
+
+       clear_bit(prio, parent->prio_bmap);
+
+       /* create downstream txschq entries to parent */
+       err = otx2_qos_alloc_txschq_node(pfvf, parent);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB failed to create txsch configuration");
+               return err;
+       }
+       WRITE_ONCE(parent->qid, qid);
+       __set_bit(qid, pfvf->qos.qos_sq_bmap);
+
+       /* push new txschq config to hw */
+       new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL);
+       if (!new_cfg) {
+               NL_SET_ERR_MSG_MOD(extack, "Memory allocation error");
+               return -ENOMEM;
+       }
+       /* fill txschq cfg and push txschq cfg to hw */
+       otx2_qos_fill_cfg_schq(parent, new_cfg);
+       err = otx2_qos_push_txschq_cfg(pfvf, parent, new_cfg);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error");
+               kfree(new_cfg);
+               return err;
+       }
+       kfree(new_cfg);
+
+       /* update tx_real_queues */
+       otx2_qos_update_tx_netdev_queues(pfvf);
+
+       return 0;
+}
+
+void otx2_clean_qos_queues(struct otx2_nic *pfvf)
+{
+       struct otx2_qos_node *root;
+
+       root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID);
+       if (!root)
+               return;
+
+       otx2_qos_update_smq(pfvf, root, QOS_SMQ_FLUSH);
+}
+
+void otx2_qos_config_txschq(struct otx2_nic *pfvf)
+{
+       struct otx2_qos_node *root;
+       int err;
+
+       root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID);
+       if (!root)
+               return;
+
+       err = otx2_qos_txschq_config(pfvf, root);
+       if (err) {
+               netdev_err(pfvf->netdev, "Error update txschq configuration\n");
+               goto root_destroy;
+       }
+
+       err = otx2_qos_txschq_push_cfg_tl(pfvf, root, NULL);
+       if (err) {
+               netdev_err(pfvf->netdev, "Error update txschq configuration\n");
+               goto root_destroy;
+       }
+
+       otx2_qos_update_smq(pfvf, root, QOS_CFG_SQ);
+       return;
+
+root_destroy:
+       netdev_err(pfvf->netdev, "Failed to update Scheduler/Shaping config in Hardware\n");
+       /* Free resources allocated */
+       otx2_qos_root_destroy(pfvf);
+}
+
+int otx2_setup_tc_htb(struct net_device *ndev, struct tc_htb_qopt_offload *htb)
+{
+       struct otx2_nic *pfvf = netdev_priv(ndev);
+       int res;
+
+       switch (htb->command) {
+       case TC_HTB_CREATE:
+               return otx2_qos_root_add(pfvf, htb->parent_classid,
+                                        htb->classid, htb->extack);
+       case TC_HTB_DESTROY:
+               return otx2_qos_root_destroy(pfvf);
+       case TC_HTB_LEAF_ALLOC_QUEUE:
+               res = otx2_qos_leaf_alloc_queue(pfvf, htb->classid,
+                                               htb->parent_classid,
+                                               htb->rate, htb->ceil,
+                                               htb->prio, htb->extack);
+               if (res < 0)
+                       return res;
+               htb->qid = res;
+               return 0;
+       case TC_HTB_LEAF_TO_INNER:
+               return otx2_qos_leaf_to_inner(pfvf, htb->parent_classid,
+                                             htb->classid, htb->rate,
+                                             htb->ceil, htb->prio,
+                                             htb->extack);
+       case TC_HTB_LEAF_DEL:
+               return otx2_qos_leaf_del(pfvf, &htb->classid, htb->extack);
+       case TC_HTB_LEAF_DEL_LAST:
+       case TC_HTB_LEAF_DEL_LAST_FORCE:
+               return otx2_qos_leaf_del_last(pfvf, htb->classid,
+                               htb->command == TC_HTB_LEAF_DEL_LAST_FORCE,
+                                             htb->extack);
+       case TC_HTB_LEAF_QUERY_QUEUE:
+               res = otx2_get_txq_by_classid(pfvf, htb->classid);
+               htb->qid = res;
+               return 0;
+       case TC_HTB_NODE_MODIFY:
+               fallthrough;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.h b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h
new file mode 100644 (file)
index 0000000..1977328
--- /dev/null
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2023 Marvell.
+ *
+ */
+#ifndef OTX2_QOS_H
+#define OTX2_QOS_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+
+#define OTX2_QOS_MAX_LVL               4
+#define OTX2_QOS_MAX_PRIO              7
+#define OTX2_QOS_MAX_LEAF_NODES                16
+
+enum qos_smq_operations {
+       QOS_CFG_SQ,
+       QOS_SMQ_FLUSH,
+};
+
+u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, u64 maxrate, u32 burst);
+
+int otx2_setup_tc_htb(struct net_device *ndev, struct tc_htb_qopt_offload *htb);
+int otx2_qos_get_qid(struct otx2_nic *pfvf);
+void otx2_qos_free_qid(struct otx2_nic *pfvf, int qidx);
+int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx);
+void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx);
+
+struct otx2_qos_cfg {
+       u16 schq[NIX_TXSCH_LVL_CNT];
+       u16 schq_contig[NIX_TXSCH_LVL_CNT];
+       int static_node_pos[NIX_TXSCH_LVL_CNT];
+       int dwrr_node_pos[NIX_TXSCH_LVL_CNT];
+       u16 schq_contig_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+       u16 schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+};
+
+struct otx2_qos {
+       DECLARE_HASHTABLE(qos_hlist, order_base_2(OTX2_QOS_MAX_LEAF_NODES));
+       struct mutex qos_lock; /* child list lock */
+       u16 qid_to_sqmap[OTX2_QOS_MAX_LEAF_NODES];
+       struct list_head qos_tree;
+       DECLARE_BITMAP(qos_sq_bmap, OTX2_QOS_MAX_LEAF_NODES);
+       u16 maj_id;
+       u16 defcls;
+       u8  link_cfg_lvl; /* LINKX_CFG CSRs mapped to TL3 or TL2's index ? */
+};
+
+struct otx2_qos_node {
+       struct list_head list; /* list management */
+       struct list_head child_list;
+       struct list_head child_schq_list;
+       struct hlist_node hlist;
+       DECLARE_BITMAP(prio_bmap, OTX2_QOS_MAX_PRIO + 1);
+       struct otx2_qos_node *parent;   /* parent qos node */
+       u64 rate; /* htb params */
+       u64 ceil;
+       u32 classid;
+       u32 prio;
+       u16 schq; /* hw txschq */
+       u16 qid;
+       u16 prio_anchor;
+       u8 level;
+};
+
+
+#endif
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
new file mode 100644 (file)
index 0000000..9d887bf
--- /dev/null
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU Physical Function ethernet driver
+ *
+ * Copyright (C) 2023 Marvell.
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <net/tso.h>
+
+#include "cn10k.h"
+#include "otx2_reg.h"
+#include "otx2_common.h"
+#include "otx2_txrx.h"
+#include "otx2_struct.h"
+
+#define OTX2_QOS_MAX_LEAF_NODES 16
+
+static void otx2_qos_aura_pool_free(struct otx2_nic *pfvf, int pool_id)
+{
+       struct otx2_pool *pool;
+
+       if (!pfvf->qset.pool)
+               return;
+
+       pool = &pfvf->qset.pool[pool_id];
+       qmem_free(pfvf->dev, pool->stack);
+       qmem_free(pfvf->dev, pool->fc_addr);
+       pool->stack = NULL;
+       pool->fc_addr = NULL;
+}
+
+static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx)
+{
+       struct otx2_qset *qset = &pfvf->qset;
+       int pool_id, stack_pages, num_sqbs;
+       struct otx2_hw *hw = &pfvf->hw;
+       struct otx2_snd_queue *sq;
+       struct otx2_pool *pool;
+       dma_addr_t bufptr;
+       int err, ptr;
+       u64 iova, pa;
+
+       /* Calculate number of SQBs needed.
+        *
+        * For a 128byte SQE, and 4K size SQB, 31 SQEs will fit in one SQB.
+        * Last SQE is used for pointing to next SQB.
+        */
+       num_sqbs = (hw->sqb_size / 128) - 1;
+       num_sqbs = (qset->sqe_cnt + num_sqbs) / num_sqbs;
+
+       /* Get no of stack pages needed */
+       stack_pages =
+               (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs;
+
+       pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx);
+       pool = &pfvf->qset.pool[pool_id];
+
+       /* Initialize aura context */
+       err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs);
+       if (err)
+               return err;
+
+       /* Initialize pool context */
+       err = otx2_pool_init(pfvf, pool_id, stack_pages,
+                            num_sqbs, hw->sqb_size, AURA_NIX_SQ);
+       if (err)
+               goto aura_free;
+
+       /* Flush accumulated messages */
+       err = otx2_sync_mbox_msg(&pfvf->mbox);
+       if (err)
+               goto pool_free;
+
+       /* Allocate pointers and free them to aura/pool */
+       sq = &qset->sq[qidx];
+       sq->sqb_count = 0;
+       sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL);
+       if (!sq->sqb_ptrs) {
+               err = -ENOMEM;
+               goto pool_free;
+       }
+
+       for (ptr = 0; ptr < num_sqbs; ptr++) {
+               err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
+               if (err)
+                       goto sqb_free;
+               pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
+               sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
+       }
+
+       return 0;
+
+sqb_free:
+       while (ptr--) {
+               if (!sq->sqb_ptrs[ptr])
+                       continue;
+               iova = sq->sqb_ptrs[ptr];
+               pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
+               dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size,
+                                    DMA_FROM_DEVICE,
+                                    DMA_ATTR_SKIP_CPU_SYNC);
+               put_page(virt_to_page(phys_to_virt(pa)));
+               otx2_aura_allocptr(pfvf, pool_id);
+       }
+       sq->sqb_count = 0;
+       kfree(sq->sqb_ptrs);
+pool_free:
+       qmem_free(pfvf->dev, pool->stack);
+aura_free:
+       qmem_free(pfvf->dev, pool->fc_addr);
+       otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+       return err;
+}
+
+static void otx2_qos_sq_free_sqbs(struct otx2_nic *pfvf, int qidx)
+{
+       struct otx2_qset *qset = &pfvf->qset;
+       struct otx2_hw *hw = &pfvf->hw;
+       struct otx2_snd_queue *sq;
+       u64 iova, pa;
+       int sqb;
+
+       sq = &qset->sq[qidx];
+       if (!sq->sqb_ptrs)
+               return;
+       for (sqb = 0; sqb < sq->sqb_count; sqb++) {
+               if (!sq->sqb_ptrs[sqb])
+                       continue;
+               iova = sq->sqb_ptrs[sqb];
+               pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
+               dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size,
+                                    DMA_FROM_DEVICE,
+                                    DMA_ATTR_SKIP_CPU_SYNC);
+               put_page(virt_to_page(phys_to_virt(pa)));
+       }
+
+       sq->sqb_count = 0;
+
+       sq = &qset->sq[qidx];
+       qmem_free(pfvf->dev, sq->sqe);
+       qmem_free(pfvf->dev, sq->tso_hdrs);
+       kfree(sq->sg);
+       kfree(sq->sqb_ptrs);
+       qmem_free(pfvf->dev, sq->timestamps);
+
+       memset((void *)sq, 0, sizeof(*sq));
+}
+
+/* send queue id */
+static void otx2_qos_sqb_flush(struct otx2_nic *pfvf, int qidx)
+{
+       int sqe_tail, sqe_head;
+       u64 incr, *ptr, val;
+
+       ptr = (__force u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS);
+       incr = (u64)qidx << 32;
+       val = otx2_atomic64_add(incr, ptr);
+       sqe_head = (val >> 20) & 0x3F;
+       sqe_tail = (val >> 28) & 0x3F;
+       if (sqe_head != sqe_tail)
+               usleep_range(50, 60);
+}
+
+static int otx2_qos_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id)
+{
+       struct nix_cn10k_aq_enq_req *cn10k_sq_aq;
+       struct npa_aq_enq_req *aura_aq;
+       struct npa_aq_enq_req *pool_aq;
+       struct nix_aq_enq_req *sq_aq;
+
+       if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
+               cn10k_sq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
+               if (!cn10k_sq_aq)
+                       return -ENOMEM;
+               cn10k_sq_aq->qidx = qidx;
+               cn10k_sq_aq->sq.ena = 0;
+               cn10k_sq_aq->sq_mask.ena = 1;
+               cn10k_sq_aq->ctype = NIX_AQ_CTYPE_SQ;
+               cn10k_sq_aq->op = NIX_AQ_INSTOP_WRITE;
+       } else {
+               sq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
+               if (!sq_aq)
+                       return -ENOMEM;
+               sq_aq->qidx = qidx;
+               sq_aq->sq.ena = 0;
+               sq_aq->sq_mask.ena = 1;
+               sq_aq->ctype = NIX_AQ_CTYPE_SQ;
+               sq_aq->op = NIX_AQ_INSTOP_WRITE;
+       }
+
+       aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+       if (!aura_aq) {
+               otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+               return -ENOMEM;
+       }
+
+       aura_aq->aura_id = aura_id;
+       aura_aq->aura.ena = 0;
+       aura_aq->aura_mask.ena = 1;
+       aura_aq->ctype = NPA_AQ_CTYPE_AURA;
+       aura_aq->op = NPA_AQ_INSTOP_WRITE;
+
+       pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+       if (!pool_aq) {
+               otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+               return -ENOMEM;
+       }
+
+       pool_aq->aura_id = aura_id;
+       pool_aq->pool.ena = 0;
+       pool_aq->pool_mask.ena = 1;
+
+       pool_aq->ctype = NPA_AQ_CTYPE_POOL;
+       pool_aq->op = NPA_AQ_INSTOP_WRITE;
+
+       return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+int otx2_qos_get_qid(struct otx2_nic *pfvf)
+{
+       int qidx;
+
+       qidx = find_first_zero_bit(pfvf->qos.qos_sq_bmap,
+                                  pfvf->hw.tc_tx_queues);
+
+       return qidx == pfvf->hw.tc_tx_queues ? -ENOSPC : qidx;
+}
+
+void otx2_qos_free_qid(struct otx2_nic *pfvf, int qidx)
+{
+       clear_bit(qidx, pfvf->qos.qos_sq_bmap);
+}
+
+int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx)
+{
+       struct otx2_hw *hw = &pfvf->hw;
+       int pool_id, sq_idx, err;
+
+       if (pfvf->flags & OTX2_FLAG_INTF_DOWN)
+               return -EPERM;
+
+       sq_idx = hw->non_qos_queues + qidx;
+
+       mutex_lock(&pfvf->mbox.lock);
+       err = otx2_qos_sq_aura_pool_init(pfvf, sq_idx);
+       if (err)
+               goto out;
+
+       pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx);
+       err = otx2_sq_init(pfvf, sq_idx, pool_id);
+       if (err)
+               goto out;
+out:
+       mutex_unlock(&pfvf->mbox.lock);
+       return err;
+}
+
+void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx)
+{
+       struct otx2_qset *qset = &pfvf->qset;
+       struct otx2_hw *hw = &pfvf->hw;
+       struct otx2_snd_queue *sq;
+       struct otx2_cq_queue *cq;
+       int pool_id, sq_idx;
+
+       sq_idx = hw->non_qos_queues + qidx;
+
+       /* If the DOWN flag is set SQs are already freed */
+       if (pfvf->flags & OTX2_FLAG_INTF_DOWN)
+               return;
+
+       sq = &pfvf->qset.sq[sq_idx];
+       if (!sq->sqb_ptrs)
+               return;
+
+       if (sq_idx < hw->non_qos_queues ||
+           sq_idx >= otx2_get_total_tx_queues(pfvf)) {
+               netdev_err(pfvf->netdev, "Send Queue is not a QoS queue\n");
+               return;
+       }
+
+       cq = &qset->cq[pfvf->hw.rx_queues + sq_idx];
+       pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx);
+
+       otx2_qos_sqb_flush(pfvf, sq_idx);
+       otx2_smq_flush(pfvf, otx2_get_smq_idx(pfvf, sq_idx));
+       otx2_cleanup_tx_cqes(pfvf, cq);
+
+       mutex_lock(&pfvf->mbox.lock);
+       otx2_qos_ctx_disable(pfvf, sq_idx, pool_id);
+       mutex_unlock(&pfvf->mbox.lock);
+
+       otx2_qos_sq_free_sqbs(pfvf, sq_idx);
+       otx2_qos_aura_pool_free(pfvf, pool_id);
+}
index 91a478b..3e20e71 100644 (file)
@@ -148,6 +148,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule,
        __be16 key, mask;
 
        flow_rule_match_meta(f_rule, &match);
+
+       if (match.mask->l2_miss) {
+               NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\"");
+               return -EOPNOTSUPP;
+       }
+
        if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
                NL_SET_ERR_MSG_MOD(f->common.extack,
                                   "Unsupported ingress ifindex mask");
index 277738c..61286b0 100644 (file)
@@ -1374,16 +1374,13 @@ static int mlx4_mf_bond(struct mlx4_dev *dev)
        int nvfs;
        struct mlx4_slaves_pport slaves_port1;
        struct mlx4_slaves_pport slaves_port2;
-       DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
 
        slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
        slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
-       bitmap_and(slaves_port_1_2,
-                  slaves_port1.slaves, slaves_port2.slaves,
-                  dev->persist->num_vfs + 1);
 
        /* only single port vfs are allowed */
-       if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
+       if (bitmap_weight_and(slaves_port1.slaves, slaves_port2.slaves,
+                             dev->persist->num_vfs + 1) > 1) {
                mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
                return -EINVAL;
        }
@@ -3027,13 +3024,43 @@ no_msi:
        }
 }
 
+static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
+                                     enum devlink_port_type port_type)
+{
+       struct mlx4_port_info *info = container_of(devlink_port,
+                                                  struct mlx4_port_info,
+                                                  devlink_port);
+       enum mlx4_port_type mlx4_port_type;
+
+       switch (port_type) {
+       case DEVLINK_PORT_TYPE_AUTO:
+               mlx4_port_type = MLX4_PORT_TYPE_AUTO;
+               break;
+       case DEVLINK_PORT_TYPE_ETH:
+               mlx4_port_type = MLX4_PORT_TYPE_ETH;
+               break;
+       case DEVLINK_PORT_TYPE_IB:
+               mlx4_port_type = MLX4_PORT_TYPE_IB;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return __set_port_type(info, mlx4_port_type);
+}
+
+static const struct devlink_port_ops mlx4_devlink_port_ops = {
+       .port_type_set = mlx4_devlink_port_type_set,
+};
+
 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 {
        struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
        struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
        int err;
 
-       err = devl_port_register(devlink, &info->devlink_port, port);
+       err = devl_port_register_with_ops(devlink, &info->devlink_port, port,
+                                         &mlx4_devlink_port_ops);
        if (err)
                return err;
 
@@ -3877,31 +3904,6 @@ err_disable_pdev:
        return err;
 }
 
-static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
-                                     enum devlink_port_type port_type)
-{
-       struct mlx4_port_info *info = container_of(devlink_port,
-                                                  struct mlx4_port_info,
-                                                  devlink_port);
-       enum mlx4_port_type mlx4_port_type;
-
-       switch (port_type) {
-       case DEVLINK_PORT_TYPE_AUTO:
-               mlx4_port_type = MLX4_PORT_TYPE_AUTO;
-               break;
-       case DEVLINK_PORT_TYPE_ETH:
-               mlx4_port_type = MLX4_PORT_TYPE_ETH;
-               break;
-       case DEVLINK_PORT_TYPE_IB:
-               mlx4_port_type = MLX4_PORT_TYPE_IB;
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return __set_port_type(info, mlx4_port_type);
-}
-
 static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
 {
        struct mlx4_priv *priv = devlink_priv(devlink);
@@ -3986,7 +3988,6 @@ static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
 }
 
 static const struct devlink_ops mlx4_devlink_ops = {
-       .port_type_set  = mlx4_devlink_port_type_set,
        .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
        .reload_down    = mlx4_devlink_reload_down,
        .reload_up      = mlx4_devlink_reload_up,
index 4b60778..63635cc 100644 (file)
@@ -162,9 +162,8 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
                return -EOPNOTSUPP;
        }
 
-       if (pci_num_vf(pdev)) {
+       if (mlx5_core_is_pf(dev) && pci_num_vf(pdev))
                NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable");
-       }
 
        switch (action) {
        case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
@@ -311,8 +310,6 @@ static const struct devlink_ops mlx5_devlink_ops = {
        .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
        .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
        .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
-       .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get,
-       .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set,
        .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
        .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
        .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set,
@@ -320,16 +317,9 @@ static const struct devlink_ops mlx5_devlink_ops = {
        .rate_node_new = mlx5_esw_devlink_rate_node_new,
        .rate_node_del = mlx5_esw_devlink_rate_node_del,
        .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
-       .port_fn_roce_get = mlx5_devlink_port_fn_roce_get,
-       .port_fn_roce_set = mlx5_devlink_port_fn_roce_set,
-       .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get,
-       .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set,
 #endif
 #ifdef CONFIG_MLX5_SF_MANAGER
        .port_new = mlx5_devlink_sf_port_new,
-       .port_del = mlx5_devlink_sf_port_del,
-       .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
-       .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
 #endif
        .flash_update = mlx5_devlink_flash_update,
        .info_get = mlx5_devlink_info_get,
@@ -464,27 +454,6 @@ static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id,
        ctx->val.vbool = mlx5_lag_is_mpesw(dev);
        return 0;
 }
-
-static int mlx5_devlink_esw_multiport_validate(struct devlink *devlink, u32 id,
-                                              union devlink_param_value val,
-                                              struct netlink_ext_ack *extack)
-{
-       struct mlx5_core_dev *dev = devlink_priv(devlink);
-
-       if (!MLX5_ESWITCH_MANAGER(dev)) {
-               NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported");
-               return -EOPNOTSUPP;
-       }
-
-       if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS) {
-               NL_SET_ERR_MSG_MOD(extack,
-                                  "E-Switch must be in switchdev mode");
-               return -EBUSY;
-       }
-
-       return 0;
-}
-
 #endif
 
 static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
@@ -563,7 +532,7 @@ static const struct devlink_param mlx5_devlink_params[] = {
                             BIT(DEVLINK_PARAM_CMODE_RUNTIME),
                             mlx5_devlink_esw_multiport_get,
                             mlx5_devlink_esw_multiport_set,
-                            mlx5_devlink_esw_multiport_validate),
+                            NULL),
 #endif
        DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
                              NULL, NULL, mlx5_devlink_eq_depth_validate),
index 2842195..1874c2f 100644 (file)
@@ -379,6 +379,12 @@ int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_
        if (!htb && htb_qopt->command != TC_HTB_CREATE)
                return -EINVAL;
 
+       if (htb_qopt->prio) {
+               NL_SET_ERR_MSG_MOD(htb_qopt->extack,
+                                  "prio parameter is not supported by device with HTB offload enabled.");
+               return -EOPNOTSUPP;
+       }
+
        switch (htb_qopt->command) {
        case TC_HTB_CREATE:
                if (!mlx5_qos_is_supported(priv->mdev)) {
@@ -515,4 +521,3 @@ int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_i
        *hw_id = rl->leaves_id[tc];
        return 0;
 }
-
index ba2b1f2..6cc23af 100644 (file)
@@ -94,13 +94,13 @@ struct mlx5e_tc_flow {
         * destinations.
         */
        struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
-       struct mlx5e_tc_flow *peer_flow;
        struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
        struct list_head hairpin; /* flows sharing the same hairpin */
-       struct list_head peer;    /* flows with peer flow */
+       struct list_head peer[MLX5_MAX_PORTS];    /* flows with peer flow */
        struct list_head unready; /* flows not ready to be offloaded (e.g
                                   * due to missing route)
                                   */
+       struct list_head peer_flows; /* flows on peer */
        struct net_device *orig_dev; /* netdev adding flow first */
        int tmp_entry_index;
        struct list_head tmp_list; /* temporary flow list used by neigh update */
index 6b7b563..592b165 100644 (file)
@@ -349,15 +349,6 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
        sa->macsec_rule = NULL;
 }
 
-static struct mlx5e_priv *macsec_netdev_priv(const struct net_device *dev)
-{
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
-       if (is_vlan_dev(dev))
-               return netdev_priv(vlan_dev_priv(dev)->real_dev);
-#endif
-       return netdev_priv(dev);
-}
-
 static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
                                struct mlx5e_macsec_sa *sa,
                                bool encrypt,
index 33bfe4d..934b0d5 100644 (file)
@@ -283,7 +283,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs,
        if (IS_ERR(*rule_p)) {
                err = PTR_ERR(*rule_p);
                *rule_p = NULL;
-               fs_err(fs, "%s: add rule failed\n", __func__);
+               fs_err(fs, "add rule failed\n");
        }
 
        return err;
@@ -395,8 +395,7 @@ int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                fs->vlan->trap_rule = NULL;
-               fs_err(fs, "%s: add VLAN trap rule failed, err %d\n",
-                      __func__, err);
+               fs_err(fs, "add VLAN trap rule failed, err %d\n", err);
                return err;
        }
        fs->vlan->trap_rule = rule;
@@ -421,8 +420,7 @@ int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                fs->l2.trap_rule = NULL;
-               fs_err(fs, "%s: add MAC trap rule failed, err %d\n",
-                      __func__, err);
+               fs_err(fs, "add MAC trap rule failed, err %d\n", err);
                return err;
        }
        fs->l2.trap_rule = rule;
@@ -763,7 +761,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs)
        if (IS_ERR(*rule_p)) {
                err = PTR_ERR(*rule_p);
                *rule_p = NULL;
-               fs_err(fs, "%s: add promiscuous rule failed\n", __func__);
+               fs_err(fs, "add promiscuous rule failed\n");
        }
        kvfree(spec);
        return err;
@@ -995,7 +993,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
 
        ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(ai->rule)) {
-               fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac);
+               fs_err(fs, "add l2 rule(mac:%pM) failed\n", mv_dmac);
                err = PTR_ERR(ai->rule);
                ai->rule = NULL;
        }
index 3e7041b..965a826 100644 (file)
@@ -374,7 +374,9 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep)
 {
        struct mlx5e_rep_sq *rep_sq, *tmp;
+       struct mlx5e_rep_sq_peer *sq_peer;
        struct mlx5e_rep_priv *rpriv;
+       unsigned long i;
 
        if (esw->mode != MLX5_ESWITCH_OFFLOADS)
                return;
@@ -382,31 +384,78 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
                mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
-               if (rep_sq->send_to_vport_rule_peer)
-                       mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+               xa_for_each(&rep_sq->sq_peer, i, sq_peer) {
+                       if (sq_peer->rule)
+                               mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule);
+
+                       xa_erase(&rep_sq->sq_peer, i);
+                       kfree(sq_peer);
+               }
+
+               xa_destroy(&rep_sq->sq_peer);
                list_del(&rep_sq->list);
                kfree(rep_sq);
        }
 }
 
+static int mlx5e_sqs2vport_add_peers_rules(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep,
+                                          struct mlx5_devcom *devcom,
+                                          struct mlx5e_rep_sq *rep_sq, int i)
+{
+       struct mlx5_eswitch *peer_esw = NULL;
+       struct mlx5_flow_handle *flow_rule;
+       int tmp;
+
+       mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+                                       peer_esw, tmp) {
+               int peer_rule_idx = mlx5_get_dev_index(peer_esw->dev);
+               struct mlx5e_rep_sq_peer *sq_peer;
+               int err;
+
+               sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL);
+               if (!sq_peer)
+                       return -ENOMEM;
+
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+                                                               rep, rep_sq->sqn);
+               if (IS_ERR(flow_rule)) {
+                       kfree(sq_peer);
+                       return PTR_ERR(flow_rule);
+               }
+
+               sq_peer->rule = flow_rule;
+               sq_peer->peer = peer_esw;
+               err = xa_insert(&rep_sq->sq_peer, peer_rule_idx, sq_peer, GFP_KERNEL);
+               if (err) {
+                       kfree(sq_peer);
+                       mlx5_eswitch_del_send_to_vport_rule(flow_rule);
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
 static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep,
                                 u32 *sqns_array, int sqns_num)
 {
-       struct mlx5_eswitch *peer_esw = NULL;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
+       struct mlx5_devcom *devcom;
+       bool devcom_locked = false;
        int err;
        int i;
 
        if (esw->mode != MLX5_ESWITCH_OFFLOADS)
                return 0;
 
+       devcom = esw->dev->priv.devcom;
        rpriv = mlx5e_rep_to_rep_priv(rep);
-       if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
-               peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
-                                                    MLX5_DEVCOM_ESW_OFFLOADS);
+       if (mlx5_devcom_comp_is_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS) &&
+           mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+               devcom_locked = true;
 
        for (i = 0; i < sqns_num; i++) {
                rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
@@ -426,31 +475,30 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                rep_sq->send_to_vport_rule = flow_rule;
                rep_sq->sqn = sqns_array[i];
 
-               if (peer_esw) {
-                       flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
-                                                                       rep, sqns_array[i]);
-                       if (IS_ERR(flow_rule)) {
-                               err = PTR_ERR(flow_rule);
+               xa_init(&rep_sq->sq_peer);
+               if (devcom_locked) {
+                       err = mlx5e_sqs2vport_add_peers_rules(esw, rep, devcom, rep_sq, i);
+                       if (err) {
                                mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+                               xa_destroy(&rep_sq->sq_peer);
                                kfree(rep_sq);
                                goto out_err;
                        }
-                       rep_sq->send_to_vport_rule_peer = flow_rule;
                }
 
                list_add(&rep_sq->list, &rpriv->vport_sqs_list);
        }
 
-       if (peer_esw)
-               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (devcom_locked)
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 
        return 0;
 
 out_err:
        mlx5e_sqs2vport_stop(esw, rep);
 
-       if (peer_esw)
-               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (devcom_locked)
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 
        return err;
 }
@@ -1530,17 +1578,24 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
        return rpriv->netdev;
 }
 
-static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep,
+                                        struct mlx5_eswitch *peer_esw)
 {
+       int i = mlx5_get_dev_index(peer_esw->dev);
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
 
+       WARN_ON_ONCE(!peer_esw);
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
-               if (!rep_sq->send_to_vport_rule_peer)
+               struct mlx5e_rep_sq_peer *sq_peer = xa_load(&rep_sq->sq_peer, i);
+
+               if (!sq_peer || sq_peer->peer != peer_esw)
                        continue;
-               mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
-               rep_sq->send_to_vport_rule_peer = NULL;
+
+               mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule);
+               xa_erase(&rep_sq->sq_peer, i);
+               kfree(sq_peer);
        }
 }
 
@@ -1548,24 +1603,52 @@ static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
                                      struct mlx5_eswitch_rep *rep,
                                      struct mlx5_eswitch *peer_esw)
 {
+       int i = mlx5_get_dev_index(peer_esw->dev);
        struct mlx5_flow_handle *flow_rule;
+       struct mlx5e_rep_sq_peer *sq_peer;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
+       int err;
 
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
-               if (rep_sq->send_to_vport_rule_peer)
+               sq_peer = xa_load(&rep_sq->sq_peer, i);
+
+               if (sq_peer && sq_peer->peer)
                        continue;
-               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
-               if (IS_ERR(flow_rule))
+
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep,
+                                                               rep_sq->sqn);
+               if (IS_ERR(flow_rule)) {
+                       err = PTR_ERR(flow_rule);
                        goto err_out;
-               rep_sq->send_to_vport_rule_peer = flow_rule;
+               }
+
+               if (sq_peer) {
+                       sq_peer->rule = flow_rule;
+                       sq_peer->peer = peer_esw;
+                       continue;
+               }
+               sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL);
+               if (!sq_peer) {
+                       err = -ENOMEM;
+                       goto err_sq_alloc;
+               }
+               err = xa_insert(&rep_sq->sq_peer, i, sq_peer, GFP_KERNEL);
+               if (err)
+                       goto err_xa;
+               sq_peer->rule = flow_rule;
+               sq_peer->peer = peer_esw;
        }
 
        return 0;
+err_xa:
+       kfree(sq_peer);
+err_sq_alloc:
+       mlx5_eswitch_del_send_to_vport_rule(flow_rule);
 err_out:
-       mlx5e_vport_rep_event_unpair(rep);
-       return PTR_ERR(flow_rule);
+       mlx5e_vport_rep_event_unpair(rep, peer_esw);
+       return err;
 }
 
 static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
@@ -1578,7 +1661,7 @@ static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
        if (event == MLX5_SWITCHDEV_EVENT_PAIR)
                err = mlx5e_vport_rep_event_pair(esw, rep, data);
        else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
-               mlx5e_vport_rep_event_unpair(rep);
+               mlx5e_vport_rep_event_unpair(rep, data);
 
        return err;
 }
index 80b7f50..70640fa 100644 (file)
@@ -225,9 +225,14 @@ struct mlx5e_encap_entry {
        struct rcu_head rcu;
 };
 
+struct mlx5e_rep_sq_peer {
+       struct mlx5_flow_handle *rule;
+       void *peer;
+};
+
 struct mlx5e_rep_sq {
        struct mlx5_flow_handle *send_to_vport_rule;
-       struct mlx5_flow_handle *send_to_vport_rule_peer;
+       struct xarray sq_peer;
        u32 sqn;
        struct list_head         list;
 };
index 6963482..704b022 100644 (file)
@@ -491,9 +491,7 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf
        }
 
        frag = &sinfo->frags[sinfo->nr_frags++];
-       __skb_frag_set_page(frag, frag_page->page);
-       skb_frag_off_set(frag, frag_offset);
-       skb_frag_size_set(frag, len);
+       skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len);
 
        if (page_is_pfmemalloc(frag_page->page))
                xdp_buff_set_frag_pfmemalloc(xdp);
index 8a5a870..88631fb 100644 (file)
@@ -1666,8 +1666,11 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
 {
        struct mlx5e_priv *out_priv, *route_priv;
        struct mlx5_core_dev *route_mdev;
+       struct mlx5_devcom *devcom;
        struct mlx5_eswitch *esw;
        u16 vhca_id;
+       int err;
+       int i;
 
        out_priv = netdev_priv(out_dev);
        esw = out_priv->mdev->priv.eswitch;
@@ -1675,28 +1678,25 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
        route_mdev = route_priv->mdev;
 
        vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
-       if (mlx5_lag_is_active(out_priv->mdev)) {
-               struct mlx5_devcom *devcom;
-               int err;
-
-               /* In lag case we may get devices from different eswitch instances.
-                * If we failed to get vport num, it means, mostly, that we on the wrong
-                * eswitch.
-                */
-               err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
-               if (err != -ENOENT)
-                       return err;
-
-               rcu_read_lock();
-               devcom = out_priv->mdev->priv.devcom;
-               esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-               err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
-               rcu_read_unlock();
+       err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+       if (!err)
+               return err;
 
+       if (!mlx5_lag_is_active(out_priv->mdev))
                return err;
+
+       rcu_read_lock();
+       devcom = out_priv->mdev->priv.devcom;
+       err = -ENODEV;
+       mlx5_devcom_for_each_peer_entry_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+                                           esw, i) {
+               err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+               if (!err)
+                       break;
        }
+       rcu_read_unlock();
 
-       return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+       return err;
 }
 
 static int
@@ -1986,47 +1986,59 @@ void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
                mlx5e_flow_put(priv, flow);
 }
 
-static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
+                                      int peer_index)
 {
        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+       struct mlx5e_tc_flow *peer_flow;
+       struct mlx5e_tc_flow *tmp;
 
        if (!flow_flag_test(flow, ESWITCH) ||
            !flow_flag_test(flow, DUP))
                return;
 
        mutex_lock(&esw->offloads.peer_mutex);
-       list_del(&flow->peer);
+       list_del(&flow->peer[peer_index]);
        mutex_unlock(&esw->offloads.peer_mutex);
 
-       flow_flag_clear(flow, DUP);
-
-       if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
-               mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
-               kfree(flow->peer_flow);
+       list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
+               if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev))
+                       continue;
+               if (refcount_dec_and_test(&peer_flow->refcnt)) {
+                       mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow);
+                       list_del(&peer_flow->peer_flows);
+                       kfree(peer_flow);
+               }
        }
 
-       flow->peer_flow = NULL;
+       if (list_empty(&flow->peer_flows))
+               flow_flag_clear(flow, DUP);
 }
 
-static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow)
 {
-       struct mlx5_core_dev *dev = flow->priv->mdev;
-       struct mlx5_devcom *devcom = dev->priv.devcom;
-       struct mlx5_eswitch *peer_esw;
-
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
-               return;
+       int i;
 
-       __mlx5e_tc_del_fdb_peer_flow(flow);
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       for (i = 0; i < MLX5_MAX_PORTS; i++) {
+               if (i == mlx5_get_dev_index(flow->priv->mdev))
+                       continue;
+               mlx5e_tc_del_fdb_peer_flow(flow, i);
+       }
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
 {
        if (mlx5e_is_eswitch_flow(flow)) {
-               mlx5e_tc_del_fdb_peer_flow(flow);
+               struct mlx5_devcom *devcom = flow->priv->mdev->priv.devcom;
+
+               if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) {
+                       mlx5e_tc_del_fdb_flow(priv, flow);
+                       return;
+               }
+
+               mlx5e_tc_del_fdb_peers_flow(flow);
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
                mlx5e_tc_del_fdb_flow(priv, flow);
        } else {
                mlx5e_tc_del_nic_flow(priv, flow);
@@ -2502,6 +2514,12 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
                return 0;
 
        flow_rule_match_meta(rule, &match);
+
+       if (match.mask->l2_miss) {
+               NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\"");
+               return -EOPNOTSUPP;
+       }
+
        if (!match.mask->ingress_ifindex)
                return 0;
 
@@ -4197,8 +4215,8 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
                flow_flag_test(flow, INGRESS);
        bool act_is_encap = !!(attr->action &
                               MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
-       bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
-                                               MLX5_DEVCOM_ESW_OFFLOADS);
+       bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.devcom,
+                                                   MLX5_DEVCOM_ESW_OFFLOADS);
 
        if (!esw_paired)
                return false;
@@ -4289,6 +4307,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
        INIT_LIST_HEAD(&flow->hairpin);
        INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
        INIT_LIST_HEAD(&flow->attrs);
+       INIT_LIST_HEAD(&flow->peer_flows);
        refcount_set(&flow->refcnt, 1);
        init_completion(&flow->init_done);
        init_completion(&flow->del_hw_done);
@@ -4397,22 +4416,19 @@ out:
 
 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
                                      struct mlx5e_tc_flow *flow,
-                                     unsigned long flow_flags)
+                                     unsigned long flow_flags,
+                                     struct mlx5_eswitch *peer_esw)
 {
        struct mlx5e_priv *priv = flow->priv, *peer_priv;
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
-       struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
+       int i = mlx5_get_dev_index(peer_esw->dev);
        struct mlx5e_rep_priv *peer_urpriv;
        struct mlx5e_tc_flow *peer_flow;
        struct mlx5_core_dev *in_mdev;
        int err = 0;
 
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
-               return -ENODEV;
-
        peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
        peer_priv = netdev_priv(peer_urpriv->netdev);
 
@@ -4437,14 +4453,13 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
                goto out;
        }
 
-       flow->peer_flow = peer_flow;
+       list_add_tail(&peer_flow->peer_flows, &flow->peer_flows);
        flow_flag_set(flow, DUP);
        mutex_lock(&esw->offloads.peer_mutex);
-       list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+       list_add_tail(&flow->peer[i], &esw->offloads.peer_flows[i]);
        mutex_unlock(&esw->offloads.peer_mutex);
 
 out:
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        return err;
 }
 
@@ -4455,30 +4470,48 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
                   struct net_device *filter_dev,
                   struct mlx5e_tc_flow **__flow)
 {
+       struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *in_rep = rpriv->rep;
        struct mlx5_core_dev *in_mdev = priv->mdev;
+       struct mlx5_eswitch *peer_esw;
        struct mlx5e_tc_flow *flow;
        int err;
+       int i;
 
        flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
                                    in_mdev);
        if (IS_ERR(flow))
                return PTR_ERR(flow);
 
-       if (is_peer_flow_needed(flow)) {
-               err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
-               if (err) {
-                       mlx5e_tc_del_fdb_flow(priv, flow);
-                       goto out;
-               }
+       if (!is_peer_flow_needed(flow)) {
+               *__flow = flow;
+               return 0;
        }
 
-       *__flow = flow;
+       if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) {
+               err = -ENODEV;
+               goto clean_flow;
+       }
+
+       mlx5_devcom_for_each_peer_entry(devcom,
+                                       MLX5_DEVCOM_ESW_OFFLOADS,
+                                       peer_esw, i) {
+               err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw);
+               if (err)
+                       goto peer_clean;
+       }
 
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
+       *__flow = flow;
        return 0;
 
-out:
+peer_clean:
+       mlx5e_tc_del_fdb_peers_flow(flow);
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+clean_flow:
+       mlx5e_tc_del_fdb_flow(priv, flow);
        return err;
 }
 
@@ -4696,7 +4729,6 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
 {
        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
-       struct mlx5_eswitch *peer_esw;
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;
        u64 lastuse = 0;
@@ -4731,23 +4763,29 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
        /* Under multipath it's possible for one rule to be currently
         * un-offloaded while the other rule is offloaded.
         */
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
+       if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
                goto out;
 
-       if (flow_flag_test(flow, DUP) &&
-           flow_flag_test(flow->peer_flow, OFFLOADED)) {
-               u64 bytes2;
-               u64 packets2;
-               u64 lastuse2;
+       if (flow_flag_test(flow, DUP)) {
+               struct mlx5e_tc_flow *peer_flow;
 
-               if (flow_flag_test(flow, USE_ACT_STATS)) {
-                       f->use_act_stats = true;
-               } else {
-                       counter = mlx5e_tc_get_counter(flow->peer_flow);
+               list_for_each_entry(peer_flow, &flow->peer_flows, peer_flows) {
+                       u64 packets2;
+                       u64 lastuse2;
+                       u64 bytes2;
+
+                       if (!flow_flag_test(peer_flow, OFFLOADED))
+                               continue;
+                       if (flow_flag_test(flow, USE_ACT_STATS)) {
+                               f->use_act_stats = true;
+                               break;
+                       }
+
+                       counter = mlx5e_tc_get_counter(peer_flow);
                        if (!counter)
                                goto no_peer_counter;
-                       mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+                       mlx5_fc_query_cached(counter, &bytes2, &packets2,
+                                            &lastuse2);
 
                        bytes += bytes2;
                        packets += packets2;
@@ -4756,7 +4794,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
        }
 
 no_peer_counter:
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 out:
        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
                          FLOW_ACTION_HW_STATS_DELAYED);
@@ -5274,9 +5312,14 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
 {
        struct mlx5e_tc_flow *flow, *tmp;
+       int i;
 
-       list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
-               __mlx5e_tc_del_fdb_peer_flow(flow);
+       for (i = 0; i < MLX5_MAX_PORTS; i++) {
+               if (i == mlx5_get_dev_index(esw->dev))
+                       continue;
+               list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i])
+                       mlx5e_tc_del_fdb_peers_flow(flow);
+       }
 }
 
 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
index 2e504c7..24b1ca4 100644 (file)
@@ -15,13 +15,27 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
        vport->egress.offloads.fwd_rule = NULL;
 }
 
-static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index)
 {
-       if (!vport->egress.offloads.bounce_rule)
+       struct mlx5_flow_handle *bounce_rule =
+               xa_load(&vport->egress.offloads.bounce_rules, rule_index);
+
+       if (!bounce_rule)
                return;
 
-       mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
-       vport->egress.offloads.bounce_rule = NULL;
+       mlx5_del_flow_rules(bounce_rule);
+       xa_erase(&vport->egress.offloads.bounce_rules, rule_index);
+}
+
+static void esw_acl_egress_ofld_bounce_rules_destroy(struct mlx5_vport *vport)
+{
+       struct mlx5_flow_handle *bounce_rule;
+       unsigned long i;
+
+       xa_for_each(&vport->egress.offloads.bounce_rules, i, bounce_rule) {
+               mlx5_del_flow_rules(bounce_rule);
+               xa_erase(&vport->egress.offloads.bounce_rules, i);
+       }
 }
 
 static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
@@ -96,7 +110,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
 {
        esw_acl_egress_vlan_destroy(vport);
        esw_acl_egress_ofld_fwd2vport_destroy(vport);
-       esw_acl_egress_ofld_bounce_rule_destroy(vport);
+       esw_acl_egress_ofld_bounce_rules_destroy(vport);
 }
 
 static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
@@ -194,6 +208,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
                vport->egress.acl = NULL;
                return err;
        }
+       vport->egress.type = VPORT_EGRESS_ACL_TYPE_DEFAULT;
 
        err = esw_acl_egress_ofld_groups_create(esw, vport);
        if (err)
index 45b8391..d599e50 100644 (file)
@@ -35,7 +35,8 @@ esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns,
        }
 
        ft_attr.max_fte = size;
-       ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT;
+       if (vport_num || mlx5_core_is_ecpf(esw->dev))
+               ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT;
        acl = mlx5_create_vport_flow_table(root_ns, &ft_attr, vport_num);
        if (IS_ERR(acl)) {
                err = PTR_ERR(acl);
index c9f8469..536b04e 100644 (file)
@@ -10,6 +10,7 @@
 /* Eswitch acl egress external APIs */
 int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
+void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index);
 int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
                                   u16 passive_vport_num);
 int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num);
index 1ba03e2..bea7cc6 100644 (file)
@@ -647,22 +647,35 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
 }
 
 static struct mlx5_flow_handle *
-mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr,
+mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id,
+                                        const unsigned char *addr,
                                         struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
                                         struct mlx5_esw_bridge *bridge)
 {
        struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom;
+       struct mlx5_eswitch *tmp, *peer_esw = NULL;
        static struct mlx5_flow_handle *handle;
-       struct mlx5_eswitch *peer_esw;
+       int i;
 
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
+       if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
                return ERR_PTR(-ENODEV);
 
+       mlx5_devcom_for_each_peer_entry(devcom,
+                                       MLX5_DEVCOM_ESW_OFFLOADS,
+                                       tmp, i) {
+               if (mlx5_esw_is_owner(tmp, vport_num, esw_owner_vhca_id)) {
+                       peer_esw = tmp;
+                       break;
+               }
+       }
+       if (!peer_esw) {
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+               return ERR_PTR(-ENODEV);
+       }
+
        handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
                                                              bridge, peer_esw);
-
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        return handle;
 }
 
@@ -1369,8 +1382,9 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_ow
        entry->ingress_counter = counter;
 
        handle = peer ?
-               mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan,
-                                                        mlx5_fc_id(counter), bridge) :
+               mlx5_esw_bridge_ingress_flow_peer_create(vport_num, esw_owner_vhca_id,
+                                                        addr, vlan, mlx5_fc_id(counter),
+                                                        bridge) :
                mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan,
                                                    mlx5_fc_id(counter), bridge);
        if (IS_ERR(handle)) {
index 2eae594..2455f8b 100644 (file)
@@ -540,16 +540,29 @@ static struct mlx5_flow_handle *
 mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port)
 {
        struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom;
+       struct mlx5_eswitch *tmp, *peer_esw = NULL;
        static struct mlx5_flow_handle *handle;
-       struct mlx5_eswitch *peer_esw;
+       int i;
 
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
+       if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
                return ERR_PTR(-ENODEV);
 
+       mlx5_devcom_for_each_peer_entry(devcom,
+                                       MLX5_DEVCOM_ESW_OFFLOADS,
+                                       tmp, i) {
+               if (mlx5_esw_is_owner(tmp, port->vport_num, port->esw_owner_vhca_id)) {
+                       peer_esw = tmp;
+                       break;
+               }
+       }
+       if (!peer_esw) {
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+               return ERR_PTR(-ENODEV);
+       }
+
        handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw);
 
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        return handle;
 }
 
index 084a910..f370f67 100644 (file)
@@ -65,6 +65,15 @@ static void mlx5_esw_dl_port_free(struct devlink_port *dl_port)
        kfree(dl_port);
 }
 
+static const struct devlink_port_ops mlx5_esw_dl_port_ops = {
+       .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get,
+       .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set,
+       .port_fn_roce_get = mlx5_devlink_port_fn_roce_get,
+       .port_fn_roce_set = mlx5_devlink_port_fn_roce_set,
+       .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get,
+       .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set,
+};
+
 int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num)
 {
        struct mlx5_core_dev *dev = esw->dev;
@@ -87,7 +96,8 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
 
        devlink = priv_to_devlink(dev);
        dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
-       err = devl_port_register(devlink, dl_port, dl_port_index);
+       err = devl_port_register_with_ops(devlink, dl_port, dl_port_index,
+                                         &mlx5_esw_dl_port_ops);
        if (err)
                goto reg_err;
 
@@ -134,6 +144,20 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1
        return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port;
 }
 
+static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = {
+#ifdef CONFIG_MLX5_SF_MANAGER
+       .port_del = mlx5_devlink_sf_port_del,
+#endif
+       .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get,
+       .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set,
+       .port_fn_roce_get = mlx5_devlink_port_fn_roce_get,
+       .port_fn_roce_set = mlx5_devlink_port_fn_roce_set,
+#ifdef CONFIG_MLX5_SF_MANAGER
+       .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
+       .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
+#endif
+};
+
 int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
                                      u16 vport_num, u32 controller, u32 sfnum)
 {
@@ -156,7 +180,8 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
        devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller);
        devlink = priv_to_devlink(dev);
        dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
-       err = devl_port_register(devlink, dl_port, dl_port_index);
+       err = devl_port_register_with_ops(devlink, dl_port, dl_port_index,
+                                         &mlx5_esw_dl_sf_port_ops);
        if (err)
                return err;
 
index 901c537..31956cd 100644 (file)
@@ -92,7 +92,7 @@ mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
 {
        struct mlx5_vport *vport;
 
-       if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
+       if (!esw)
                return ERR_PTR(-EPERM);
 
        vport = xa_load(&esw->vports, vport_num);
@@ -113,7 +113,8 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
                 opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
        MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
        MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
-       MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+       if (vport || mlx5_core_is_ecpf(dev))
+               MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
        nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
                                     in, nic_vport_context);
 
@@ -309,11 +310,12 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 
 fdb_add:
        /* SRIOV is enabled: Forward UC MAC to vport */
-       if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY)
+       if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) {
                vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
 
-       esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
-                 vport, mac, vaddr->flow_rule);
+               esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
+                         vport, mac, vaddr->flow_rule);
+       }
 
        return 0;
 }
@@ -710,6 +712,9 @@ void esw_vport_change_handle_locked(struct mlx5_vport *vport)
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        u8 mac[ETH_ALEN];
 
+       if (!MLX5_CAP_GEN(dev, log_max_l2_table))
+               return;
+
        mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac);
        esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
                  vport->vport, mac);
@@ -946,7 +951,8 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
        vport->enabled = false;
 
        /* Disable events from this vport */
-       arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+       if (MLX5_CAP_GEN(esw->dev, log_max_l2_table))
+               arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
 
        if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
            MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
@@ -1616,7 +1622,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        struct mlx5_eswitch *esw;
        int err;
 
-       if (!MLX5_VPORT_MANAGER(dev))
+       if (!MLX5_VPORT_MANAGER(dev) && !MLX5_ESWITCH_MANAGER(dev))
                return 0;
 
        esw = kzalloc(sizeof(*esw), GFP_KERNEL);
@@ -1686,7 +1692,7 @@ abort:
 
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 {
-       if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
+       if (!esw)
                return;
 
        esw_info(esw->dev, "cleanup\n");
index add6cfa..c42c16d 100644 (file)
@@ -123,8 +123,14 @@ struct vport_ingress {
        } offloads;
 };
 
+enum vport_egress_acl_type {
+       VPORT_EGRESS_ACL_TYPE_DEFAULT,
+       VPORT_EGRESS_ACL_TYPE_SHARED_FDB,
+};
+
 struct vport_egress {
        struct mlx5_flow_table *acl;
+       enum vport_egress_acl_type type;
        struct mlx5_flow_handle  *allowed_vlan;
        struct mlx5_flow_group *vlan_grp;
        union {
@@ -136,7 +142,7 @@ struct vport_egress {
                struct {
                        struct mlx5_flow_group *fwd_grp;
                        struct mlx5_flow_handle *fwd_rule;
-                       struct mlx5_flow_handle *bounce_rule;
+                       struct xarray bounce_rules;
                        struct mlx5_flow_group *bounce_grp;
                } offloads;
        };
@@ -218,7 +224,7 @@ struct mlx5_eswitch_fdb {
                        struct mlx5_flow_group *send_to_vport_grp;
                        struct mlx5_flow_group *send_to_vport_meta_grp;
                        struct mlx5_flow_group *peer_miss_grp;
-                       struct mlx5_flow_handle **peer_miss_rules;
+                       struct mlx5_flow_handle **peer_miss_rules[MLX5_MAX_PORTS];
                        struct mlx5_flow_group *miss_grp;
                        struct mlx5_flow_handle **send_to_vport_meta_rules;
                        struct mlx5_flow_handle *miss_rule_uni;
@@ -249,7 +255,7 @@ struct mlx5_esw_offload {
        struct mlx5_flow_group *vport_rx_drop_group;
        struct mlx5_flow_handle *vport_rx_drop_rule;
        struct xarray vport_reps;
-       struct list_head peer_flows;
+       struct list_head peer_flows[MLX5_MAX_PORTS];
        struct mutex peer_mutex;
        struct mutex encap_tbl_lock; /* protects encap_tbl */
        DECLARE_HASHTABLE(encap_tbl, 8);
@@ -337,6 +343,7 @@ struct mlx5_eswitch {
        int                     mode;
        u16                     manager_vport;
        u16                     first_host_vport;
+       u8                      num_peers;
        struct mlx5_esw_functions esw_funcs;
        struct {
                u32             large_group_num;
@@ -506,12 +513,12 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
                                        struct netlink_ext_ack *extack);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
                                        enum devlink_eswitch_encap_mode *encap);
-int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
-                                          u8 *hw_addr, int *hw_addr_len,
-                                          struct netlink_ext_ack *extack);
-int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
-                                          const u8 *hw_addr, int hw_addr_len,
-                                          struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port,
+                                    u8 *hw_addr, int *hw_addr_len,
+                                    struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port,
+                                    const u8 *hw_addr, int hw_addr_len,
+                                    struct netlink_ext_ack *extack);
 int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
                                  struct netlink_ext_ack *extack);
 int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
@@ -578,6 +585,13 @@ mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num)
        return esw->manager_vport == vport_num;
 }
 
+static inline bool mlx5_esw_is_owner(struct mlx5_eswitch *esw, u16 vport_num,
+                                    u16 esw_owner_vhca_id)
+{
+       return esw_owner_vhca_id == MLX5_CAP_GEN(esw->dev, vhca_id) ||
+               (vport_num == MLX5_VPORT_UPLINK && mlx5_lag_is_master(esw->dev));
+}
+
 static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
 {
        return mlx5_core_is_ecpf_esw_manager(dev) ?
@@ -686,6 +700,14 @@ mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr
 struct mlx5_flow_handle *
 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag);
 
+void mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+                                        u32 *flow_group_in,
+                                        int match_params);
+
+void mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw,
+                                  u16 vport,
+                                  struct mlx5_flow_spec *spec);
+
 int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num);
 void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num);
 
@@ -740,9 +762,9 @@ void esw_vport_change_handle_locked(struct mlx5_vport *vport);
 
 bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
 
-int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
-                                           struct mlx5_eswitch *slave_esw);
-void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
+                                            struct mlx5_eswitch *slave_esw, int max_slaves);
+void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
                                              struct mlx5_eswitch *slave_esw);
 int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
 
@@ -794,14 +816,14 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
 }
 
 static inline int
-mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
-                                       struct mlx5_eswitch *slave_esw)
+mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
+                                        struct mlx5_eswitch *slave_esw, int max_slaves)
 {
        return 0;
 }
 
 static inline void
-mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
                                         struct mlx5_eswitch *slave_esw) {}
 
 static inline int
index 8d19c20..29de4e7 100644 (file)
@@ -838,6 +838,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_spec *spec;
        void *misc;
+       u16 vport;
 
        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec) {
@@ -847,20 +848,43 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
        MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
-       /* source vport is the esw manager */
-       MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
-       if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
-               MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
-                        MLX5_CAP_GEN(from_esw->dev, vhca_id));
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
-       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-       if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
-               MLX5_SET_TO_ONES(fte_match_set_misc, misc,
-                                source_eswitch_owner_vhca_id);
 
        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+       /* source vport is the esw manager */
+       vport = from_esw->manager_vport;
+
+       if (mlx5_eswitch_vport_match_metadata_enabled(on_esw)) {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+               MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+                        mlx5_eswitch_get_vport_metadata_for_match(from_esw, vport));
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+               MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+                        mlx5_eswitch_get_vport_metadata_mask());
+
+               spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+       } else {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+               MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+               if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
+                       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+                                MLX5_CAP_GEN(from_esw->dev, vhca_id));
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+               if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
+                       MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+                                        source_eswitch_owner_vhca_id);
+
+               spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+       }
+
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest.vport.num = rep->vport;
        dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
@@ -1108,7 +1132,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
                flows[vport->index] = flow;
        }
 
-       esw->fdb_table.offloads.peer_miss_rules = flows;
+       esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows;
 
        kvfree(spec);
        return 0;
@@ -1136,13 +1160,14 @@ alloc_flows_err:
        return err;
 }
 
-static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
+static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
+                                       struct mlx5_core_dev *peer_dev)
 {
        struct mlx5_flow_handle **flows;
        struct mlx5_vport *vport;
        unsigned long i;
 
-       flows = esw->fdb_table.offloads.peer_miss_rules;
+       flows = esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)];
 
        mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
                mlx5_del_flow_rules(flows[vport->index]);
@@ -1269,8 +1294,10 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 #define MAX_PF_SQ 256
 #define MAX_SQ_NVPORTS 32
 
-static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
-                                          u32 *flow_group_in)
+void
+mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+                                   u32 *flow_group_in,
+                                   int match_params)
 {
        void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
                                            flow_group_in,
@@ -1279,7 +1306,7 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
        if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
                MLX5_SET(create_flow_group_in, flow_group_in,
                         match_criteria_enable,
-                        MLX5_MATCH_MISC_PARAMETERS_2);
+                        MLX5_MATCH_MISC_PARAMETERS_2 | match_params);
 
                MLX5_SET(fte_match_param, match_criteria,
                         misc_parameters_2.metadata_reg_c_0,
@@ -1287,7 +1314,7 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
        } else {
                MLX5_SET(create_flow_group_in, flow_group_in,
                         match_criteria_enable,
-                        MLX5_MATCH_MISC_PARAMETERS);
+                        MLX5_MATCH_MISC_PARAMETERS | match_params);
 
                MLX5_SET_TO_ONES(fte_match_param, match_criteria,
                                 misc_parameters.source_port);
@@ -1463,14 +1490,13 @@ esw_create_send_to_vport_group(struct mlx5_eswitch *esw,
 
        memset(flow_group_in, 0, inlen);
 
-       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
-                MLX5_MATCH_MISC_PARAMETERS);
+       mlx5_esw_set_flow_group_source_port(esw, flow_group_in, MLX5_MATCH_MISC_PARAMETERS);
 
        match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-
        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
-       MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
-       if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+
+       if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+           MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
                MLX5_SET_TO_ONES(fte_match_param, match_criteria,
                                 misc_parameters.source_eswitch_owner_vhca_id);
                MLX5_SET(create_flow_group_in, flow_group_in,
@@ -1548,6 +1574,7 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
                               u32 *flow_group_in,
                               int *ix)
 {
+       int max_peer_ports = (esw->total_vports - 1) * (MLX5_MAX_PORTS - 1);
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_flow_group *g;
        void *match_criteria;
@@ -1558,7 +1585,7 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
 
        memset(flow_group_in, 0, inlen);
 
-       esw_set_flow_group_source_port(esw, flow_group_in);
+       mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0);
 
        if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
                match_criteria = MLX5_ADDR_OF(create_flow_group_in,
@@ -1574,8 +1601,8 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
 
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
-                *ix + esw->total_vports - 1);
-       *ix += esw->total_vports;
+                *ix + max_peer_ports);
+       *ix += max_peer_ports + 1;
 
        g = mlx5_create_flow_group(fdb, flow_group_in);
        if (IS_ERR(g)) {
@@ -1677,7 +1704,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
         * total vports of the peer (currently is also uses esw->total_vports).
         */
        table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
-                    esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS;
+                    esw->total_vports * MLX5_MAX_PORTS + MLX5_ESW_MISS_FLOWS;
 
        /* create the slow path fdb with encap set, so further table instances
         * can be created at run time while VFs are probed if the FW allows that.
@@ -1845,7 +1872,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
                return -ENOMEM;
 
        /* create vport rx group */
-       esw_set_flow_group_source_port(esw, flow_group_in);
+       mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0);
 
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
@@ -1915,21 +1942,13 @@ static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw)
                mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group);
 }
 
-struct mlx5_flow_handle *
-mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
-                                 struct mlx5_flow_destination *dest)
+void
+mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw,
+                             u16 vport,
+                             struct mlx5_flow_spec *spec)
 {
-       struct mlx5_flow_act flow_act = {0};
-       struct mlx5_flow_handle *flow_rule;
-       struct mlx5_flow_spec *spec;
        void *misc;
 
-       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-       if (!spec) {
-               flow_rule = ERR_PTR(-ENOMEM);
-               goto out;
-       }
-
        if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
                misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
                MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
@@ -1949,6 +1968,23 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
 
                spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
        }
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
+                                 struct mlx5_flow_destination *dest)
+{
+       struct mlx5_flow_act flow_act = {0};
+       struct mlx5_flow_handle *flow_rule;
+       struct mlx5_flow_spec *spec;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec) {
+               flow_rule = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       mlx5_esw_set_spec_source_port(esw, vport, spec);
 
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
@@ -2476,6 +2512,7 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
                                        struct mlx5_vport *vport,
                                        struct mlx5_flow_table *acl)
 {
+       u16 slave_index = MLX5_CAP_GEN(slave, vhca_id);
        struct mlx5_flow_handle *flow_rule = NULL;
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {};
@@ -2491,8 +2528,7 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                            misc_parameters);
        MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
-       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
-                MLX5_CAP_GEN(slave, vhca_id));
+       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, slave_index);
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
@@ -2507,44 +2543,35 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
 
        flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
                                        &dest, 1);
-       if (IS_ERR(flow_rule))
+       if (IS_ERR(flow_rule)) {
                err = PTR_ERR(flow_rule);
-       else
-               vport->egress.offloads.bounce_rule = flow_rule;
+       } else {
+               err = xa_insert(&vport->egress.offloads.bounce_rules,
+                               slave_index, flow_rule, GFP_KERNEL);
+               if (err)
+                       mlx5_del_flow_rules(flow_rule);
+       }
 
        kvfree(spec);
        return err;
 }
 
-static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
-                                     struct mlx5_core_dev *slave)
+static int esw_master_egress_create_resources(struct mlx5_flow_namespace *egress_ns,
+                                             struct mlx5_vport *vport, size_t count)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct mlx5_eswitch *esw = master->priv.eswitch;
        struct mlx5_flow_table_attr ft_attr = {
-               .max_fte = 1, .prio = 0, .level = 0,
+               .max_fte = count, .prio = 0, .level = 0,
                .flags = MLX5_FLOW_TABLE_OTHER_VPORT,
        };
-       struct mlx5_flow_namespace *egress_ns;
        struct mlx5_flow_table *acl;
        struct mlx5_flow_group *g;
-       struct mlx5_vport *vport;
        void *match_criteria;
        u32 *flow_group_in;
        int err;
 
-       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
-
-       egress_ns = mlx5_get_flow_vport_acl_namespace(master,
-                                                     MLX5_FLOW_NAMESPACE_ESW_EGRESS,
-                                                     vport->index);
-       if (!egress_ns)
-               return -EINVAL;
-
        if (vport->egress.acl)
-               return -EINVAL;
+               return 0;
 
        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
        if (!flow_group_in)
@@ -2568,7 +2595,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
        MLX5_SET(create_flow_group_in, flow_group_in,
                 source_eswitch_owner_vhca_id_valid, 1);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
-       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, count);
 
        g = mlx5_create_flow_group(acl, flow_group_in);
        if (IS_ERR(g)) {
@@ -2576,19 +2603,15 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
                goto err_group;
        }
 
-       err = __esw_set_master_egress_rule(master, slave, vport, acl);
-       if (err)
-               goto err_rule;
-
        vport->egress.acl = acl;
        vport->egress.offloads.bounce_grp = g;
+       vport->egress.type = VPORT_EGRESS_ACL_TYPE_SHARED_FDB;
+       xa_init_flags(&vport->egress.offloads.bounce_rules, XA_FLAGS_ALLOC);
 
        kvfree(flow_group_in);
 
        return 0;
 
-err_rule:
-       mlx5_destroy_flow_group(g);
 err_group:
        mlx5_destroy_flow_table(acl);
 out:
@@ -2596,18 +2619,70 @@ out:
        return err;
 }
 
-static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+static void esw_master_egress_destroy_resources(struct mlx5_vport *vport)
+{
+       mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+       mlx5_destroy_flow_table(vport->egress.acl);
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+                                     struct mlx5_core_dev *slave, size_t count)
+{
+       struct mlx5_eswitch *esw = master->priv.eswitch;
+       u16 slave_index = MLX5_CAP_GEN(slave, vhca_id);
+       struct mlx5_flow_namespace *egress_ns;
+       struct mlx5_vport *vport;
+       int err;
+
+       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (IS_ERR(vport))
+               return PTR_ERR(vport);
+
+       egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+                                                     MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+                                                     vport->index);
+       if (!egress_ns)
+               return -EINVAL;
+
+       if (vport->egress.acl && vport->egress.type != VPORT_EGRESS_ACL_TYPE_SHARED_FDB)
+               return 0;
+
+       err = esw_master_egress_create_resources(egress_ns, vport, count);
+       if (err)
+               return err;
+
+       if (xa_load(&vport->egress.offloads.bounce_rules, slave_index))
+               return -EINVAL;
+
+       err = __esw_set_master_egress_rule(master, slave, vport, vport->egress.acl);
+       if (err)
+               goto err_rule;
+
+       return 0;
+
+err_rule:
+       esw_master_egress_destroy_resources(vport);
+       return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev,
+                                        struct mlx5_core_dev *slave_dev)
 {
        struct mlx5_vport *vport;
 
        vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
                                       dev->priv.eswitch->manager_vport);
 
-       esw_acl_egress_ofld_cleanup(vport);
+       esw_acl_egress_ofld_bounce_rule_destroy(vport, MLX5_CAP_GEN(slave_dev, vhca_id));
+
+       if (xa_empty(&vport->egress.offloads.bounce_rules)) {
+               esw_acl_egress_ofld_cleanup(vport);
+               xa_destroy(&vport->egress.offloads.bounce_rules);
+       }
 }
 
-int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
-                                           struct mlx5_eswitch *slave_esw)
+int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
+                                            struct mlx5_eswitch *slave_esw, int max_slaves)
 {
        int err;
 
@@ -2617,7 +2692,7 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
                return err;
 
        err = esw_set_master_egress_rule(master_esw->dev,
-                                        slave_esw->dev);
+                                        slave_esw->dev, max_slaves);
        if (err)
                goto err_acl;
 
@@ -2625,21 +2700,21 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
 
 err_acl:
        esw_set_slave_root_fdb(NULL, slave_esw->dev);
-
        return err;
 }
 
-void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
                                              struct mlx5_eswitch *slave_esw)
 {
-       esw_unset_master_egress_rule(master_esw->dev);
        esw_set_slave_root_fdb(NULL, slave_esw->dev);
+       esw_unset_master_egress_rule(master_esw->dev, slave_esw->dev);
 }
 
 #define ESW_OFFLOADS_DEVCOM_PAIR       (0)
 #define ESW_OFFLOADS_DEVCOM_UNPAIR     (1)
 
-static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw,
+                                              struct mlx5_eswitch *peer_esw)
 {
        const struct mlx5_eswitch_rep_ops *ops;
        struct mlx5_eswitch_rep *rep;
@@ -2652,18 +2727,19 @@ static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
                        ops = esw->offloads.rep_ops[rep_type];
                        if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
                            ops->event)
-                               ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+                               ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, peer_esw);
                }
        }
 }
 
-static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
+static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw,
+                                    struct mlx5_eswitch *peer_esw)
 {
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
        mlx5e_tc_clean_fdb_peer_flows(esw);
 #endif
-       mlx5_esw_offloads_rep_event_unpair(esw);
-       esw_del_fdb_peer_miss_rules(esw);
+       mlx5_esw_offloads_rep_event_unpair(esw, peer_esw);
+       esw_del_fdb_peer_miss_rules(esw, peer_esw->dev);
 }
 
 static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
@@ -2694,7 +2770,7 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
        return 0;
 
 err_out:
-       mlx5_esw_offloads_unpair(esw);
+       mlx5_esw_offloads_unpair(esw, peer_esw);
        return err;
 }
 
@@ -2702,7 +2778,9 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
                                         struct mlx5_eswitch *peer_esw,
                                         bool pair)
 {
+       u8 peer_idx = mlx5_get_dev_index(peer_esw->dev);
        struct mlx5_flow_root_namespace *peer_ns;
+       u8 idx = mlx5_get_dev_index(esw->dev);
        struct mlx5_flow_root_namespace *ns;
        int err;
 
@@ -2710,18 +2788,18 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
        ns = esw->dev->priv.steering->fdb_root_ns;
 
        if (pair) {
-               err = mlx5_flow_namespace_set_peer(ns, peer_ns);
+               err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_idx);
                if (err)
                        return err;
 
-               err = mlx5_flow_namespace_set_peer(peer_ns, ns);
+               err = mlx5_flow_namespace_set_peer(peer_ns, ns, idx);
                if (err) {
-                       mlx5_flow_namespace_set_peer(ns, NULL);
+                       mlx5_flow_namespace_set_peer(ns, NULL, peer_idx);
                        return err;
                }
        } else {
-               mlx5_flow_namespace_set_peer(ns, NULL);
-               mlx5_flow_namespace_set_peer(peer_ns, NULL);
+               mlx5_flow_namespace_set_peer(ns, NULL, peer_idx);
+               mlx5_flow_namespace_set_peer(peer_ns, NULL, idx);
        }
 
        return 0;
@@ -2758,18 +2836,23 @@ static int mlx5_esw_offloads_devcom_event(int event,
 
                esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true;
                peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true;
-               mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
+               esw->num_peers++;
+               peer_esw->num_peers++;
+               mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
                break;
 
        case ESW_OFFLOADS_DEVCOM_UNPAIR:
                if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)])
                        break;
 
-               mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+               peer_esw->num_peers--;
+               esw->num_peers--;
+               if (!esw->num_peers && !peer_esw->num_peers)
+                       mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
                esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false;
                peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false;
-               mlx5_esw_offloads_unpair(peer_esw);
-               mlx5_esw_offloads_unpair(esw);
+               mlx5_esw_offloads_unpair(peer_esw, esw);
+               mlx5_esw_offloads_unpair(esw, peer_esw);
                mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
                break;
        }
@@ -2777,7 +2860,7 @@ static int mlx5_esw_offloads_devcom_event(int event,
        return 0;
 
 err_pair:
-       mlx5_esw_offloads_unpair(esw);
+       mlx5_esw_offloads_unpair(esw, peer_esw);
 err_peer:
        mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
 err_out:
@@ -2789,8 +2872,10 @@ err_out:
 void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
 {
        struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+       int i;
 
-       INIT_LIST_HEAD(&esw->offloads.peer_flows);
+       for (i = 0; i < MLX5_MAX_PORTS; i++)
+               INIT_LIST_HEAD(&esw->offloads.peer_flows[i]);
        mutex_init(&esw->offloads.peer_mutex);
 
        if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
@@ -2804,9 +2889,11 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
                                       mlx5_esw_offloads_devcom_event,
                                       esw);
 
+       esw->num_peers = 0;
        mlx5_devcom_send_event(devcom,
                               MLX5_DEVCOM_ESW_OFFLOADS,
-                              ESW_OFFLOADS_DEVCOM_PAIR, esw);
+                              ESW_OFFLOADS_DEVCOM_PAIR,
+                              ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
 }
 
 void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
@@ -2820,6 +2907,7 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
                return;
 
        mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+                              ESW_OFFLOADS_DEVCOM_UNPAIR,
                               ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
 
        mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
@@ -2834,9 +2922,6 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
              MLX5_FDB_TO_VPORT_REG_C_0))
                return false;
 
-       if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
-               return false;
-
        return true;
 }
 
@@ -3285,7 +3370,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
        /* If changing from switchdev to legacy mode without sriov enabled,
         * no need to create legacy fdb.
         */
-       if (!mlx5_sriov_is_enabled(esw->dev))
+       if (!mlx5_core_is_pf(esw->dev) || !mlx5_sriov_is_enabled(esw->dev))
                return 0;
 
        err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS);
@@ -3926,9 +4011,9 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
               mlx5_esw_is_sf_vport(esw, vport_num);
 }
 
-int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
-                                          u8 *hw_addr, int *hw_addr_len,
-                                          struct netlink_ext_ack *extack)
+int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port,
+                                    u8 *hw_addr, int *hw_addr_len,
+                                    struct netlink_ext_ack *extack)
 {
        struct mlx5_eswitch *esw;
        struct mlx5_vport *vport;
@@ -3955,9 +4040,9 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
        return 0;
 }
 
-int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
-                                          const u8 *hw_addr, int hw_addr_len,
-                                          struct netlink_ext_ack *extack)
+int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port,
+                                    const u8 *hw_addr, int hw_addr_len,
+                                    struct netlink_ext_ack *extack)
 {
        struct mlx5_eswitch *esw;
        u16 vport_num;
index 144e594..11374c3 100644 (file)
@@ -139,7 +139,8 @@ static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace
 }
 
 static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns,
-                                 struct mlx5_flow_root_namespace *peer_ns)
+                                 struct mlx5_flow_root_namespace *peer_ns,
+                                 u8 peer_idx)
 {
        return 0;
 }
index 8ef4254..b6b9a5a 100644 (file)
@@ -93,7 +93,8 @@ struct mlx5_flow_cmds {
                                      struct mlx5_modify_hdr *modify_hdr);
 
        int (*set_peer)(struct mlx5_flow_root_namespace *ns,
-                       struct mlx5_flow_root_namespace *peer_ns);
+                       struct mlx5_flow_root_namespace *peer_ns,
+                       u8 peer_idx);
 
        int (*create_ns)(struct mlx5_flow_root_namespace *ns);
        int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
index 19da02c..4ef04aa 100644 (file)
@@ -3620,7 +3620,8 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
 }
 
 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
-                                struct mlx5_flow_root_namespace *peer_ns)
+                                struct mlx5_flow_root_namespace *peer_ns,
+                                u8 peer_idx)
 {
        if (peer_ns && ns->mode != peer_ns->mode) {
                mlx5_core_err(ns->dev,
@@ -3628,7 +3629,7 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
                return -EINVAL;
        }
 
-       return ns->cmds->set_peer(ns, peer_ns);
+       return ns->cmds->set_peer(ns, peer_ns, peer_idx);
 }
 
 /* This function should be called only at init stage of the namespace.
index f137a06..200ec94 100644 (file)
@@ -295,7 +295,8 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
 const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
 
 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
-                                struct mlx5_flow_root_namespace *peer_ns);
+                                struct mlx5_flow_root_namespace *peer_ns,
+                                u8 peer_idx);
 
 int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
                                 enum mlx5_flow_steering_mode mode);
index 7bb7be0..fb2035a 100644 (file)
@@ -196,14 +196,11 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
                        return err;
        }
 
-       if (MLX5_CAP_GEN(dev, vport_group_manager) &&
-           MLX5_ESWITCH_MANAGER(dev)) {
+       if (MLX5_ESWITCH_MANAGER(dev)) {
                err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
                if (err)
                        return err;
-       }
 
-       if (MLX5_ESWITCH_MANAGER(dev)) {
                err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
                if (err)
                        return err;
index 5d331b9..c820f7d 100644 (file)
@@ -550,6 +550,29 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
        }
 }
 
+static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
+{
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_eswitch *master_esw = dev0->priv.eswitch;
+       int err;
+       int i;
+
+       for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) {
+               struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
+
+               err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
+                                                              slave_esw, ldev->ports);
+               if (err)
+                       goto err;
+       }
+       return 0;
+err:
+       for (; i > MLX5_LAG_P1; i--)
+               mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
+                                                        ldev->pf[i].dev->priv.eswitch);
+       return err;
+}
+
 static int mlx5_create_lag(struct mlx5_lag *ldev,
                           struct lag_tracker *tracker,
                           enum mlx5_lag_mode mode,
@@ -557,7 +580,6 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 {
        bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
-       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        int err;
 
@@ -575,8 +597,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
        }
 
        if (shared_fdb) {
-               err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
-                                                             dev1->priv.eswitch);
+               err = mlx5_lag_create_single_fdb(ldev);
                if (err)
                        mlx5_core_err(dev0, "Can't enable single FDB mode\n");
                else
@@ -647,19 +668,21 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 {
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
-       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+       struct mlx5_eswitch *master_esw = dev0->priv.eswitch;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        bool roce_lag = __mlx5_lag_is_roce(ldev);
        unsigned long flags = ldev->mode_flags;
        int err;
+       int i;
 
        ldev->mode = MLX5_LAG_MODE_NONE;
        ldev->mode_flags = 0;
        mlx5_lag_mp_reset(ldev);
 
        if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
-               mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
-                                                        dev1->priv.eswitch);
+               for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++)
+                       mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
+                                                                ldev->pf[i].dev->priv.eswitch);
                clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
        }
 
@@ -801,8 +824,8 @@ bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
            is_mdev_switchdev_mode(dev1) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
-           mlx5_devcom_is_paired(dev0->priv.devcom,
-                                 MLX5_DEVCOM_ESW_OFFLOADS) &&
+           mlx5_devcom_comp_is_ready(dev0->priv.devcom,
+                                     MLX5_DEVCOM_ESW_OFFLOADS) &&
            MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
            MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
            MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
index b7d779d..8472bbb 100644 (file)
@@ -19,7 +19,7 @@ struct mlx5_devcom_component {
 
        mlx5_devcom_event_handler_t handler;
        struct rw_semaphore sem;
-       bool paired;
+       bool ready;
 };
 
 struct mlx5_devcom_list {
@@ -193,7 +193,7 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
 
 int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
                           enum mlx5_devcom_components id,
-                          int event,
+                          int event, int rollback_event,
                           void *event_data)
 {
        struct mlx5_devcom_component *comp;
@@ -210,84 +210,134 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
 
                if (i != devcom->idx && data) {
                        err = comp->handler(event, data, event_data);
-                       break;
+                       if (err)
+                               goto rollback;
                }
        }
 
        up_write(&comp->sem);
+       return 0;
+
+rollback:
+       while (i--) {
+               void *data = rcu_dereference_protected(comp->device[i].data,
+                                                      lockdep_is_held(&comp->sem));
+
+               if (i != devcom->idx && data)
+                       comp->handler(rollback_event, data, event_data);
+       }
+
+       up_write(&comp->sem);
        return err;
 }
 
-void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
-                           enum mlx5_devcom_components id,
-                           bool paired)
+void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom,
+                               enum mlx5_devcom_components id,
+                               bool ready)
 {
        struct mlx5_devcom_component *comp;
 
        comp = &devcom->priv->components[id];
        WARN_ON(!rwsem_is_locked(&comp->sem));
 
-       WRITE_ONCE(comp->paired, paired);
+       WRITE_ONCE(comp->ready, ready);
 }
 
-bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
-                          enum mlx5_devcom_components id)
+bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom,
+                              enum mlx5_devcom_components id)
 {
        if (IS_ERR_OR_NULL(devcom))
                return false;
 
-       return READ_ONCE(devcom->priv->components[id].paired);
+       return READ_ONCE(devcom->priv->components[id].ready);
 }
 
-void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
-                               enum mlx5_devcom_components id)
+bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom,
+                                    enum mlx5_devcom_components id)
 {
        struct mlx5_devcom_component *comp;
-       int i;
 
        if (IS_ERR_OR_NULL(devcom))
-               return NULL;
+               return false;
 
        comp = &devcom->priv->components[id];
        down_read(&comp->sem);
-       if (!READ_ONCE(comp->paired)) {
+       if (!READ_ONCE(comp->ready)) {
                up_read(&comp->sem);
-               return NULL;
+               return false;
        }
 
-       for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
-               if (i != devcom->idx)
-                       break;
+       return true;
+}
+
+void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom,
+                                  enum mlx5_devcom_components id)
+{
+       struct mlx5_devcom_component *comp = &devcom->priv->components[id];
 
-       return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
+       up_read(&comp->sem);
 }
 
-void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
+void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom,
+                                    enum mlx5_devcom_components id,
+                                    int *i)
 {
        struct mlx5_devcom_component *comp;
-       int i;
+       void *ret;
+       int idx;
 
-       if (IS_ERR_OR_NULL(devcom))
-               return NULL;
+       comp = &devcom->priv->components[id];
 
-       for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
-               if (i != devcom->idx)
-                       break;
+       if (*i == MLX5_DEVCOM_PORTS_SUPPORTED)
+               return NULL;
+       for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) {
+               if (idx != devcom->idx) {
+                       ret = rcu_dereference_protected(comp->device[idx].data,
+                                                       lockdep_is_held(&comp->sem));
+                       if (ret)
+                               break;
+               }
+       }
 
-       comp = &devcom->priv->components[id];
-       /* This can change concurrently, however 'data' pointer will remain
-        * valid for the duration of RCU read section.
-        */
-       if (!READ_ONCE(comp->paired))
+       if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) {
+               *i = idx;
                return NULL;
+       }
+       *i = idx + 1;
 
-       return rcu_dereference(comp->device[i].data);
+       return ret;
 }
 
-void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
-                                  enum mlx5_devcom_components id)
+void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom,
+                                        enum mlx5_devcom_components id,
+                                        int *i)
 {
-       struct mlx5_devcom_component *comp = &devcom->priv->components[id];
+       struct mlx5_devcom_component *comp;
+       void *ret;
+       int idx;
 
-       up_read(&comp->sem);
+       comp = &devcom->priv->components[id];
+
+       if (*i == MLX5_DEVCOM_PORTS_SUPPORTED)
+               return NULL;
+       for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) {
+               if (idx != devcom->idx) {
+                       /* This can change concurrently, however 'data' pointer will remain
+                        * valid for the duration of RCU read section.
+                        */
+                       if (!READ_ONCE(comp->ready))
+                               return NULL;
+                       ret = rcu_dereference(comp->device[idx].data);
+                       if (ret)
+                               break;
+               }
+       }
+
+       if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) {
+               *i = idx;
+               return NULL;
+       }
+       *i = idx + 1;
+
+       return ret;
 }
index 9a496f4..bb1970b 100644 (file)
@@ -30,20 +30,33 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
 
 int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
                           enum mlx5_devcom_components id,
-                          int event,
+                          int event, int rollback_event,
                           void *event_data);
 
-void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
-                           enum mlx5_devcom_components id,
-                           bool paired);
-bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
-                          enum mlx5_devcom_components id);
+void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom,
+                               enum mlx5_devcom_components id,
+                               bool ready);
+bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom,
+                              enum mlx5_devcom_components id);
 
-void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
-                               enum mlx5_devcom_components id);
-void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
-void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom,
+                                    enum mlx5_devcom_components id);
+void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom,
                                   enum mlx5_devcom_components id);
+void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom,
+                                    enum mlx5_devcom_components id, int *i);
 
-#endif
+#define mlx5_devcom_for_each_peer_entry(devcom, id, data, i)                   \
+       for (i = 0, data = mlx5_devcom_get_next_peer_data(devcom, id, &i);      \
+            data;                                                              \
+            data = mlx5_devcom_get_next_peer_data(devcom, id, &i))
+
+void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom,
+                                        enum mlx5_devcom_components id, int *i);
 
+#define mlx5_devcom_for_each_peer_entry_rcu(devcom, id, data, i)               \
+       for (i = 0, data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i);  \
+            data;                                                              \
+            data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i))
+
+#endif
index 8ff1631..4450091 100644 (file)
@@ -99,7 +99,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev)
        int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
        struct mlx5_mpfs *mpfs;
 
-       if (!MLX5_ESWITCH_MANAGER(dev))
+       if (!MLX5_ESWITCH_MANAGER(dev) || l2table_size == 1)
                return 0;
 
        mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
index 540cf05..a42f6cd 100644 (file)
@@ -30,9 +30,8 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
        struct mlx5_flow_spec *spec;
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *fg;
-       void *match_criteria;
+       struct mlx5_eswitch *esw;
        u32 *flow_group_in;
-       void *misc;
        int err;
 
        if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
@@ -63,12 +62,8 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
                goto free;
        }
 
-       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
-                MLX5_MATCH_MISC_PARAMETERS);
-       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
-                                     match_criteria);
-       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
-                        misc_parameters.source_port);
+       esw = dev->priv.eswitch;
+       mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0);
 
        fg = mlx5_create_flow_group(ft, flow_group_in);
        if (IS_ERR(fg)) {
@@ -77,14 +72,7 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
                goto destroy_flow_table;
        }
 
-       spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-                           misc_parameters);
-       MLX5_SET(fte_match_set_misc, misc, source_port,
-                dev->priv.eswitch->manager_vport);
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-                           misc_parameters);
-       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       mlx5_esw_set_spec_source_port(esw, esw->manager_vport, spec);
 
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
        flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0);
@@ -115,7 +103,7 @@ free:
 
 static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev)
 {
-       mlx5_core_roce_gid_set(dev, 0, 0, 0,
+       mlx5_core_roce_gid_set(dev, 0, MLX5_ROCE_VERSION_2, 0,
                               NULL, NULL, false, 0, 1);
 }
 
@@ -135,7 +123,7 @@ static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev)
 
        mlx5_rdma_make_default_gid(dev, &gid);
        return mlx5_core_roce_gid_set(dev, 0,
-                                     MLX5_ROCE_VERSION_1,
+                                     MLX5_ROCE_VERSION_2,
                                      0, gid.raw, mac,
                                      false, 0, 1);
 }
index 7d955a4..9c02e5e 100644 (file)
@@ -283,7 +283,7 @@ out:
 static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
                       const struct devlink_port_new_attrs *new_attr,
                       struct netlink_ext_ack *extack,
-                      unsigned int *new_port_index)
+                      struct devlink_port **dl_port)
 {
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5_sf *sf;
@@ -297,7 +297,7 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
                                                new_attr->controller, new_attr->sfnum);
        if (err)
                goto esw_err;
-       *new_port_index = sf->port_index;
+       *dl_port = &sf->dl_port;
        trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum);
        return 0;
 
@@ -339,7 +339,7 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_
 int mlx5_devlink_sf_port_new(struct devlink *devlink,
                             const struct devlink_port_new_attrs *new_attr,
                             struct netlink_ext_ack *extack,
-                            unsigned int *new_port_index)
+                            struct devlink_port **dl_port)
 {
        struct mlx5_core_dev *dev = devlink_priv(devlink);
        struct mlx5_sf_table *table;
@@ -355,7 +355,7 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink,
                                   "Port add is only supported in eswitch switchdev mode or SF ports are disabled.");
                return -EOPNOTSUPP;
        }
-       err = mlx5_sf_add(dev, table, new_attr, extack, new_port_index);
+       err = mlx5_sf_add(dev, table, new_attr, extack, dl_port);
        mlx5_sf_table_put(table);
        return err;
 }
@@ -379,7 +379,8 @@ static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf)
        }
 }
 
-int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
+int mlx5_devlink_sf_port_del(struct devlink *devlink,
+                            struct devlink_port *dl_port,
                             struct netlink_ext_ack *extack)
 {
        struct mlx5_core_dev *dev = devlink_priv(devlink);
@@ -394,7 +395,7 @@ int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
                                   "Port del is only supported in eswitch switchdev mode or SF ports are disabled.");
                return -EOPNOTSUPP;
        }
-       sf = mlx5_sf_lookup_by_index(table, port_index);
+       sf = mlx5_sf_lookup_by_index(table, dl_port->index);
        if (!sf) {
                err = -ENODEV;
                goto sf_err;
index 3a480e0..860f9dd 100644 (file)
@@ -21,8 +21,9 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev);
 int mlx5_devlink_sf_port_new(struct devlink *devlink,
                             const struct devlink_port_new_attrs *add_attr,
                             struct netlink_ext_ack *extack,
-                            unsigned int *new_port_index);
-int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
+                            struct devlink_port **dl_port);
+int mlx5_devlink_sf_port_del(struct devlink *devlink,
+                            struct devlink_port *dl_port,
                             struct netlink_ext_ack *extack);
 int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state *state,
index 20d7662..f07d009 100644 (file)
@@ -74,9 +74,6 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
        int err, vf, num_msix_count;
 
-       if (!MLX5_ESWITCH_MANAGER(dev))
-               goto enable_vfs_hca;
-
        err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs);
        if (err) {
                mlx5_core_warn(dev,
@@ -84,7 +81,6 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
                return err;
        }
 
-enable_vfs_hca:
        num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
        for (vf = 0; vf < num_vfs; vf++) {
                /* Notify the VF before its enablement to let it set
index 0eb9a8d..4e9bc18 100644 (file)
@@ -2071,8 +2071,9 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
        struct mlx5dr_action *action;
        u8 peer_vport;
 
-       peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi);
-       vport_dmn = peer_vport ? dmn->peer_dmn : dmn;
+       peer_vport = vhca_id_valid && mlx5_core_is_pf(dmn->mdev) &&
+               (vhca_id != dmn->info.caps.gvmi);
+       vport_dmn = peer_vport ? dmn->peer_dmn[vhca_id] : dmn;
        if (!vport_dmn) {
                mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n");
                return NULL;
index 9a2dfe6..75dc85d 100644 (file)
@@ -555,17 +555,18 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
 }
 
 void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
-                           struct mlx5dr_domain *peer_dmn)
+                           struct mlx5dr_domain *peer_dmn,
+                           u8 peer_idx)
 {
        mlx5dr_domain_lock(dmn);
 
-       if (dmn->peer_dmn)
-               refcount_dec(&dmn->peer_dmn->refcount);
+       if (dmn->peer_dmn[peer_idx])
+               refcount_dec(&dmn->peer_dmn[peer_idx]->refcount);
 
-       dmn->peer_dmn = peer_dmn;
+       dmn->peer_dmn[peer_idx] = peer_dmn;
 
-       if (dmn->peer_dmn)
-               refcount_inc(&dmn->peer_dmn->refcount);
+       if (dmn->peer_dmn[peer_idx])
+               refcount_inc(&dmn->peer_dmn[peer_idx]->refcount);
 
        mlx5dr_domain_unlock(dmn);
 }
index 2010d4a..69d7a8f 100644 (file)
@@ -1647,6 +1647,7 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
                                 u8 *tag)
 {
        struct mlx5dr_match_misc *misc = &value->misc;
+       int id = misc->source_eswitch_owner_vhca_id;
        struct mlx5dr_cmd_vport_cap *vport_cap;
        struct mlx5dr_domain *dmn = sb->dmn;
        struct mlx5dr_domain *vport_dmn;
@@ -1657,11 +1658,11 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
 
        if (sb->vhca_id_valid) {
                /* Find port GVMI based on the eswitch_owner_vhca_id */
-               if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+               if (id == dmn->info.caps.gvmi)
                        vport_dmn = dmn;
-               else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
-                                          dmn->peer_dmn->info.caps.gvmi))
-                       vport_dmn = dmn->peer_dmn;
+               else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] &&
+                        (id == dmn->peer_dmn[id]->info.caps.gvmi))
+                       vport_dmn = dmn->peer_dmn[id];
                else
                        return -EINVAL;
 
index 4c0704a..f4ef0b2 100644 (file)
@@ -1979,6 +1979,7 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
                                            u8 *tag)
 {
        struct mlx5dr_match_misc *misc = &value->misc;
+       int id = misc->source_eswitch_owner_vhca_id;
        struct mlx5dr_cmd_vport_cap *vport_cap;
        struct mlx5dr_domain *dmn = sb->dmn;
        struct mlx5dr_domain *vport_dmn;
@@ -1988,11 +1989,11 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
 
        if (sb->vhca_id_valid) {
                /* Find port GVMI based on the eswitch_owner_vhca_id */
-               if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+               if (id == dmn->info.caps.gvmi)
                        vport_dmn = dmn;
-               else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
-                                          dmn->peer_dmn->info.caps.gvmi))
-                       vport_dmn = dmn->peer_dmn;
+               else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] &&
+                        (id == dmn->peer_dmn[id]->info.caps.gvmi))
+                       vport_dmn = dmn->peer_dmn[id];
                else
                        return -EINVAL;
 
index 678a993..1622dbb 100644 (file)
@@ -935,7 +935,7 @@ struct mlx5dr_domain_info {
 };
 
 struct mlx5dr_domain {
-       struct mlx5dr_domain *peer_dmn;
+       struct mlx5dr_domain *peer_dmn[MLX5_MAX_PORTS];
        struct mlx5_core_dev *mdev;
        u32 pdn;
        struct mlx5_uars_page *uar;
index 9846537..c6fda1c 100644 (file)
@@ -770,14 +770,15 @@ restore_fte:
 }
 
 static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
-                               struct mlx5_flow_root_namespace *peer_ns)
+                               struct mlx5_flow_root_namespace *peer_ns,
+                               u8 peer_idx)
 {
        struct mlx5dr_domain *peer_domain = NULL;
 
        if (peer_ns)
                peer_domain = peer_ns->fs_dr_domain.dr_domain;
        mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain,
-                              peer_domain);
+                              peer_domain, peer_idx);
        return 0;
 }
 
index 9afd268..5ba88f2 100644 (file)
@@ -48,7 +48,8 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *domain);
 int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
 
 void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
-                           struct mlx5dr_domain *peer_dmn);
+                           struct mlx5dr_domain *peer_dmn,
+                           u8 peer_idx);
 
 struct mlx5dr_table *
 mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags,
index ba7e3df..bc66b07 100644 (file)
@@ -288,7 +288,8 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
                 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
        MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
        MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
-       MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+       if (vport || mlx5_core_is_ecpf(dev))
+               MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
        err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
        if (err)
index b001e52..47f6cc0 100644 (file)
@@ -44,7 +44,7 @@ MLXFW_MFA2_TLV(multi, struct mlxfw_mfa2_tlv_multi,
               MLXFW_MFA2_TLV_MULTI_PART);
 
 struct mlxfw_mfa2_tlv_psid {
-       u8 psid[0];
+       DECLARE_FLEX_ARRAY(u8, psid);
 } __packed;
 
 MLXFW_MFA2_TLV_VARSIZE(psid, struct mlxfw_mfa2_tlv_psid,
index 22db0bb..1ccf3b7 100644 (file)
@@ -1723,8 +1723,6 @@ static const struct devlink_ops mlxsw_devlink_ops = {
                                  BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
        .reload_down            = mlxsw_devlink_core_bus_device_reload_down,
        .reload_up              = mlxsw_devlink_core_bus_device_reload_up,
-       .port_split                     = mlxsw_devlink_port_split,
-       .port_unsplit                   = mlxsw_devlink_port_unsplit,
        .sb_pool_get                    = mlxsw_devlink_sb_pool_get,
        .sb_pool_set                    = mlxsw_devlink_sb_pool_set,
        .sb_port_pool_get               = mlxsw_devlink_sb_port_pool_get,
@@ -3116,6 +3114,11 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_res_get);
 
+static const struct devlink_port_ops mlxsw_devlink_port_ops = {
+       .port_split                     = mlxsw_devlink_port_split,
+       .port_unsplit                   = mlxsw_devlink_port_unsplit,
+};
+
 static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port,
                                  enum devlink_port_flavour flavour,
                                  u8 slot_index, u32 port_number, bool split,
@@ -3150,7 +3153,8 @@ static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port,
                devlink_port_linecard_set(devlink_port,
                                          linecard->devlink_linecard);
        }
-       err = devl_port_register(devlink, devlink_port, local_port);
+       err = devl_port_register_with_ops(devlink, devlink_port, local_port,
+                                         &mlxsw_devlink_port_ops);
        if (err)
                memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
        return err;
index bd1a51a..f0b2963 100644 (file)
@@ -42,6 +42,7 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
        MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4),
        MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4),
        MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4),
+       MLXSW_AFK_ELEMENT_INFO_U32(FDB_MISS, 0x40, 0, 1),
 };
 
 struct mlxsw_afk {
index 3a037fe..65a4aba 100644 (file)
@@ -35,6 +35,7 @@ enum mlxsw_afk_element {
        MLXSW_AFK_ELEMENT_IP_DSCP,
        MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
        MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB,
+       MLXSW_AFK_ELEMENT_FDB_MISS,
        MLXSW_AFK_ELEMENT_MAX,
 };
 
@@ -69,7 +70,7 @@ struct mlxsw_afk_element_info {
        MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF,                      \
                               _element, _offset, 0, _size)
 
-#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40
+#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x44
 
 struct mlxsw_afk_element_inst { /* element instance in actual block */
        enum mlxsw_afk_element element;
index 00c3232..4dea39f 100644 (file)
@@ -123,10 +123,12 @@ const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = {
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(FDB_MISS, 0x00, 3, 1),
        MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x04, 4),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_1[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(FDB_MISS, 0x00, 3, 1),
        MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4),
 };
 
index 594cdcb..72917f0 100644 (file)
@@ -281,39 +281,38 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
        return 0;
 }
 
-static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
-                                     struct flow_cls_offload *f,
-                                     struct mlxsw_sp_flow_block *block)
+static int
+mlxsw_sp_flower_parse_meta_iif(struct mlxsw_sp_acl_rule_info *rulei,
+                              const struct mlxsw_sp_flow_block *block,
+                              const struct flow_match_meta *match,
+                              struct netlink_ext_ack *extack)
 {
-       struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct net_device *ingress_dev;
-       struct flow_match_meta match;
 
-       if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
+       if (!match->mask->ingress_ifindex)
                return 0;
 
-       flow_rule_match_meta(rule, &match);
-       if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
-               NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask");
+       if (match->mask->ingress_ifindex != 0xFFFFFFFF) {
+               NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
                return -EINVAL;
        }
 
        ingress_dev = __dev_get_by_index(block->net,
-                                        match.key->ingress_ifindex);
+                                        match->key->ingress_ifindex);
        if (!ingress_dev) {
-               NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on");
+               NL_SET_ERR_MSG_MOD(extack, "Can't find specified ingress port to match on");
                return -EINVAL;
        }
 
        if (!mlxsw_sp_port_dev_check(ingress_dev)) {
-               NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port");
+               NL_SET_ERR_MSG_MOD(extack, "Can't match on non-mlxsw ingress port");
                return -EINVAL;
        }
 
        mlxsw_sp_port = netdev_priv(ingress_dev);
        if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) {
-               NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device");
+               NL_SET_ERR_MSG_MOD(extack, "Can't match on a port from different device");
                return -EINVAL;
        }
 
@@ -321,9 +320,29 @@ static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
                                       MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
                                       mlxsw_sp_port->local_port,
                                       0xFFFFFFFF);
+
        return 0;
 }
 
+static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
+                                     struct flow_cls_offload *f,
+                                     struct mlxsw_sp_flow_block *block)
+{
+       struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+       struct flow_match_meta match;
+
+       if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
+               return 0;
+
+       flow_rule_match_meta(rule, &match);
+
+       mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_FDB_MISS,
+                                      match.key->l2_miss, match.mask->l2_miss);
+
+       return mlxsw_sp_flower_parse_meta_iif(rulei, block, &match,
+                                             f->common.extack);
+}
+
 static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
                                       struct flow_cls_offload *f)
 {
index 4a73e2f..7304e8a 100644 (file)
@@ -96,8 +96,8 @@ struct mlxsw_sp_rif_subport {
 struct mlxsw_sp_rif_ipip_lb {
        struct mlxsw_sp_rif common;
        struct mlxsw_sp_rif_ipip_lb_config lb_config;
-       u16 ul_vr_id; /* Reserved for Spectrum-2. */
-       u16 ul_rif_id; /* Reserved for Spectrum. */
+       u16 ul_vr_id;   /* Spectrum-1. */
+       u16 ul_rif_id;  /* Spectrum-2+. */
 };
 
 struct mlxsw_sp_rif_params_ipip_lb {
@@ -748,10 +748,11 @@ static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
 {
+       int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
        struct mlxsw_sp_vr *vr;
        int i;
 
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+       for (i = 0; i < max_vrs; i++) {
                vr = &mlxsw_sp->router->vrs[i];
                if (!mlxsw_sp_vr_is_used(vr))
                        return vr;
@@ -792,12 +793,13 @@ static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
                                            u32 tb_id)
 {
+       int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
        struct mlxsw_sp_vr *vr;
        int i;
 
        tb_id = mlxsw_sp_fix_tb_id(tb_id);
 
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+       for (i = 0; i < max_vrs; i++) {
                vr = &mlxsw_sp->router->vrs[i];
                if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
                        return vr;
@@ -959,6 +961,7 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_fib *fib,
                                         struct mlxsw_sp_lpm_tree *new_tree)
 {
+       int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
        enum mlxsw_sp_l3proto proto = fib->proto;
        struct mlxsw_sp_lpm_tree *old_tree;
        u8 old_id, new_id = new_tree->id;
@@ -968,7 +971,7 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
        old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
        old_id = old_tree->id;
 
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+       for (i = 0; i < max_vrs; i++) {
                vr = &mlxsw_sp->router->vrs[i];
                if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
                        continue;
@@ -7298,9 +7301,10 @@ static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
 
 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
 {
+       int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
        int i, j;
 
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+       for (i = 0; i < max_vrs; i++) {
                struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
 
                if (!mlxsw_sp_vr_is_used(vr))
@@ -7699,9 +7703,10 @@ static struct mlxsw_sp_rif *
 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
                         const struct net_device *dev)
 {
+       int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
        int i;
 
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
+       for (i = 0; i < max_rifs; i++)
                if (mlxsw_sp->router->rifs[i] &&
                    mlxsw_sp->router->rifs[i]->dev == dev)
                        return mlxsw_sp->router->rifs[i];
@@ -9724,7 +9729,7 @@ mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
        struct mlxsw_sp_vr *ul_vr;
        int err;
 
-       ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
+       ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack);
        if (IS_ERR(ul_vr))
                return PTR_ERR(ul_vr);
 
@@ -9923,7 +9928,7 @@ mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
        struct mlxsw_sp_rif *ul_rif;
        int err;
 
-       ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
+       ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, extack);
        if (IS_ERR(ul_rif))
                return PTR_ERR(ul_rif);
 
@@ -10041,11 +10046,12 @@ err_rifs_table_init:
 
 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
 {
+       int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
        struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
        int i;
 
        WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
+       for (i = 0; i < max_rifs; i++)
                WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
 
        devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
index 176efbe..d6c9491 100644 (file)
@@ -58,7 +58,6 @@ struct enc28j60_net {
        struct mutex lock;
        struct sk_buff *tx_skb;
        struct work_struct tx_work;
-       struct work_struct irq_work;
        struct work_struct setrx_work;
        struct work_struct restart_work;
        u8 bank;                /* current register bank selected */
@@ -1118,10 +1117,9 @@ static int enc28j60_rx_interrupt(struct net_device *ndev)
        return ret;
 }
 
-static void enc28j60_irq_work_handler(struct work_struct *work)
+static irqreturn_t enc28j60_irq(int irq, void *dev_id)
 {
-       struct enc28j60_net *priv =
-               container_of(work, struct enc28j60_net, irq_work);
+       struct enc28j60_net *priv = dev_id;
        struct net_device *ndev = priv->netdev;
        int intflags, loop;
 
@@ -1225,6 +1223,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
 
        /* re-enable interrupts */
        locked_reg_bfset(priv, EIE, EIE_INTIE);
+
+       return IRQ_HANDLED;
 }
 
 /*
@@ -1309,22 +1309,6 @@ static void enc28j60_tx_work_handler(struct work_struct *work)
        enc28j60_hw_tx(priv);
 }
 
-static irqreturn_t enc28j60_irq(int irq, void *dev_id)
-{
-       struct enc28j60_net *priv = dev_id;
-
-       /*
-        * Can't do anything in interrupt context because we need to
-        * block (spi_sync() is blocking) so fire of the interrupt
-        * handling workqueue.
-        * Remember that we access enc28j60 registers through SPI bus
-        * via spi_sync() call.
-        */
-       schedule_work(&priv->irq_work);
-
-       return IRQ_HANDLED;
-}
-
 static void enc28j60_tx_timeout(struct net_device *ndev, unsigned int txqueue)
 {
        struct enc28j60_net *priv = netdev_priv(ndev);
@@ -1559,7 +1543,6 @@ static int enc28j60_probe(struct spi_device *spi)
        mutex_init(&priv->lock);
        INIT_WORK(&priv->tx_work, enc28j60_tx_work_handler);
        INIT_WORK(&priv->setrx_work, enc28j60_setrx_work_handler);
-       INIT_WORK(&priv->irq_work, enc28j60_irq_work_handler);
        INIT_WORK(&priv->restart_work, enc28j60_restart_work_handler);
        spi_set_drvdata(spi, priv);     /* spi to priv reference */
        SET_NETDEV_DEV(dev, &spi->dev);
@@ -1578,7 +1561,8 @@ static int enc28j60_probe(struct spi_device *spi)
        /* Board setup must set the relevant edge trigger type;
         * level triggers won't currently work.
         */
-       ret = request_irq(spi->irq, enc28j60_irq, 0, DRV_NAME, priv);
+       ret = request_threaded_irq(spi->irq, NULL, enc28j60_irq, IRQF_ONESHOT,
+                                  DRV_NAME, priv);
        if (ret < 0) {
                if (netif_msg_probe(priv))
                        dev_err(&spi->dev, "request irq %d failed (ret = %d)\n",
index 957d96a..f1bded9 100644 (file)
@@ -160,16 +160,13 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter)
 {
        struct lan743x_csr *csr = &adapter->csr;
        resource_size_t bar_start, bar_length;
-       int result;
 
        bar_start = pci_resource_start(adapter->pdev, 0);
        bar_length = pci_resource_len(adapter->pdev, 0);
        csr->csr_address = devm_ioremap(&adapter->pdev->dev,
                                        bar_start, bar_length);
-       if (!csr->csr_address) {
-               result = -ENOMEM;
-               goto clean_up;
-       }
+       if (!csr->csr_address)
+               return -ENOMEM;
 
        csr->id_rev = lan743x_csr_read(adapter, ID_REV);
        csr->fpga_rev = lan743x_csr_read(adapter, FPGA_REV);
@@ -177,10 +174,8 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter)
                   "ID_REV = 0x%08X, FPGA_REV = %d.%d\n",
                   csr->id_rev, FPGA_REV_GET_MAJOR_(csr->fpga_rev),
                   FPGA_REV_GET_MINOR_(csr->fpga_rev));
-       if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) {
-               result = -ENODEV;
-               goto clean_up;
-       }
+       if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev))
+               return -ENODEV;
 
        csr->flags = LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
        switch (csr->id_rev & ID_REV_CHIP_REV_MASK_) {
@@ -193,12 +188,7 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter)
                break;
        }
 
-       result = lan743x_csr_light_reset(adapter);
-       if (result)
-               goto clean_up;
-       return 0;
-clean_up:
-       return result;
+       return lan743x_csr_light_reset(adapter);
 }
 
 static void lan743x_intr_software_isr(struct lan743x_adapter *adapter)
index 571e6d4..f9ebffc 100644 (file)
@@ -10,3 +10,14 @@ config LAN966X_SWITCH
        select VCAP
        help
          This driver supports the Lan966x network switch device.
+
+config LAN966X_DCB
+       bool "Data Center Bridging (DCB) support"
+       depends on LAN966X_SWITCH && DCB
+       default y
+       help
+         Say Y here if you want to use Data Center Bridging (DCB) in the
+         driver. This can be used to assign priority to traffic, based on
+         DSCP and PCP.
+
+         If unsure, set to Y.
index 7b0cda4..3b6ac33 100644 (file)
@@ -15,6 +15,7 @@ lan966x-switch-objs  := lan966x_main.o lan966x_phylink.o lan966x_port.o \
                        lan966x_xdp.o lan966x_vcap_impl.o lan966x_vcap_ag_api.o \
                        lan966x_tc_flower.o lan966x_goto.o
 
+lan966x-switch-$(CONFIG_LAN966X_DCB) += lan966x_dcb.o
 lan966x-switch-$(CONFIG_DEBUG_FS) += lan966x_vcap_debugfs.o
 
 # Provide include files
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c
new file mode 100644 (file)
index 0000000..ed2d96d
--- /dev/null
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include "lan966x_main.h"
+
+enum lan966x_dcb_apptrust_values {
+       LAN966X_DCB_APPTRUST_EMPTY,
+       LAN966X_DCB_APPTRUST_DSCP,
+       LAN966X_DCB_APPTRUST_PCP,
+       LAN966X_DCB_APPTRUST_DSCP_PCP,
+       __LAN966X_DCB_APPTRUST_MAX
+};
+
+static const struct lan966x_dcb_apptrust {
+       u8 selectors[IEEE_8021QAZ_APP_SEL_MAX + 1];
+       int nselectors;
+} *lan966x_port_apptrust[NUM_PHYS_PORTS];
+
+static const char *lan966x_dcb_apptrust_names[__LAN966X_DCB_APPTRUST_MAX] = {
+       [LAN966X_DCB_APPTRUST_EMPTY]    = "empty",
+       [LAN966X_DCB_APPTRUST_DSCP]     = "dscp",
+       [LAN966X_DCB_APPTRUST_PCP]      = "pcp",
+       [LAN966X_DCB_APPTRUST_DSCP_PCP] = "dscp pcp"
+};
+
+/* Lan966x supported apptrust policies */
+static const struct lan966x_dcb_apptrust
+       lan966x_dcb_apptrust_policies[__LAN966X_DCB_APPTRUST_MAX] = {
+       /* Empty *must* be first */
+       [LAN966X_DCB_APPTRUST_EMPTY]    = { { 0 }, 0 },
+       [LAN966X_DCB_APPTRUST_DSCP]     = { { IEEE_8021QAZ_APP_SEL_DSCP }, 1 },
+       [LAN966X_DCB_APPTRUST_PCP]      = { { DCB_APP_SEL_PCP }, 1 },
+       [LAN966X_DCB_APPTRUST_DSCP_PCP] = { { IEEE_8021QAZ_APP_SEL_DSCP,
+                                             DCB_APP_SEL_PCP }, 2 },
+};
+
+static bool lan966x_dcb_apptrust_contains(int portno, u8 selector)
+{
+       const struct lan966x_dcb_apptrust *conf = lan966x_port_apptrust[portno];
+
+       for (int i = 0; i < conf->nselectors; i++)
+               if (conf->selectors[i] == selector)
+                       return true;
+
+       return false;
+}
+
+static void lan966x_dcb_app_update(struct net_device *dev)
+{
+       struct dcb_ieee_app_prio_map dscp_rewr_map = {0};
+       struct dcb_rewr_prio_pcp_map pcp_rewr_map = {0};
+       struct lan966x_port *port = netdev_priv(dev);
+       struct lan966x_port_qos qos = {0};
+       struct dcb_app app_itr;
+       bool dscp_rewr = false;
+       bool pcp_rewr = false;
+
+       /* Get pcp ingress mapping */
+       for (int i = 0; i < ARRAY_SIZE(qos.pcp.map); i++) {
+               app_itr.selector = DCB_APP_SEL_PCP;
+               app_itr.protocol = i;
+               qos.pcp.map[i] = dcb_getapp(dev, &app_itr);
+       }
+
+       /* Get dscp ingress mapping */
+       for (int i = 0; i < ARRAY_SIZE(qos.dscp.map); i++) {
+               app_itr.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+               app_itr.protocol = i;
+               qos.dscp.map[i] = dcb_getapp(dev, &app_itr);
+       }
+
+       /* Get default prio */
+       qos.default_prio = dcb_ieee_getapp_default_prio_mask(dev);
+       if (qos.default_prio)
+               qos.default_prio = fls(qos.default_prio) - 1;
+
+       /* Get pcp rewrite mapping */
+       dcb_getrewr_prio_pcp_mask_map(dev, &pcp_rewr_map);
+       for (int i = 0; i < ARRAY_SIZE(pcp_rewr_map.map); i++) {
+               if (!pcp_rewr_map.map[i])
+                       continue;
+
+               pcp_rewr = true;
+               qos.pcp_rewr.map[i] = fls(pcp_rewr_map.map[i]) - 1;
+       }
+
+       /* Get dscp rewrite mapping */
+       dcb_getrewr_prio_dscp_mask_map(dev, &dscp_rewr_map);
+       for (int i = 0; i < ARRAY_SIZE(dscp_rewr_map.map); i++) {
+               if (!dscp_rewr_map.map[i])
+                       continue;
+
+               dscp_rewr = true;
+               qos.dscp_rewr.map[i] = fls64(dscp_rewr_map.map[i]) - 1;
+       }
+
+       /* Enable use of pcp for queue classification */
+       if (lan966x_dcb_apptrust_contains(port->chip_port, DCB_APP_SEL_PCP)) {
+               qos.pcp.enable = true;
+
+               if (pcp_rewr)
+                       qos.pcp_rewr.enable = true;
+       }
+
+       /* Enable use of dscp for queue classification */
+       if (lan966x_dcb_apptrust_contains(port->chip_port, IEEE_8021QAZ_APP_SEL_DSCP)) {
+               qos.dscp.enable = true;
+
+               if (dscp_rewr)
+                       qos.dscp_rewr.enable = true;
+       }
+
+       lan966x_port_qos_set(port, &qos);
+}
+
+/* DSCP mapping is global for all ports, so set and delete app entries are
+ * replicated for each port.
+ */
+static int lan966x_dcb_ieee_dscp_setdel(struct net_device *dev,
+                                       struct dcb_app *app,
+                                       int (*setdel)(struct net_device *,
+                                                     struct dcb_app *))
+{
+       struct lan966x_port *port = netdev_priv(dev);
+       struct lan966x *lan966x = port->lan966x;
+       int err;
+
+       for (int i = 0; i < NUM_PHYS_PORTS; i++) {
+               port = lan966x->ports[i];
+               if (!port)
+                       continue;
+
+               err = setdel(port->dev, app);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static int lan966x_dcb_app_validate(struct net_device *dev,
+                                   const struct dcb_app *app)
+{
+       int err = 0;
+
+       switch (app->selector) {
+       /* Default priority checks */
+       case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
+               if (app->protocol)
+                       err = -EINVAL;
+               else if (app->priority >= NUM_PRIO_QUEUES)
+                       err = -ERANGE;
+               break;
+       /* Dscp checks */
+       case IEEE_8021QAZ_APP_SEL_DSCP:
+               if (app->protocol >= LAN966X_PORT_QOS_DSCP_COUNT)
+                       err = -EINVAL;
+               else if (app->priority >= NUM_PRIO_QUEUES)
+                       err = -ERANGE;
+               break;
+       /* Pcp checks */
+       case DCB_APP_SEL_PCP:
+               if (app->protocol >= LAN966X_PORT_QOS_PCP_DEI_COUNT)
+                       err = -EINVAL;
+               else if (app->priority >= NUM_PRIO_QUEUES)
+                       err = -ERANGE;
+               break;
+       default:
+               err = -EINVAL;
+               break;
+       }
+
+       if (err)
+               netdev_err(dev, "Invalid entry: %d:%d\n", app->protocol,
+                          app->priority);
+
+       return err;
+}
+
+static int lan966x_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app)
+{
+       int err;
+
+       if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
+               err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp);
+       else
+               err = dcb_ieee_delapp(dev, app);
+
+       if (err)
+               return err;
+
+       lan966x_dcb_app_update(dev);
+
+       return 0;
+}
+
+static int lan966x_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app)
+{
+       struct dcb_app app_itr;
+       int err;
+       u8 prio;
+
+       err = lan966x_dcb_app_validate(dev, app);
+       if (err)
+               return err;
+
+       /* Delete current mapping, if it exists */
+       prio = dcb_getapp(dev, app);
+       if (prio) {
+               app_itr = *app;
+               app_itr.priority = prio;
+               lan966x_dcb_ieee_delapp(dev, &app_itr);
+       }
+
+       if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
+               err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_setapp);
+       else
+               err = dcb_ieee_setapp(dev, app);
+
+       if (err)
+               return err;
+
+       lan966x_dcb_app_update(dev);
+
+       return 0;
+}
+
+static int lan966x_dcb_apptrust_validate(struct net_device *dev,
+                                        u8 *selectors,
+                                        int nselectors)
+{
+       for (int i = 0; i < ARRAY_SIZE(lan966x_dcb_apptrust_policies); i++) {
+               bool match;
+
+               if (lan966x_dcb_apptrust_policies[i].nselectors != nselectors)
+                       continue;
+
+               match = true;
+               for (int j = 0; j < nselectors; j++) {
+                       if (lan966x_dcb_apptrust_policies[i].selectors[j] !=
+                           *(selectors + j)) {
+                               match = false;
+                               break;
+                       }
+               }
+               if (match)
+                       return i;
+       }
+
+       netdev_err(dev, "Valid apptrust configurations are:\n");
+       for (int i = 0; i < ARRAY_SIZE(lan966x_dcb_apptrust_names); i++)
+               pr_info("order: %s\n", lan966x_dcb_apptrust_names[i]);
+
+       return -EOPNOTSUPP;
+}
+
+static int lan966x_dcb_setapptrust(struct net_device *dev,
+                                  u8 *selectors,
+                                  int nselectors)
+{
+       struct lan966x_port *port = netdev_priv(dev);
+       int idx;
+
+       idx = lan966x_dcb_apptrust_validate(dev, selectors, nselectors);
+       if (idx < 0)
+               return idx;
+
+       lan966x_port_apptrust[port->chip_port] = &lan966x_dcb_apptrust_policies[idx];
+       lan966x_dcb_app_update(dev);
+
+       return 0;
+}
+
+static int lan966x_dcb_getapptrust(struct net_device *dev, u8 *selectors,
+                                  int *nselectors)
+{
+       struct lan966x_port *port = netdev_priv(dev);
+       const struct lan966x_dcb_apptrust *trust;
+
+       trust = lan966x_port_apptrust[port->chip_port];
+
+       memcpy(selectors, trust->selectors, trust->nselectors);
+       *nselectors = trust->nselectors;
+
+       return 0;
+}
+
+static int lan966x_dcb_delrewr(struct net_device *dev, struct dcb_app *app)
+{
+       int err;
+
+       if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
+               err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_delrewr);
+       else
+               err = dcb_delrewr(dev, app);
+
+       if (err < 0)
+               return err;
+
+       lan966x_dcb_app_update(dev);
+
+       return 0;
+}
+
+static int lan966x_dcb_setrewr(struct net_device *dev, struct dcb_app *app)
+{
+       struct dcb_app app_itr;
+       u16 proto;
+       int err;
+
+       err = lan966x_dcb_app_validate(dev, app);
+       if (err)
+               goto out;
+
+       /* Delete current mapping, if it exists. */
+       proto = dcb_getrewr(dev, app);
+       if (proto) {
+               app_itr = *app;
+               app_itr.protocol = proto;
+               lan966x_dcb_delrewr(dev, &app_itr);
+       }
+
+       if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
+               err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_setrewr);
+       else
+               err = dcb_setrewr(dev, app);
+
+       if (err)
+               goto out;
+
+       lan966x_dcb_app_update(dev);
+
+out:
+       return err;
+}
+
+static const struct dcbnl_rtnl_ops lan966x_dcbnl_ops = {
+       .ieee_setapp = lan966x_dcb_ieee_setapp,
+       .ieee_delapp = lan966x_dcb_ieee_delapp,
+       .dcbnl_setapptrust = lan966x_dcb_setapptrust,
+       .dcbnl_getapptrust = lan966x_dcb_getapptrust,
+       .dcbnl_setrewr = lan966x_dcb_setrewr,
+       .dcbnl_delrewr = lan966x_dcb_delrewr,
+};
+
+void lan966x_dcb_init(struct lan966x *lan966x)
+{
+       for (int p = 0; p < lan966x->num_phys_ports; ++p) {
+               struct lan966x_port *port;
+
+               port = lan966x->ports[p];
+               if (!port)
+                       continue;
+
+               port->dev->dcbnl_ops = &lan966x_dcbnl_ops;
+
+               lan966x_port_apptrust[port->chip_port] =
+                       &lan966x_dcb_apptrust_policies[LAN966X_DCB_APPTRUST_DSCP_PCP];
+
+               /* Enable DSCP classification based on classified QoS class and
+                * DP, for all DSCP values, for all ports.
+                */
+               lan966x_port_qos_dscp_rewr_mode_set(port,
+                                                   LAN966X_PORT_QOS_REWR_DSCP_ALL);
+       }
+}
index ee26986..f6931df 100644 (file)
@@ -1223,6 +1223,8 @@ static int lan966x_probe(struct platform_device *pdev)
        if (err)
                goto cleanup_fdma;
 
+       lan966x_dcb_init(lan966x);
+
        return 0;
 
 cleanup_fdma:
index c977c70..27f2728 100644 (file)
 #define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */
 #define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */
 
+#define LAN966X_VCAP_CID_ES0_L0 VCAP_CID_EGRESS_L0 /* ES0 lookup 0 */
+#define LAN966X_VCAP_CID_ES0_MAX (VCAP_CID_EGRESS_L1 - 1) /* ES0 Max */
+
+#define LAN966X_PORT_QOS_PCP_COUNT     8
+#define LAN966X_PORT_QOS_DEI_COUNT     8
+#define LAN966X_PORT_QOS_PCP_DEI_COUNT \
+       (LAN966X_PORT_QOS_PCP_COUNT + LAN966X_PORT_QOS_DEI_COUNT)
+
+#define LAN966X_PORT_QOS_DSCP_COUNT    64
+
+/* Port PCP rewrite mode */
+#define LAN966X_PORT_REW_TAG_CTRL_CLASSIFIED   0
+#define LAN966X_PORT_REW_TAG_CTRL_MAPPED       2
+
+/* Port DSCP rewrite mode */
+#define LAN966X_PORT_REW_DSCP_FRAME            0
+#define LAN966X_PORT_REW_DSCP_ANALIZER         1
+#define LAN966X_PORT_QOS_REWR_DSCP_ALL         3
+
 /* MAC table entry types.
  * ENTRYTYPE_NORMAL is subject to aging.
  * ENTRYTYPE_LOCKED is not subject to aging.
@@ -389,6 +408,34 @@ struct lan966x_port_tc {
        struct flow_stats mirror_stat;
 };
 
+struct lan966x_port_qos_pcp {
+       u8 map[LAN966X_PORT_QOS_PCP_DEI_COUNT];
+       bool enable;
+};
+
+struct lan966x_port_qos_dscp {
+       u8 map[LAN966X_PORT_QOS_DSCP_COUNT];
+       bool enable;
+};
+
+struct lan966x_port_qos_pcp_rewr {
+       u16 map[NUM_PRIO_QUEUES];
+       bool enable;
+};
+
+struct lan966x_port_qos_dscp_rewr {
+       u16 map[LAN966X_PORT_QOS_DSCP_COUNT];
+       bool enable;
+};
+
+struct lan966x_port_qos {
+       struct lan966x_port_qos_pcp pcp;
+       struct lan966x_port_qos_dscp dscp;
+       struct lan966x_port_qos_pcp_rewr pcp_rewr;
+       struct lan966x_port_qos_dscp_rewr dscp_rewr;
+       u8 default_prio;
+};
+
 struct lan966x_port {
        struct net_device *dev;
        struct lan966x *lan966x;
@@ -453,6 +500,11 @@ int lan966x_port_pcs_set(struct lan966x_port *port,
                         struct lan966x_port_config *config);
 void lan966x_port_init(struct lan966x_port *port);
 
+void lan966x_port_qos_set(struct lan966x_port *port,
+                         struct lan966x_port_qos *qos);
+void lan966x_port_qos_dscp_rewr_mode_set(struct lan966x_port *port,
+                                        int mode);
+
 int lan966x_mac_ip_learn(struct lan966x *lan966x,
                         bool cpu_copy,
                         const unsigned char mac[ETH_ALEN],
@@ -677,6 +729,14 @@ int lan966x_goto_port_del(struct lan966x_port *port,
                          unsigned long goto_id,
                          struct netlink_ext_ack *extack);
 
+#ifdef CONFIG_LAN966X_DCB
+void lan966x_dcb_init(struct lan966x *lan966x);
+#else
+static inline void lan966x_dcb_init(struct lan966x *lan966x)
+{
+}
+#endif
+
 static inline void __iomem *lan_addr(void __iomem *base[],
                                     int id, int tinst, int tcnt,
                                     int gbase, int ginst,
index 0050fcb..92108d3 100644 (file)
@@ -394,6 +394,155 @@ int lan966x_port_pcs_set(struct lan966x_port *port,
        return 0;
 }
 
+static void lan966x_port_qos_pcp_set(struct lan966x_port *port,
+                                    struct lan966x_port_qos_pcp *qos)
+{
+       u8 *pcp_itr = qos->map;
+       u8 pcp, dp;
+
+       lan_rmw(ANA_QOS_CFG_QOS_PCP_ENA_SET(qos->enable),
+               ANA_QOS_CFG_QOS_PCP_ENA,
+               port->lan966x, ANA_QOS_CFG(port->chip_port));
+
+       /* Map PCP and DEI to priority */
+       for (int i = 0; i < ARRAY_SIZE(qos->map); i++) {
+               pcp = *(pcp_itr + i);
+               dp = (i < LAN966X_PORT_QOS_PCP_COUNT) ? 0 : 1;
+
+               lan_rmw(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_SET(pcp) |
+                       ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_SET(dp),
+                       ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL |
+                       ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL,
+                       port->lan966x,
+                       ANA_PCP_DEI_CFG(port->chip_port, i));
+       }
+}
+
+static void lan966x_port_qos_dscp_set(struct lan966x_port *port,
+                                     struct lan966x_port_qos_dscp *qos)
+{
+       struct lan966x *lan966x = port->lan966x;
+
+       /* Enable/disable dscp for qos classification. */
+       lan_rmw(ANA_QOS_CFG_QOS_DSCP_ENA_SET(qos->enable),
+               ANA_QOS_CFG_QOS_DSCP_ENA,
+               lan966x, ANA_QOS_CFG(port->chip_port));
+
+       /* Map each dscp value to priority and dp */
+       for (int i = 0; i < ARRAY_SIZE(qos->map); i++)
+               lan_rmw(ANA_DSCP_CFG_DP_DSCP_VAL_SET(0) |
+                       ANA_DSCP_CFG_QOS_DSCP_VAL_SET(*(qos->map + i)),
+                       ANA_DSCP_CFG_DP_DSCP_VAL |
+                       ANA_DSCP_CFG_QOS_DSCP_VAL,
+                       lan966x, ANA_DSCP_CFG(i));
+
+       /* Set per-dscp trust */
+       for (int i = 0; i <  ARRAY_SIZE(qos->map); i++)
+               lan_rmw(ANA_DSCP_CFG_DSCP_TRUST_ENA_SET(qos->enable),
+                       ANA_DSCP_CFG_DSCP_TRUST_ENA,
+                       lan966x, ANA_DSCP_CFG(i));
+}
+
+static int lan966x_port_qos_default_set(struct lan966x_port *port,
+                                       struct lan966x_port_qos *qos)
+{
+       /* Set default prio and dp level */
+       lan_rmw(ANA_QOS_CFG_DP_DEFAULT_VAL_SET(0) |
+               ANA_QOS_CFG_QOS_DEFAULT_VAL_SET(qos->default_prio),
+               ANA_QOS_CFG_DP_DEFAULT_VAL |
+               ANA_QOS_CFG_QOS_DEFAULT_VAL,
+               port->lan966x, ANA_QOS_CFG(port->chip_port));
+
+       /* Set default pcp and dei for untagged frames */
+       lan_rmw(ANA_VLAN_CFG_VLAN_DEI_SET(0) |
+               ANA_VLAN_CFG_VLAN_PCP_SET(0),
+               ANA_VLAN_CFG_VLAN_DEI |
+               ANA_VLAN_CFG_VLAN_PCP,
+               port->lan966x, ANA_VLAN_CFG(port->chip_port));
+
+       return 0;
+}
+
+static void lan966x_port_qos_pcp_rewr_set(struct lan966x_port *port,
+                                         struct lan966x_port_qos_pcp_rewr *qos)
+{
+       u8 mode = LAN966X_PORT_REW_TAG_CTRL_CLASSIFIED;
+       u8 pcp, dei;
+
+       if (qos->enable)
+               mode = LAN966X_PORT_REW_TAG_CTRL_MAPPED;
+
+       /* Map the values only if it is enabled otherwise will be the classified
+        * value
+        */
+       lan_rmw(REW_TAG_CFG_TAG_PCP_CFG_SET(mode) |
+               REW_TAG_CFG_TAG_DEI_CFG_SET(mode),
+               REW_TAG_CFG_TAG_PCP_CFG |
+               REW_TAG_CFG_TAG_DEI_CFG,
+               port->lan966x, REW_TAG_CFG(port->chip_port));
+
+       /* Map each value to pcp and dei */
+       for (int i = 0; i < ARRAY_SIZE(qos->map); i++) {
+               pcp = qos->map[i];
+               if (pcp > LAN966X_PORT_QOS_PCP_COUNT)
+                       dei = 1;
+               else
+                       dei = 0;
+
+               lan_rmw(REW_PCP_DEI_CFG_DEI_QOS_VAL_SET(dei) |
+                       REW_PCP_DEI_CFG_PCP_QOS_VAL_SET(pcp),
+                       REW_PCP_DEI_CFG_DEI_QOS_VAL |
+                       REW_PCP_DEI_CFG_PCP_QOS_VAL,
+                       port->lan966x,
+                       REW_PCP_DEI_CFG(port->chip_port,
+                                       i + dei * LAN966X_PORT_QOS_PCP_COUNT));
+       }
+}
+
+static void lan966x_port_qos_dscp_rewr_set(struct lan966x_port *port,
+                                          struct lan966x_port_qos_dscp_rewr *qos)
+{
+       u16 dscp;
+       u8 mode;
+
+       if (qos->enable)
+               mode = LAN966X_PORT_REW_DSCP_ANALIZER;
+       else
+               mode = LAN966X_PORT_REW_DSCP_FRAME;
+
+       /* Enable the rewrite otherwise will use the values from the frame */
+       lan_rmw(REW_DSCP_CFG_DSCP_REWR_CFG_SET(mode),
+               REW_DSCP_CFG_DSCP_REWR_CFG,
+               port->lan966x, REW_DSCP_CFG(port->chip_port));
+
+       /* Map each classified Qos class and DP to classified DSCP value */
+       for (int i = 0; i < ARRAY_SIZE(qos->map); i++) {
+               dscp = qos->map[i];
+
+               lan_rmw(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_SET(dscp),
+                       ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL,
+                       port->lan966x, ANA_DSCP_REWR_CFG(i));
+       }
+}
+
+void lan966x_port_qos_dscp_rewr_mode_set(struct lan966x_port *port,
+                                        int mode)
+{
+       lan_rmw(ANA_QOS_CFG_DSCP_REWR_CFG_SET(mode),
+               ANA_QOS_CFG_DSCP_REWR_CFG,
+               port->lan966x, ANA_QOS_CFG(port->chip_port));
+}
+
+void lan966x_port_qos_set(struct lan966x_port *port,
+                         struct lan966x_port_qos *qos)
+{
+       lan966x_port_qos_pcp_set(port, &qos->pcp);
+       lan966x_port_qos_dscp_set(port, &qos->dscp);
+       lan966x_port_qos_default_set(port, qos);
+       lan966x_port_qos_pcp_rewr_set(port, &qos->pcp_rewr);
+       lan966x_port_qos_dscp_rewr_set(port, &qos->dscp_rewr);
+}
+
 void lan966x_port_init(struct lan966x_port *port)
 {
        struct lan966x_port_config *config = &port->config;
index f99f88b..4b55392 100644 (file)
@@ -283,6 +283,18 @@ enum lan966x_target {
 #define ANA_VLAN_CFG_VLAN_POP_CNT_GET(x)\
        FIELD_GET(ANA_VLAN_CFG_VLAN_POP_CNT, x)
 
+#define ANA_VLAN_CFG_VLAN_PCP                    GENMASK(15, 13)
+#define ANA_VLAN_CFG_VLAN_PCP_SET(x)\
+       FIELD_PREP(ANA_VLAN_CFG_VLAN_PCP, x)
+#define ANA_VLAN_CFG_VLAN_PCP_GET(x)\
+       FIELD_GET(ANA_VLAN_CFG_VLAN_PCP, x)
+
+#define ANA_VLAN_CFG_VLAN_DEI                    BIT(12)
+#define ANA_VLAN_CFG_VLAN_DEI_SET(x)\
+       FIELD_PREP(ANA_VLAN_CFG_VLAN_DEI, x)
+#define ANA_VLAN_CFG_VLAN_DEI_GET(x)\
+       FIELD_GET(ANA_VLAN_CFG_VLAN_DEI, x)
+
 #define ANA_VLAN_CFG_VLAN_VID                    GENMASK(11, 0)
 #define ANA_VLAN_CFG_VLAN_VID_SET(x)\
        FIELD_PREP(ANA_VLAN_CFG_VLAN_VID, x)
@@ -316,6 +328,39 @@ enum lan966x_target {
 #define ANA_DROP_CFG_DROP_MC_SMAC_ENA_GET(x)\
        FIELD_GET(ANA_DROP_CFG_DROP_MC_SMAC_ENA, x)
 
+/*      ANA:PORT:QOS_CFG */
+#define ANA_QOS_CFG(g)            __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 8, 0, 1, 4)
+
+#define ANA_QOS_CFG_DP_DEFAULT_VAL               BIT(8)
+#define ANA_QOS_CFG_DP_DEFAULT_VAL_SET(x)\
+       FIELD_PREP(ANA_QOS_CFG_DP_DEFAULT_VAL, x)
+#define ANA_QOS_CFG_DP_DEFAULT_VAL_GET(x)\
+       FIELD_GET(ANA_QOS_CFG_DP_DEFAULT_VAL, x)
+
+#define ANA_QOS_CFG_QOS_DEFAULT_VAL              GENMASK(7, 5)
+#define ANA_QOS_CFG_QOS_DEFAULT_VAL_SET(x)\
+       FIELD_PREP(ANA_QOS_CFG_QOS_DEFAULT_VAL, x)
+#define ANA_QOS_CFG_QOS_DEFAULT_VAL_GET(x)\
+       FIELD_GET(ANA_QOS_CFG_QOS_DEFAULT_VAL, x)
+
+#define ANA_QOS_CFG_QOS_DSCP_ENA                 BIT(4)
+#define ANA_QOS_CFG_QOS_DSCP_ENA_SET(x)\
+       FIELD_PREP(ANA_QOS_CFG_QOS_DSCP_ENA, x)
+#define ANA_QOS_CFG_QOS_DSCP_ENA_GET(x)\
+       FIELD_GET(ANA_QOS_CFG_QOS_DSCP_ENA, x)
+
+#define ANA_QOS_CFG_QOS_PCP_ENA                  BIT(3)
+#define ANA_QOS_CFG_QOS_PCP_ENA_SET(x)\
+       FIELD_PREP(ANA_QOS_CFG_QOS_PCP_ENA, x)
+#define ANA_QOS_CFG_QOS_PCP_ENA_GET(x)\
+       FIELD_GET(ANA_QOS_CFG_QOS_PCP_ENA, x)
+
+#define ANA_QOS_CFG_DSCP_REWR_CFG                GENMASK(1, 0)
+#define ANA_QOS_CFG_DSCP_REWR_CFG_SET(x)\
+       FIELD_PREP(ANA_QOS_CFG_DSCP_REWR_CFG, x)
+#define ANA_QOS_CFG_DSCP_REWR_CFG_GET(x)\
+       FIELD_GET(ANA_QOS_CFG_DSCP_REWR_CFG, x)
+
 /*      ANA:PORT:VCAP_CFG */
 #define ANA_VCAP_CFG(g)           __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 12, 0, 1, 4)
 
@@ -415,6 +460,21 @@ enum lan966x_target {
 #define ANA_VCAP_S2_CFG_OAM_DIS_GET(x)\
        FIELD_GET(ANA_VCAP_S2_CFG_OAM_DIS, x)
 
+/*      ANA:PORT:QOS_PCP_DEI_MAP_CFG */
+#define ANA_PCP_DEI_CFG(g, r)     __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 32, r, 16, 4)
+
+#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL           BIT(3)
+#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_SET(x)\
+       FIELD_PREP(ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL, x)
+#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_GET(x)\
+       FIELD_GET(ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL, x)
+
+#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL          GENMASK(2, 0)
+#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_SET(x)\
+       FIELD_PREP(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL, x)
+#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_GET(x)\
+       FIELD_GET(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL, x)
+
 /*      ANA:PORT:CPU_FWD_CFG */
 #define ANA_CPU_FWD_CFG(g)        __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4)
 
@@ -478,6 +538,15 @@ enum lan966x_target {
 #define ANA_PORT_CFG_PORTID_VAL_GET(x)\
        FIELD_GET(ANA_PORT_CFG_PORTID_VAL, x)
 
+/*      ANA:COMMON:DSCP_REWR_CFG */
+#define ANA_DSCP_REWR_CFG(r)      __REG(TARGET_ANA, 0, 1, 31232, 0, 1, 552, 332, r, 16, 4)
+
+#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL      GENMASK(5, 0)
+#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_SET(x)\
+       FIELD_PREP(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL, x)
+#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_GET(x)\
+       FIELD_GET(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL, x)
+
 /*      ANA:PORT:POL_CFG */
 #define ANA_POL_CFG(g)            __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 116, 0, 1, 4)
 
@@ -547,6 +616,33 @@ enum lan966x_target {
 #define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_GET(x)\
        FIELD_GET(ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, x)
 
+/*      ANA:COMMON:DSCP_CFG */
+#define ANA_DSCP_CFG(r)           __REG(TARGET_ANA, 0, 1, 31232, 0, 1, 552, 76, r, 64, 4)
+
+#define ANA_DSCP_CFG_DP_DSCP_VAL                 BIT(11)
+#define ANA_DSCP_CFG_DP_DSCP_VAL_SET(x)\
+       FIELD_PREP(ANA_DSCP_CFG_DP_DSCP_VAL, x)
+#define ANA_DSCP_CFG_DP_DSCP_VAL_GET(x)\
+       FIELD_GET(ANA_DSCP_CFG_DP_DSCP_VAL, x)
+
+#define ANA_DSCP_CFG_QOS_DSCP_VAL                GENMASK(10, 8)
+#define ANA_DSCP_CFG_QOS_DSCP_VAL_SET(x)\
+       FIELD_PREP(ANA_DSCP_CFG_QOS_DSCP_VAL, x)
+#define ANA_DSCP_CFG_QOS_DSCP_VAL_GET(x)\
+       FIELD_GET(ANA_DSCP_CFG_QOS_DSCP_VAL, x)
+
+#define ANA_DSCP_CFG_DSCP_TRUST_ENA              BIT(1)
+#define ANA_DSCP_CFG_DSCP_TRUST_ENA_SET(x)\
+       FIELD_PREP(ANA_DSCP_CFG_DSCP_TRUST_ENA, x)
+#define ANA_DSCP_CFG_DSCP_TRUST_ENA_GET(x)\
+       FIELD_GET(ANA_DSCP_CFG_DSCP_TRUST_ENA, x)
+
+#define ANA_DSCP_CFG_DSCP_REWR_ENA               BIT(0)
+#define ANA_DSCP_CFG_DSCP_REWR_ENA_SET(x)\
+       FIELD_PREP(ANA_DSCP_CFG_DSCP_REWR_ENA, x)
+#define ANA_DSCP_CFG_DSCP_REWR_ENA_GET(x)\
+       FIELD_GET(ANA_DSCP_CFG_DSCP_REWR_ENA, x)
+
 /*      ANA:POL:POL_PIR_CFG */
 #define ANA_POL_PIR_CFG(g)        __REG(TARGET_ANA, 0, 1, 16384, g, 345, 32, 0, 0, 1, 4)
 
@@ -1468,15 +1564,66 @@ enum lan966x_target {
 #define REW_TAG_CFG_TAG_TPID_CFG_GET(x)\
        FIELD_GET(REW_TAG_CFG_TAG_TPID_CFG, x)
 
+#define REW_TAG_CFG_TAG_PCP_CFG                  GENMASK(3, 2)
+#define REW_TAG_CFG_TAG_PCP_CFG_SET(x)\
+       FIELD_PREP(REW_TAG_CFG_TAG_PCP_CFG, x)
+#define REW_TAG_CFG_TAG_PCP_CFG_GET(x)\
+       FIELD_GET(REW_TAG_CFG_TAG_PCP_CFG, x)
+
+#define REW_TAG_CFG_TAG_DEI_CFG                  GENMASK(1, 0)
+#define REW_TAG_CFG_TAG_DEI_CFG_SET(x)\
+       FIELD_PREP(REW_TAG_CFG_TAG_DEI_CFG, x)
+#define REW_TAG_CFG_TAG_DEI_CFG_GET(x)\
+       FIELD_GET(REW_TAG_CFG_TAG_DEI_CFG, x)
+
 /*      REW:PORT:PORT_CFG */
 #define REW_PORT_CFG(g)           __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 8, 0, 1, 4)
 
+#define REW_PORT_CFG_ES0_EN                      BIT(4)
+#define REW_PORT_CFG_ES0_EN_SET(x)\
+       FIELD_PREP(REW_PORT_CFG_ES0_EN, x)
+#define REW_PORT_CFG_ES0_EN_GET(x)\
+       FIELD_GET(REW_PORT_CFG_ES0_EN, x)
+
 #define REW_PORT_CFG_NO_REWRITE                  BIT(0)
 #define REW_PORT_CFG_NO_REWRITE_SET(x)\
        FIELD_PREP(REW_PORT_CFG_NO_REWRITE, x)
 #define REW_PORT_CFG_NO_REWRITE_GET(x)\
        FIELD_GET(REW_PORT_CFG_NO_REWRITE, x)
 
+/*      REW:PORT:DSCP_CFG */
+#define REW_DSCP_CFG(g)           __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 12, 0, 1, 4)
+
+#define REW_DSCP_CFG_DSCP_REWR_CFG               GENMASK(1, 0)
+#define REW_DSCP_CFG_DSCP_REWR_CFG_SET(x)\
+       FIELD_PREP(REW_DSCP_CFG_DSCP_REWR_CFG, x)
+#define REW_DSCP_CFG_DSCP_REWR_CFG_GET(x)\
+       FIELD_GET(REW_DSCP_CFG_DSCP_REWR_CFG, x)
+
+/*      REW:PORT:PCP_DEI_QOS_MAP_CFG */
+#define REW_PCP_DEI_CFG(g, r)     __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 16, r, 16, 4)
+
+#define REW_PCP_DEI_CFG_DEI_QOS_VAL              BIT(3)
+#define REW_PCP_DEI_CFG_DEI_QOS_VAL_SET(x)\
+       FIELD_PREP(REW_PCP_DEI_CFG_DEI_QOS_VAL, x)
+#define REW_PCP_DEI_CFG_DEI_QOS_VAL_GET(x)\
+       FIELD_GET(REW_PCP_DEI_CFG_DEI_QOS_VAL, x)
+
+#define REW_PCP_DEI_CFG_PCP_QOS_VAL              GENMASK(2, 0)
+#define REW_PCP_DEI_CFG_PCP_QOS_VAL_SET(x)\
+       FIELD_PREP(REW_PCP_DEI_CFG_PCP_QOS_VAL, x)
+#define REW_PCP_DEI_CFG_PCP_QOS_VAL_GET(x)\
+       FIELD_GET(REW_PCP_DEI_CFG_PCP_QOS_VAL, x)
+
+/*      REW:COMMON:STAT_CFG */
+#define REW_STAT_CFG              __REG(TARGET_REW, 0, 1, 3072, 0, 1, 528, 520, 0, 1, 4)
+
+#define REW_STAT_CFG_STAT_MODE                   GENMASK(1, 0)
+#define REW_STAT_CFG_STAT_MODE_SET(x)\
+       FIELD_PREP(REW_STAT_CFG_STAT_MODE, x)
+#define REW_STAT_CFG_STAT_MODE_GET(x)\
+       FIELD_GET(REW_STAT_CFG_STAT_MODE, x)
+
 /*      SYS:SYSTEM:RESET_CFG */
 #define SYS_RESET_CFG             __REG(TARGET_SYS, 0, 1, 4128, 0, 1, 168, 0, 0, 1, 4)
 
index cf0cc75..ee652f2 100644 (file)
@@ -21,8 +21,14 @@ static int lan966x_tc_setup_qdisc_mqprio(struct lan966x_port *port,
 static int lan966x_tc_setup_qdisc_taprio(struct lan966x_port *port,
                                         struct tc_taprio_qopt_offload *taprio)
 {
-       return taprio->enable ? lan966x_taprio_add(port, taprio) :
-                               lan966x_taprio_del(port);
+       switch (taprio->cmd) {
+       case TAPRIO_CMD_REPLACE:
+               return lan966x_taprio_add(port, taprio);
+       case TAPRIO_CMD_DESTROY:
+               return lan966x_taprio_del(port);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int lan966x_tc_setup_qdisc_tbf(struct lan966x_port *port,
index 47b2f75..96b3def 100644 (file)
@@ -5,6 +5,8 @@
 #include "vcap_api_client.h"
 #include "vcap_tc.h"
 
+#define LAN966X_FORCE_UNTAGED  3
+
 static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st,
                                      u16 etype)
 {
@@ -29,6 +31,8 @@ static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st,
                        return true;
                }
                break;
+       case VCAP_TYPE_ES0:
+               return true;
        default:
                NL_SET_ERR_MSG_MOD(st->fco->common.extack,
                                   "VCAP type not supported");
@@ -318,6 +322,9 @@ static int lan966x_tc_set_actionset(struct vcap_admin *admin,
        case VCAP_TYPE_IS2:
                aset = VCAP_AFS_BASE_TYPE;
                break;
+       case VCAP_TYPE_ES0:
+               aset = VCAP_AFS_VID;
+               break;
        default:
                return -EINVAL;
        }
@@ -353,6 +360,10 @@ static int lan966x_tc_add_rule_link_target(struct vcap_admin *admin,
                /* Add IS2 specific PAG key (for chaining rules from IS1) */
                return vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_PAG,
                                             link_val, ~0);
+       case VCAP_TYPE_ES0:
+               /* Add ES0 specific ISDX key (for chaining rules from IS1) */
+               return vcap_rule_add_key_u32(vrule, VCAP_KF_ISDX_CLS,
+                                            link_val, ~0);
        default:
                break;
        }
@@ -389,6 +400,18 @@ static int lan966x_tc_add_rule_link(struct vcap_control *vctrl,
                                               0xff);
                if (err)
                        return err;
+       } else if (admin->vtype == VCAP_TYPE_IS1 &&
+                  to_admin->vtype == VCAP_TYPE_ES0) {
+               /* This works for IS1->ES0 */
+               err = vcap_rule_add_action_u32(vrule, VCAP_AF_ISDX_ADD_VAL,
+                                              diff);
+               if (err)
+                       return err;
+
+               err = vcap_rule_add_action_bit(vrule, VCAP_AF_ISDX_REPLACE_ENA,
+                                              VCAP_BIT_1);
+               if (err)
+                       return err;
        } else {
                NL_SET_ERR_MSG_MOD(f->common.extack,
                                   "Unsupported chain destination");
@@ -398,6 +421,23 @@ static int lan966x_tc_add_rule_link(struct vcap_control *vctrl,
        return err;
 }
 
+static int lan966x_tc_add_rule_counter(struct vcap_admin *admin,
+                                      struct vcap_rule *vrule)
+{
+       int err = 0;
+
+       switch (admin->vtype) {
+       case VCAP_TYPE_ES0:
+               err = vcap_rule_mod_action_u32(vrule, VCAP_AF_ESDX,
+                                              vrule->id);
+               break;
+       default:
+               break;
+       }
+
+       return err;
+}
+
 static int lan966x_tc_flower_add(struct lan966x_port *port,
                                 struct flow_cls_offload *f,
                                 struct vcap_admin *admin,
@@ -466,6 +506,21 @@ static int lan966x_tc_flower_add(struct lan966x_port *port,
                                goto out;
 
                        break;
+               case FLOW_ACTION_VLAN_POP:
+                       if (admin->vtype != VCAP_TYPE_ES0) {
+                               NL_SET_ERR_MSG_MOD(f->common.extack,
+                                                  "Cannot use vlan pop on non es0");
+                               err = -EOPNOTSUPP;
+                               goto out;
+                       }
+
+                       /* Force untag */
+                       err = vcap_rule_add_action_u32(vrule, VCAP_AF_PUSH_OUTER_TAG,
+                                                      LAN966X_FORCE_UNTAGED);
+                       if (err)
+                               goto out;
+
+                       break;
                default:
                        NL_SET_ERR_MSG_MOD(f->common.extack,
                                           "Unsupported TC action");
@@ -474,6 +529,12 @@ static int lan966x_tc_flower_add(struct lan966x_port *port,
                }
        }
 
+       err = lan966x_tc_add_rule_counter(admin, vrule);
+       if (err) {
+               vcap_set_tc_exterr(f, vrule);
+               goto out;
+       }
+
        err = vcap_val_rule(vrule, l3_proto);
        if (err) {
                vcap_set_tc_exterr(f, vrule);
index 66400a0..fb6851b 100644 (file)
@@ -2121,6 +2121,69 @@ static const struct vcap_field is2_smac_sip6_keyfield[] = {
        },
 };
 
+static const struct vcap_field es0_vid_keyfield[] = {
+       [VCAP_KF_IF_EGR_PORT_NO] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 0,
+               .width = 4,
+       },
+       [VCAP_KF_IF_IGR_PORT] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 4,
+               .width = 4,
+       },
+       [VCAP_KF_ISDX_GT0_IS] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 8,
+               .width = 1,
+       },
+       [VCAP_KF_ISDX_CLS] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 9,
+               .width = 8,
+       },
+       [VCAP_KF_L2_MC_IS] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 17,
+               .width = 1,
+       },
+       [VCAP_KF_L2_BC_IS] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 18,
+               .width = 1,
+       },
+       [VCAP_KF_8021Q_VID_CLS] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 19,
+               .width = 12,
+       },
+       [VCAP_KF_8021Q_DEI_CLS] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 31,
+               .width = 1,
+       },
+       [VCAP_KF_8021Q_PCP_CLS] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 32,
+               .width = 3,
+       },
+       [VCAP_KF_L3_DPL_CLS] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 35,
+               .width = 1,
+       },
+       [VCAP_KF_RTP_ID] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 36,
+               .width = 10,
+       },
+       [VCAP_KF_PDU_TYPE] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 46,
+               .width = 4,
+       },
+};
+
 /* keyfield_set */
 static const struct vcap_set is1_keyfield_set[] = {
        [VCAP_KFS_NORMAL] = {
@@ -2228,6 +2291,14 @@ static const struct vcap_set is2_keyfield_set[] = {
        },
 };
 
+static const struct vcap_set es0_keyfield_set[] = {
+       [VCAP_KFS_VID] = {
+               .type_id = -1,
+               .sw_per_item = 1,
+               .sw_cnt = 1,
+       },
+};
+
 /* keyfield_set map */
 static const struct vcap_field *is1_keyfield_set_map[] = {
        [VCAP_KFS_NORMAL] = is1_normal_keyfield,
@@ -2255,6 +2326,10 @@ static const struct vcap_field *is2_keyfield_set_map[] = {
        [VCAP_KFS_SMAC_SIP6] = is2_smac_sip6_keyfield,
 };
 
+static const struct vcap_field *es0_keyfield_set_map[] = {
+       [VCAP_KFS_VID] = es0_vid_keyfield,
+};
+
 /* keyfield_set map sizes */
 static int is1_keyfield_set_map_size[] = {
        [VCAP_KFS_NORMAL] = ARRAY_SIZE(is1_normal_keyfield),
@@ -2282,6 +2357,10 @@ static int is2_keyfield_set_map_size[] = {
        [VCAP_KFS_SMAC_SIP6] = ARRAY_SIZE(is2_smac_sip6_keyfield),
 };
 
+static int es0_keyfield_set_map_size[] = {
+       [VCAP_KFS_VID] = ARRAY_SIZE(es0_vid_keyfield),
+};
+
 /* actionfields */
 static const struct vcap_field is1_s1_actionfield[] = {
        [VCAP_AF_TYPE] = {
@@ -2522,6 +2601,94 @@ static const struct vcap_field is2_smac_sip_actionfield[] = {
        },
 };
 
+static const struct vcap_field es0_vid_actionfield[] = {
+       [VCAP_AF_PUSH_OUTER_TAG] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 0,
+               .width = 2,
+       },
+       [VCAP_AF_PUSH_INNER_TAG] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 2,
+               .width = 1,
+       },
+       [VCAP_AF_TAG_A_TPID_SEL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 3,
+               .width = 2,
+       },
+       [VCAP_AF_TAG_A_VID_SEL] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 5,
+               .width = 1,
+       },
+       [VCAP_AF_TAG_A_PCP_SEL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 6,
+               .width = 2,
+       },
+       [VCAP_AF_TAG_A_DEI_SEL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 8,
+               .width = 2,
+       },
+       [VCAP_AF_TAG_B_TPID_SEL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 10,
+               .width = 2,
+       },
+       [VCAP_AF_TAG_B_VID_SEL] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 12,
+               .width = 1,
+       },
+       [VCAP_AF_TAG_B_PCP_SEL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 13,
+               .width = 2,
+       },
+       [VCAP_AF_TAG_B_DEI_SEL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 15,
+               .width = 2,
+       },
+       [VCAP_AF_VID_A_VAL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 17,
+               .width = 12,
+       },
+       [VCAP_AF_PCP_A_VAL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 29,
+               .width = 3,
+       },
+       [VCAP_AF_DEI_A_VAL] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 32,
+               .width = 1,
+       },
+       [VCAP_AF_VID_B_VAL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 33,
+               .width = 12,
+       },
+       [VCAP_AF_PCP_B_VAL] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 45,
+               .width = 3,
+       },
+       [VCAP_AF_DEI_B_VAL] = {
+               .type = VCAP_FIELD_BIT,
+               .offset = 48,
+               .width = 1,
+       },
+       [VCAP_AF_ESDX] = {
+               .type = VCAP_FIELD_U32,
+               .offset = 49,
+               .width = 8,
+       },
+};
+
 /* actionfield_set */
 static const struct vcap_set is1_actionfield_set[] = {
        [VCAP_AFS_S1] = {
@@ -2544,6 +2711,14 @@ static const struct vcap_set is2_actionfield_set[] = {
        },
 };
 
+static const struct vcap_set es0_actionfield_set[] = {
+       [VCAP_AFS_VID] = {
+               .type_id = -1,
+               .sw_per_item = 1,
+               .sw_cnt = 1,
+       },
+};
+
 /* actionfield_set map */
 static const struct vcap_field *is1_actionfield_set_map[] = {
        [VCAP_AFS_S1] = is1_s1_actionfield,
@@ -2554,6 +2729,10 @@ static const struct vcap_field *is2_actionfield_set_map[] = {
        [VCAP_AFS_SMAC_SIP] = is2_smac_sip_actionfield,
 };
 
+static const struct vcap_field *es0_actionfield_set_map[] = {
+       [VCAP_AFS_VID] = es0_vid_actionfield,
+};
+
 /* actionfield_set map size */
 static int is1_actionfield_set_map_size[] = {
        [VCAP_AFS_S1] = ARRAY_SIZE(is1_s1_actionfield),
@@ -2564,6 +2743,10 @@ static int is2_actionfield_set_map_size[] = {
        [VCAP_AFS_SMAC_SIP] = ARRAY_SIZE(is2_smac_sip_actionfield),
 };
 
+static int es0_actionfield_set_map_size[] = {
+       [VCAP_AFS_VID] = ARRAY_SIZE(es0_vid_actionfield),
+};
+
 /* Type Groups */
 static const struct vcap_typegroup is1_x4_keyfield_set_typegroups[] = {
        {
@@ -2659,6 +2842,10 @@ static const struct vcap_typegroup is2_x1_keyfield_set_typegroups[] = {
        {}
 };
 
+static const struct vcap_typegroup es0_x1_keyfield_set_typegroups[] = {
+       {}
+};
+
 static const struct vcap_typegroup *is1_keyfield_set_typegroups[] = {
        [4] = is1_x4_keyfield_set_typegroups,
        [2] = is1_x2_keyfield_set_typegroups,
@@ -2673,6 +2860,11 @@ static const struct vcap_typegroup *is2_keyfield_set_typegroups[] = {
        [5] = NULL,
 };
 
+static const struct vcap_typegroup *es0_keyfield_set_typegroups[] = {
+       [1] = es0_x1_keyfield_set_typegroups,
+       [2] = NULL,
+};
+
 static const struct vcap_typegroup is1_x1_actionfield_set_typegroups[] = {
        {}
 };
@@ -2700,6 +2892,10 @@ static const struct vcap_typegroup is2_x1_actionfield_set_typegroups[] = {
        {}
 };
 
+static const struct vcap_typegroup es0_x1_actionfield_set_typegroups[] = {
+       {}
+};
+
 static const struct vcap_typegroup *is1_actionfield_set_typegroups[] = {
        [1] = is1_x1_actionfield_set_typegroups,
        [5] = NULL,
@@ -2711,6 +2907,11 @@ static const struct vcap_typegroup *is2_actionfield_set_typegroups[] = {
        [5] = NULL,
 };
 
+static const struct vcap_typegroup *es0_actionfield_set_typegroups[] = {
+       [1] = es0_x1_actionfield_set_typegroups,
+       [2] = NULL,
+};
+
 /* Keyfieldset names */
 static const char * const vcap_keyfield_set_names[] = {
        [VCAP_KFS_NO_VALUE]                      =  "(None)",
@@ -2743,6 +2944,7 @@ static const char * const vcap_keyfield_set_names[] = {
        [VCAP_KFS_RT]                            =  "VCAP_KFS_RT",
        [VCAP_KFS_SMAC_SIP4]                     =  "VCAP_KFS_SMAC_SIP4",
        [VCAP_KFS_SMAC_SIP6]                     =  "VCAP_KFS_SMAC_SIP6",
+       [VCAP_KFS_VID]                           =  "VCAP_KFS_VID",
 };
 
 /* Actionfieldset names */
@@ -2751,9 +2953,11 @@ static const char * const vcap_actionfield_set_names[] = {
        [VCAP_AFS_BASE_TYPE]                     =  "VCAP_AFS_BASE_TYPE",
        [VCAP_AFS_CLASSIFICATION]                =  "VCAP_AFS_CLASSIFICATION",
        [VCAP_AFS_CLASS_REDUCED]                 =  "VCAP_AFS_CLASS_REDUCED",
+       [VCAP_AFS_ES0]                           =  "VCAP_AFS_ES0",
        [VCAP_AFS_FULL]                          =  "VCAP_AFS_FULL",
        [VCAP_AFS_S1]                            =  "VCAP_AFS_S1",
        [VCAP_AFS_SMAC_SIP]                      =  "VCAP_AFS_SMAC_SIP",
+       [VCAP_AFS_VID]                           =  "VCAP_AFS_VID",
 };
 
 /* Keyfield names */
@@ -2774,6 +2978,7 @@ static const char * const vcap_keyfield_names[] = {
        [VCAP_KF_8021Q_PCP1]                     =  "8021Q_PCP1",
        [VCAP_KF_8021Q_PCP2]                     =  "8021Q_PCP2",
        [VCAP_KF_8021Q_PCP_CLS]                  =  "8021Q_PCP_CLS",
+       [VCAP_KF_8021Q_TPID]                     =  "8021Q_TPID",
        [VCAP_KF_8021Q_TPID0]                    =  "8021Q_TPID0",
        [VCAP_KF_8021Q_TPID1]                    =  "8021Q_TPID1",
        [VCAP_KF_8021Q_TPID2]                    =  "8021Q_TPID2",
@@ -2799,6 +3004,7 @@ static const char * const vcap_keyfield_names[] = {
        [VCAP_KF_HOST_MATCH]                     =  "HOST_MATCH",
        [VCAP_KF_IF_EGR_PORT_MASK]               =  "IF_EGR_PORT_MASK",
        [VCAP_KF_IF_EGR_PORT_MASK_RNG]           =  "IF_EGR_PORT_MASK_RNG",
+       [VCAP_KF_IF_EGR_PORT_NO]                 =  "IF_EGR_PORT_NO",
        [VCAP_KF_IF_IGR_PORT]                    =  "IF_IGR_PORT",
        [VCAP_KF_IF_IGR_PORT_MASK]               =  "IF_IGR_PORT_MASK",
        [VCAP_KF_IF_IGR_PORT_MASK_L3]            =  "IF_IGR_PORT_MASK_L3",
@@ -2873,7 +3079,9 @@ static const char * const vcap_keyfield_names[] = {
        [VCAP_KF_OAM_OPCODE]                     =  "OAM_OPCODE",
        [VCAP_KF_OAM_VER]                        =  "OAM_VER",
        [VCAP_KF_OAM_Y1731_IS]                   =  "OAM_Y1731_IS",
+       [VCAP_KF_PDU_TYPE]                       =  "PDU_TYPE",
        [VCAP_KF_PROT_ACTIVE]                    =  "PROT_ACTIVE",
+       [VCAP_KF_RTP_ID]                         =  "RTP_ID",
        [VCAP_KF_RT_FRMID]                       =  "RT_FRMID",
        [VCAP_KF_RT_TYPE]                        =  "RT_TYPE",
        [VCAP_KF_RT_VLAN_IDX]                    =  "RT_VLAN_IDX",
@@ -2891,18 +3099,25 @@ static const char * const vcap_actionfield_names[] = {
        [VCAP_AF_COPY_PORT_NUM]                  =  "COPY_PORT_NUM",
        [VCAP_AF_COPY_QUEUE_NUM]                 =  "COPY_QUEUE_NUM",
        [VCAP_AF_CPU_COPY_ENA]                   =  "CPU_COPY_ENA",
+       [VCAP_AF_CPU_QU]                         =  "CPU_QU",
        [VCAP_AF_CPU_QUEUE_NUM]                  =  "CPU_QUEUE_NUM",
        [VCAP_AF_CUSTOM_ACE_TYPE_ENA]            =  "CUSTOM_ACE_TYPE_ENA",
+       [VCAP_AF_DEI_A_VAL]                      =  "DEI_A_VAL",
+       [VCAP_AF_DEI_B_VAL]                      =  "DEI_B_VAL",
+       [VCAP_AF_DEI_C_VAL]                      =  "DEI_C_VAL",
        [VCAP_AF_DEI_ENA]                        =  "DEI_ENA",
        [VCAP_AF_DEI_VAL]                        =  "DEI_VAL",
        [VCAP_AF_DLR_SEL]                        =  "DLR_SEL",
        [VCAP_AF_DP_ENA]                         =  "DP_ENA",
        [VCAP_AF_DP_VAL]                         =  "DP_VAL",
        [VCAP_AF_DSCP_ENA]                       =  "DSCP_ENA",
+       [VCAP_AF_DSCP_SEL]                       =  "DSCP_SEL",
        [VCAP_AF_DSCP_VAL]                       =  "DSCP_VAL",
        [VCAP_AF_ES2_REW_CMD]                    =  "ES2_REW_CMD",
+       [VCAP_AF_ESDX]                           =  "ESDX",
        [VCAP_AF_FWD_KILL_ENA]                   =  "FWD_KILL_ENA",
        [VCAP_AF_FWD_MODE]                       =  "FWD_MODE",
+       [VCAP_AF_FWD_SEL]                        =  "FWD_SEL",
        [VCAP_AF_HIT_ME_ONCE]                    =  "HIT_ME_ONCE",
        [VCAP_AF_HOST_MATCH]                     =  "HOST_MATCH",
        [VCAP_AF_IGNORE_PIPELINE_CTRL]           =  "IGNORE_PIPELINE_CTRL",
@@ -2912,6 +3127,7 @@ static const char * const vcap_actionfield_names[] = {
        [VCAP_AF_ISDX_ENA]                       =  "ISDX_ENA",
        [VCAP_AF_ISDX_REPLACE_ENA]               =  "ISDX_REPLACE_ENA",
        [VCAP_AF_ISDX_VAL]                       =  "ISDX_VAL",
+       [VCAP_AF_LOOP_ENA]                       =  "LOOP_ENA",
        [VCAP_AF_LRN_DIS]                        =  "LRN_DIS",
        [VCAP_AF_MAP_IDX]                        =  "MAP_IDX",
        [VCAP_AF_MAP_KEY]                        =  "MAP_KEY",
@@ -2928,15 +3144,23 @@ static const char * const vcap_actionfield_names[] = {
        [VCAP_AF_OAM_SEL]                        =  "OAM_SEL",
        [VCAP_AF_PAG_OVERRIDE_MASK]              =  "PAG_OVERRIDE_MASK",
        [VCAP_AF_PAG_VAL]                        =  "PAG_VAL",
+       [VCAP_AF_PCP_A_VAL]                      =  "PCP_A_VAL",
+       [VCAP_AF_PCP_B_VAL]                      =  "PCP_B_VAL",
+       [VCAP_AF_PCP_C_VAL]                      =  "PCP_C_VAL",
        [VCAP_AF_PCP_ENA]                        =  "PCP_ENA",
        [VCAP_AF_PCP_VAL]                        =  "PCP_VAL",
+       [VCAP_AF_PIPELINE_ACT]                   =  "PIPELINE_ACT",
        [VCAP_AF_PIPELINE_FORCE_ENA]             =  "PIPELINE_FORCE_ENA",
        [VCAP_AF_PIPELINE_PT]                    =  "PIPELINE_PT",
        [VCAP_AF_POLICE_ENA]                     =  "POLICE_ENA",
        [VCAP_AF_POLICE_IDX]                     =  "POLICE_IDX",
        [VCAP_AF_POLICE_REMARK]                  =  "POLICE_REMARK",
        [VCAP_AF_POLICE_VCAP_ONLY]               =  "POLICE_VCAP_ONLY",
+       [VCAP_AF_POP_VAL]                        =  "POP_VAL",
        [VCAP_AF_PORT_MASK]                      =  "PORT_MASK",
+       [VCAP_AF_PUSH_CUSTOMER_TAG]              =  "PUSH_CUSTOMER_TAG",
+       [VCAP_AF_PUSH_INNER_TAG]                 =  "PUSH_INNER_TAG",
+       [VCAP_AF_PUSH_OUTER_TAG]                 =  "PUSH_OUTER_TAG",
        [VCAP_AF_QOS_ENA]                        =  "QOS_ENA",
        [VCAP_AF_QOS_VAL]                        =  "QOS_VAL",
        [VCAP_AF_REW_OP]                         =  "REW_OP",
@@ -2945,7 +3169,24 @@ static const char * const vcap_actionfield_names[] = {
        [VCAP_AF_SFID_VAL]                       =  "SFID_VAL",
        [VCAP_AF_SGID_ENA]                       =  "SGID_ENA",
        [VCAP_AF_SGID_VAL]                       =  "SGID_VAL",
+       [VCAP_AF_SWAP_MACS_ENA]                  =  "SWAP_MACS_ENA",
+       [VCAP_AF_TAG_A_DEI_SEL]                  =  "TAG_A_DEI_SEL",
+       [VCAP_AF_TAG_A_PCP_SEL]                  =  "TAG_A_PCP_SEL",
+       [VCAP_AF_TAG_A_TPID_SEL]                 =  "TAG_A_TPID_SEL",
+       [VCAP_AF_TAG_A_VID_SEL]                  =  "TAG_A_VID_SEL",
+       [VCAP_AF_TAG_B_DEI_SEL]                  =  "TAG_B_DEI_SEL",
+       [VCAP_AF_TAG_B_PCP_SEL]                  =  "TAG_B_PCP_SEL",
+       [VCAP_AF_TAG_B_TPID_SEL]                 =  "TAG_B_TPID_SEL",
+       [VCAP_AF_TAG_B_VID_SEL]                  =  "TAG_B_VID_SEL",
+       [VCAP_AF_TAG_C_DEI_SEL]                  =  "TAG_C_DEI_SEL",
+       [VCAP_AF_TAG_C_PCP_SEL]                  =  "TAG_C_PCP_SEL",
+       [VCAP_AF_TAG_C_TPID_SEL]                 =  "TAG_C_TPID_SEL",
+       [VCAP_AF_TAG_C_VID_SEL]                  =  "TAG_C_VID_SEL",
        [VCAP_AF_TYPE]                           =  "TYPE",
+       [VCAP_AF_UNTAG_VID_ENA]                  =  "UNTAG_VID_ENA",
+       [VCAP_AF_VID_A_VAL]                      =  "VID_A_VAL",
+       [VCAP_AF_VID_B_VAL]                      =  "VID_B_VAL",
+       [VCAP_AF_VID_C_VAL]                      =  "VID_C_VAL",
        [VCAP_AF_VID_REPLACE_ENA]                =  "VID_REPLACE_ENA",
        [VCAP_AF_VID_VAL]                        =  "VID_VAL",
        [VCAP_AF_VLAN_POP_CNT]                   =  "VLAN_POP_CNT",
@@ -2996,11 +3237,32 @@ const struct vcap_info lan966x_vcaps[] = {
                .keyfield_set_typegroups = is2_keyfield_set_typegroups,
                .actionfield_set_typegroups = is2_actionfield_set_typegroups,
        },
+       [VCAP_TYPE_ES0] = {
+               .name = "es0",
+               .rows = 256,
+               .sw_count = 1,
+               .sw_width = 96,
+               .sticky_width = 1,
+               .act_width = 65,
+               .default_cnt = 8,
+               .require_cnt_dis = 0,
+               .version = 1,
+               .keyfield_set = es0_keyfield_set,
+               .keyfield_set_size = ARRAY_SIZE(es0_keyfield_set),
+               .actionfield_set = es0_actionfield_set,
+               .actionfield_set_size = ARRAY_SIZE(es0_actionfield_set),
+               .keyfield_set_map = es0_keyfield_set_map,
+               .keyfield_set_map_size = es0_keyfield_set_map_size,
+               .actionfield_set_map = es0_actionfield_set_map,
+               .actionfield_set_map_size = es0_actionfield_set_map_size,
+               .keyfield_set_typegroups = es0_keyfield_set_typegroups,
+               .actionfield_set_typegroups = es0_actionfield_set_typegroups,
+       },
 };
 
 const struct vcap_statistics lan966x_vcap_stats = {
        .name = "lan966x",
-       .count = 2,
+       .count = 3,
        .keyfield_set_names = vcap_keyfield_set_names,
        .actionfield_set_names = vcap_actionfield_set_names,
        .keyfield_names = vcap_keyfield_names,
index d90c08c..ac525ff 100644 (file)
@@ -190,6 +190,26 @@ static void lan966x_vcap_is2_port_keys(struct lan966x_port *port,
        out->prf(out->dst, "\n");
 }
 
+static void lan966x_vcap_es0_port_keys(struct lan966x_port *port,
+                                      struct vcap_admin *admin,
+                                      struct vcap_output_print *out)
+{
+       struct lan966x *lan966x = port->lan966x;
+       u32 val;
+
+       out->prf(out->dst, "  port[%d] (%s): ", port->chip_port,
+                netdev_name(port->dev));
+
+       val = lan_rd(lan966x, REW_PORT_CFG(port->chip_port));
+       out->prf(out->dst, "\n    state: ");
+       if (REW_PORT_CFG_ES0_EN_GET(val))
+               out->prf(out->dst, "on");
+       else
+               out->prf(out->dst, "off");
+
+       out->prf(out->dst, "\n");
+}
+
 int lan966x_vcap_port_info(struct net_device *dev,
                           struct vcap_admin *admin,
                           struct vcap_output_print *out)
@@ -210,6 +230,9 @@ int lan966x_vcap_port_info(struct net_device *dev,
        case VCAP_TYPE_IS1:
                lan966x_vcap_is1_port_keys(port, admin, out);
                break;
+       case VCAP_TYPE_ES0:
+               lan966x_vcap_es0_port_keys(port, admin, out);
+               break;
        default:
                out->prf(out->dst, "  no info\n");
                break;
index 7ea8e86..a4414f6 100644 (file)
 
 #define LAN966X_IS1_LOOKUPS 3
 #define LAN966X_IS2_LOOKUPS 2
+#define LAN966X_ES0_LOOKUPS 1
+
+#define LAN966X_STAT_ESDX_GRN_BYTES 0x300
+#define LAN966X_STAT_ESDX_GRN_PKTS 0x301
+#define LAN966X_STAT_ESDX_YEL_BYTES 0x302
+#define LAN966X_STAT_ESDX_YEL_PKTS 0x303
 
 static struct lan966x_vcap_inst {
        enum vcap_type vtype; /* type of vcap */
@@ -21,6 +27,14 @@ static struct lan966x_vcap_inst {
        bool ingress; /* is vcap in the ingress path */
 } lan966x_vcap_inst_cfg[] = {
        {
+               .vtype = VCAP_TYPE_ES0,
+               .tgt_inst = 0,
+               .lookups = LAN966X_ES0_LOOKUPS,
+               .first_cid = LAN966X_VCAP_CID_ES0_L0,
+               .last_cid = LAN966X_VCAP_CID_ES0_MAX,
+               .count = 64,
+       },
+       {
                .vtype = VCAP_TYPE_IS1, /* IS1-0 */
                .tgt_inst = 1,
                .lookups = LAN966X_IS1_LOOKUPS,
@@ -279,6 +293,8 @@ lan966x_vcap_validate_keyset(struct net_device *dev,
                err = lan966x_vcap_is2_get_port_keysets(dev, lookup, &keysetlist,
                                                        l3_proto);
                break;
+       case VCAP_TYPE_ES0:
+               return kslist->keysets[0];
        default:
                pr_err("vcap type: %s not supported\n",
                       lan966x_vcaps[admin->vtype].name);
@@ -338,6 +354,14 @@ static void lan966x_vcap_is2_add_default_fields(struct lan966x_port *port,
                                      VCAP_BIT_0);
 }
 
+static void lan966x_vcap_es0_add_default_fields(struct lan966x_port *port,
+                                               struct vcap_admin *admin,
+                                               struct vcap_rule *rule)
+{
+       vcap_rule_add_key_u32(rule, VCAP_KF_IF_EGR_PORT_NO,
+                             port->chip_port, GENMASK(4, 0));
+}
+
 static void lan966x_vcap_add_default_fields(struct net_device *dev,
                                            struct vcap_admin *admin,
                                            struct vcap_rule *rule)
@@ -351,6 +375,9 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev,
        case VCAP_TYPE_IS2:
                lan966x_vcap_is2_add_default_fields(port, admin, rule);
                break;
+       case VCAP_TYPE_ES0:
+               lan966x_vcap_es0_add_default_fields(port, admin, rule);
+               break;
        default:
                pr_err("vcap type: %s not supported\n",
                       lan966x_vcaps[admin->vtype].name);
@@ -366,6 +393,40 @@ static void lan966x_vcap_cache_erase(struct vcap_admin *admin)
        memset(&admin->cache.counter, 0, sizeof(admin->cache.counter));
 }
 
+/* The ESDX counter is only used/incremented if the frame has been classified
+ * with an ISDX > 0 (e.g by a rule in IS0).  This is not mentioned in the
+ * datasheet.
+ */
+static void lan966x_es0_read_esdx_counter(struct lan966x *lan966x,
+                                         struct vcap_admin *admin, u32 id)
+{
+       u32 counter;
+
+       id = id & 0xff; /* counter limit */
+       mutex_lock(&lan966x->stats_lock);
+       lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG);
+       counter = lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)) +
+                 lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS));
+       mutex_unlock(&lan966x->stats_lock);
+       if (counter)
+               admin->cache.counter = counter;
+}
+
+static void lan966x_es0_write_esdx_counter(struct lan966x *lan966x,
+                                          struct vcap_admin *admin, u32 id)
+{
+       id = id & 0xff; /* counter limit */
+
+       mutex_lock(&lan966x->stats_lock);
+       lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG);
+       lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_BYTES));
+       lan_wr(admin->cache.counter, lan966x,
+              SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS));
+       lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_BYTES));
+       lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS));
+       mutex_unlock(&lan966x->stats_lock);
+}
+
 static void lan966x_vcap_cache_write(struct net_device *dev,
                                     struct vcap_admin *admin,
                                     enum vcap_selection sel,
@@ -398,6 +459,9 @@ static void lan966x_vcap_cache_write(struct net_device *dev,
                admin->cache.sticky = admin->cache.counter > 0;
                lan_wr(admin->cache.counter, lan966x,
                       VCAP_CNT_DAT(admin->tgt_inst, 0));
+
+               if (admin->vtype == VCAP_TYPE_ES0)
+                       lan966x_es0_write_esdx_counter(lan966x, admin, start);
                break;
        default:
                break;
@@ -437,6 +501,9 @@ static void lan966x_vcap_cache_read(struct net_device *dev,
                admin->cache.counter =
                        lan_rd(lan966x, VCAP_CNT_DAT(instance, 0));
                admin->cache.sticky = admin->cache.counter > 0;
+
+               if (admin->vtype == VCAP_TYPE_ES0)
+                       lan966x_es0_read_esdx_counter(lan966x, admin, start);
        }
 }
 
@@ -625,6 +692,12 @@ static void lan966x_vcap_port_key_deselection(struct lan966x *lan966x,
                        lan_wr(0, lan966x, ANA_VCAP_S2_CFG(p));
 
                break;
+       case VCAP_TYPE_ES0:
+               for (int p = 0; p < lan966x->num_phys_ports; ++p)
+                       lan_rmw(REW_PORT_CFG_ES0_EN_SET(false),
+                               REW_PORT_CFG_ES0_EN, lan966x,
+                               REW_PORT_CFG(p));
+               break;
        default:
                pr_err("vcap type: %s not supported\n",
                       lan966x_vcaps[admin->vtype].name);
@@ -674,9 +747,18 @@ int lan966x_vcap_init(struct lan966x *lan966x)
                        lan_rmw(ANA_VCAP_CFG_S1_ENA_SET(true),
                                ANA_VCAP_CFG_S1_ENA, lan966x,
                                ANA_VCAP_CFG(lan966x->ports[p]->chip_port));
+
+                       lan_rmw(REW_PORT_CFG_ES0_EN_SET(true),
+                               REW_PORT_CFG_ES0_EN, lan966x,
+                               REW_PORT_CFG(lan966x->ports[p]->chip_port));
                }
        }
 
+       /* Statistics: Use ESDX from ES0 if hit, otherwise no counting */
+       lan_rmw(REW_STAT_CFG_STAT_MODE_SET(1),
+               REW_STAT_CFG_STAT_MODE, lan966x,
+               REW_STAT_CFG);
+
        lan966x->vcap_ctrl = ctrl;
 
        return 0;
index a556c44..c3569a4 100644 (file)
@@ -3,8 +3,8 @@
  * Microchip VCAP API
  */
 
-/* This file is autogenerated by cml-utils 2023-02-16 11:41:14 +0100.
- * Commit ID: be85f176b3a151fa748dcaf97c8824a5c2e065f3
+/* This file is autogenerated by cml-utils 2023-03-13 10:16:42 +0100.
+ * Commit ID: 259f0efd6d6d91bfbf62858de153cc757b6bffa3 (dirty)
  */
 
 #ifndef __VCAP_AG_API__
@@ -51,6 +51,7 @@ enum vcap_keyfield_set {
        VCAP_KFS_RT,                /* lan966x is1 X1 */
        VCAP_KFS_SMAC_SIP4,         /* lan966x is2 X1 */
        VCAP_KFS_SMAC_SIP6,         /* lan966x is2 X2 */
+       VCAP_KFS_VID,               /* lan966x es0 X1 */
 };
 
 /* List of keyfields with description
@@ -79,7 +80,7 @@ enum vcap_keyfield_set {
  *   Second DEI in multiple vlan tags (inner tag)
  * VCAP_KF_8021Q_DEI2: W1, sparx5: is0
  *   Third DEI in multiple vlan tags (not always available)
- * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2
+ * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2/es0
  *   Classified DEI
  * VCAP_KF_8021Q_PCP0: W3, sparx5: is0, lan966x: is1
  *   First PCP in multiple vlan tags (outer tag or default port tag)
@@ -87,7 +88,7 @@ enum vcap_keyfield_set {
  *   Second PCP in multiple vlan tags (inner tag)
  * VCAP_KF_8021Q_PCP2: W3, sparx5: is0
  *   Third PCP in multiple vlan tags (not always available)
- * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2/es2, lan966x: is2
+ * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2/es2, lan966x: is2/es0
  *   Classified PCP
  * VCAP_KF_8021Q_TPID: W3, sparx5: es0
  *   TPID for outer tag: 0: Customer TPID 1: Service TPID (88A8 or programmable)
@@ -104,7 +105,7 @@ enum vcap_keyfield_set {
  * VCAP_KF_8021Q_VID2: W12, sparx5: is0
  *   Third VID in multiple vlan tags (not always available)
  * VCAP_KF_8021Q_VID_CLS: sparx5 is2 W13, sparx5 es0 W13, sparx5 es2 W13,
- *   lan966x is2 W12
+ *   lan966x is2 W12, lan966x es0 W12
  *   Classified VID
  * VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS: W1, lan966x: is1
  *   Set if frame has two or more Q-tags. Independent of port VLAN awareness
@@ -146,10 +147,10 @@ enum vcap_keyfield_set {
  * VCAP_KF_IF_EGR_PORT_MASK_RNG: W3, sparx5: es2
  *   Select which 32 port group is available in IF_EGR_PORT (or virtual ports or
  *   CPU queue)
- * VCAP_KF_IF_EGR_PORT_NO: W7, sparx5: es0
+ * VCAP_KF_IF_EGR_PORT_NO: sparx5 es0 W7, lan966x es0 W4
  *   Egress port number
  * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9, lan966x is1 W3, lan966x
- *   is2 W4
+ *   is2 W4, lan966x es0 W4
  *   Sparx5: Logical ingress port number retrieved from
  *   ANA_CL::PORT_ID_CFG.LPORT_NUM or ERLEG, LAN966x: ingress port nunmber
  * VCAP_KF_IF_IGR_PORT_MASK: sparx5 is0 W65, sparx5 is2 W32, sparx5 is2 W65,
@@ -178,11 +179,12 @@ enum vcap_keyfield_set {
  *   Payload after IPv6 header
  * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0, lan966x: is1
  *   Set if frame is IPv4, IPv6, or SNAP frame
- * VCAP_KF_ISDX_CLS: W12, sparx5: is2/es0/es2
+ * VCAP_KF_ISDX_CLS: sparx5 is2 W12, sparx5 es0 W12, sparx5 es2 W12, lan966x es0
+ *   W8
  *   Classified ISDX
- * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es0/es2, lan966x: is2
+ * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es0/es2, lan966x: is2/es0
  *   Set if classified ISDX > 0
- * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2
+ * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2/es0
  *   Set if frame's destination MAC address is the broadcast address
  *   (FF-FF-FF-FF-FF-FF).
  * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2, lan966x: is1/is2
@@ -195,7 +197,7 @@ enum vcap_keyfield_set {
  *   LLC header and data after up to two VLAN tags and the type/length field
  * VCAP_KF_L2_MAC: W48, lan966x: is1
  *   MAC address (FIRST=1: SMAC, FIRST=0: DMAC)
- * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2
+ * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2/es0
  *   Set if frame's destination MAC address is a multicast address (bit 40 = 1).
  * VCAP_KF_L2_PAYLOAD0: W16, lan966x: is2
  *   Payload bytes 0-1 after the frame's EtherType
@@ -213,7 +215,7 @@ enum vcap_keyfield_set {
  *   SNAP header after LLC header (AA-AA-03)
  * VCAP_KF_L3_DIP_EQ_SIP_IS: W1, sparx5: is2/es2, lan966x: is2
  *   Set if Src IP matches Dst IP address
- * VCAP_KF_L3_DPL_CLS: W1, sparx5: es0/es2
+ * VCAP_KF_L3_DPL_CLS: W1, sparx5: es0/es2, lan966x: es0
  *   The frames drop precedence level
  * VCAP_KF_L3_DSCP: W6, sparx5: is0, lan966x: is1
  *   Frame's DSCP value
@@ -330,8 +332,12 @@ enum vcap_keyfield_set {
  *   Frame's OAM version
  * VCAP_KF_OAM_Y1731_IS: W1, sparx5: is2/es2, lan966x: is2
  *   Set if frame's EtherType = 0x8902
+ * VCAP_KF_PDU_TYPE: W4, lan966x: es0
+ *   PDU type value (none, OAM CCM, MRP, DLR, RTE, IPv4, IPv6, OAM non-CCM)
  * VCAP_KF_PROT_ACTIVE: W1, sparx5: es0/es2
  *   Protection is active
+ * VCAP_KF_RTP_ID: W10, lan966x: es0
+ *   Classified RTP_ID
  * VCAP_KF_RT_FRMID: W32, lan966x: is1
  *   Profinet or OPC-UA FrameId
  * VCAP_KF_RT_TYPE: W2, lan966x: is1
@@ -470,7 +476,9 @@ enum vcap_key_field {
        VCAP_KF_OAM_OPCODE,
        VCAP_KF_OAM_VER,
        VCAP_KF_OAM_Y1731_IS,
+       VCAP_KF_PDU_TYPE,
        VCAP_KF_PROT_ACTIVE,
+       VCAP_KF_RTP_ID,
        VCAP_KF_RT_FRMID,
        VCAP_KF_RT_TYPE,
        VCAP_KF_RT_VLAN_IDX,
@@ -489,6 +497,7 @@ enum vcap_actionfield_set {
        VCAP_AFS_FULL,              /* sparx5 is0 X3 */
        VCAP_AFS_S1,                /* lan966x is1 X1 */
        VCAP_AFS_SMAC_SIP,          /* lan966x is2 X1 */
+       VCAP_AFS_VID,               /* lan966x es0 X1 */
 };
 
 /* List of actionfields with description
@@ -523,9 +532,9 @@ enum vcap_actionfield_set {
  *   while bits 1:0 control first lookup. Encoding per lookup: 0: Disabled.  1:
  *   Extract 40 bytes after position corresponding to the location of the IPv4
  *   header and use as key.  2: Extract 40 bytes after SMAC and use as key
- * VCAP_AF_DEI_A_VAL: W1, sparx5: es0
+ * VCAP_AF_DEI_A_VAL: W1, sparx5: es0, lan966x: es0
  *   DEI used in ES0 tag A. See TAG_A_DEI_SEL.
- * VCAP_AF_DEI_B_VAL: W1, sparx5: es0
+ * VCAP_AF_DEI_B_VAL: W1, sparx5: es0, lan966x: es0
  *   DEI used in ES0 tag B. See TAG_B_DEI_SEL.
  * VCAP_AF_DEI_C_VAL: W1, sparx5: es0
  *   DEI used in ES0 tag C. See TAG_C_DEI_SEL.
@@ -556,7 +565,7 @@ enum vcap_actionfield_set {
  * VCAP_AF_ES2_REW_CMD: W3, sparx5: es2
  *   Command forwarded to REW: 0: No action. 1: SWAP MAC addresses. 2: Do L2CP
  *   DMAC translation when entering or leaving a tunnel.
- * VCAP_AF_ESDX: W13, sparx5: es0
+ * VCAP_AF_ESDX: sparx5 es0 W13, lan966x es0 W8
  *   Egress counter index. Used to index egress counter set as defined in
  *   REW::STAT_CFG.
  * VCAP_AF_FWD_KILL_ENA: W1, lan966x: is2
@@ -652,9 +661,9 @@ enum vcap_actionfield_set {
  *   (input) AND ~PAG_OVERRIDE_MASK) OR (PAG_VAL AND PAG_OVERRIDE_MASK)
  * VCAP_AF_PAG_VAL: W8, sparx5: is0, lan966x: is1
  *   See PAG_OVERRIDE_MASK.
- * VCAP_AF_PCP_A_VAL: W3, sparx5: es0
+ * VCAP_AF_PCP_A_VAL: W3, sparx5: es0, lan966x: es0
  *   PCP used in ES0 tag A. See TAG_A_PCP_SEL.
- * VCAP_AF_PCP_B_VAL: W3, sparx5: es0
+ * VCAP_AF_PCP_B_VAL: W3, sparx5: es0, lan966x: es0
  *   PCP used in ES0 tag B. See TAG_B_PCP_SEL.
  * VCAP_AF_PCP_C_VAL: W3, sparx5: es0
  *   PCP used in ES0 tag C. See TAG_C_PCP_SEL.
@@ -691,10 +700,10 @@ enum vcap_actionfield_set {
  *   Selects tag C mode: 0: Do not push tag C. 1: Push tag C if
  *   IFH.VSTAX.TAG.WAS_TAGGED = 1. 2: Push tag C if IFH.VSTAX.TAG.WAS_TAGGED = 0.
  *   3: Push tag C if UNTAG_VID_ENA = 0 or (C-TAG.VID ! = VID_C_VAL).
- * VCAP_AF_PUSH_INNER_TAG: W1, sparx5: es0
+ * VCAP_AF_PUSH_INNER_TAG: W1, sparx5: es0, lan966x: es0
  *   Controls inner tagging. 0: Do not push ES0 tag B as inner tag. 1: Push ES0
  *   tag B as inner tag.
- * VCAP_AF_PUSH_OUTER_TAG: W2, sparx5: es0
+ * VCAP_AF_PUSH_OUTER_TAG: W2, sparx5: es0, lan966x: es0
  *   Controls outer tagging. 0: No ES0 tag A: Port tag is allowed if enabled on
  *   port. 1: ES0 tag A: Push ES0 tag A. No port tag. 2: Force port tag: Always
  *   push port tag. No ES0 tag A. 3: Force untag: Never push port tag or ES0 tag
@@ -720,29 +729,29 @@ enum vcap_actionfield_set {
  * VCAP_AF_SWAP_MACS_ENA: W1, sparx5: es0
  *   This setting is only active when FWD_SEL = 1 or FWD_SEL = 2 and PIPELINE_ACT
  *   = LBK_ASM. 0: No action. 1: Swap MACs and clear bit 40 in new SMAC.
- * VCAP_AF_TAG_A_DEI_SEL: W3, sparx5: es0
+ * VCAP_AF_TAG_A_DEI_SEL: sparx5 es0 W3, lan966x es0 W2
  *   Selects PCP for ES0 tag A. 0: Classified DEI. 1: DEI_A_VAL. 2: DP and QoS
  *   mapped to PCP (per port table). 3: DP.
- * VCAP_AF_TAG_A_PCP_SEL: W3, sparx5: es0
+ * VCAP_AF_TAG_A_PCP_SEL: sparx5 es0 W3, lan966x es0 W2
  *   Selects PCP for ES0 tag A. 0: Classified PCP. 1: PCP_A_VAL. 2: DP and QoS
  *   mapped to PCP (per port table). 3: QoS class.
- * VCAP_AF_TAG_A_TPID_SEL: W3, sparx5: es0
+ * VCAP_AF_TAG_A_TPID_SEL: sparx5 es0 W3, lan966x es0 W2
  *   Selects TPID for ES0 tag A: 0: 0x8100. 1: 0x88A8. 2: Custom
  *   (REW:PORT:PORT_VLAN_CFG.PORT_TPID). 3: If IFH.TAG_TYPE = 0 then 0x8100 else
  *   custom.
- * VCAP_AF_TAG_A_VID_SEL: W2, sparx5: es0
+ * VCAP_AF_TAG_A_VID_SEL: sparx5 es0 W2, lan966x es0 W1
  *   Selects VID for ES0 tag A. 0: Classified VID + VID_A_VAL. 1: VID_A_VAL.
- * VCAP_AF_TAG_B_DEI_SEL: W3, sparx5: es0
+ * VCAP_AF_TAG_B_DEI_SEL: sparx5 es0 W3, lan966x es0 W2
  *   Selects PCP for ES0 tag B. 0: Classified DEI. 1: DEI_B_VAL. 2: DP and QoS
  *   mapped to PCP (per port table). 3: DP.
- * VCAP_AF_TAG_B_PCP_SEL: W3, sparx5: es0
+ * VCAP_AF_TAG_B_PCP_SEL: sparx5 es0 W3, lan966x es0 W2
  *   Selects PCP for ES0 tag B. 0: Classified PCP. 1: PCP_B_VAL. 2: DP and QoS
  *   mapped to PCP (per port table). 3: QoS class.
- * VCAP_AF_TAG_B_TPID_SEL: W3, sparx5: es0
+ * VCAP_AF_TAG_B_TPID_SEL: sparx5 es0 W3, lan966x es0 W2
  *   Selects TPID for ES0 tag B. 0: 0x8100. 1: 0x88A8. 2: Custom
  *   (REW:PORT:PORT_VLAN_CFG.PORT_TPID). 3: If IFH.TAG_TYPE = 0 then 0x8100 else
  *   custom.
- * VCAP_AF_TAG_B_VID_SEL: W2, sparx5: es0
+ * VCAP_AF_TAG_B_VID_SEL: sparx5 es0 W2, lan966x es0 W1
  *   Selects VID for ES0 tag B. 0: Classified VID + VID_B_VAL. 1: VID_B_VAL.
  * VCAP_AF_TAG_C_DEI_SEL: W3, sparx5: es0
  *   Selects DEI source for ES0 tag C. 0: Classified DEI. 1: DEI_C_VAL. 2:
@@ -770,9 +779,9 @@ enum vcap_actionfield_set {
  * VCAP_AF_UNTAG_VID_ENA: W1, sparx5: es0
  *   Controls insertion of tag C. Untag or insert mode can be selected. See
  *   PUSH_CUSTOMER_TAG.
- * VCAP_AF_VID_A_VAL: W12, sparx5: es0
+ * VCAP_AF_VID_A_VAL: W12, sparx5: es0, lan966x: es0
  *   VID used in ES0 tag A. See TAG_A_VID_SEL.
- * VCAP_AF_VID_B_VAL: W12, sparx5: es0
+ * VCAP_AF_VID_B_VAL: W12, sparx5: es0, lan966x: es0
  *   VID used in ES0 tag B. See TAG_B_VID_SEL.
  * VCAP_AF_VID_C_VAL: W12, sparx5: es0
  *   VID used in ES0 tag C. See TAG_C_VID_SEL.
index 5675b09..a418ad8 100644 (file)
@@ -1121,7 +1121,7 @@ static void vcap_copy_to_client_actionfield(struct vcap_rule_internal *ri,
                        vcap_copy_from_w32be(field->data.u128.value, value,
                                             field_size, width);
                        break;
-               };
+               }
        } else {
                switch (field->ctrl.type) {
                case VCAP_FIELD_BIT:
@@ -1162,7 +1162,7 @@ static void vcap_copy_to_client_actionfield(struct vcap_rule_internal *ri,
                                                      value,
                                                      width, field_size);
                        break;
-               };
+               }
        }
 }
 
@@ -1236,7 +1236,7 @@ static void vcap_copy_to_client_keyfield(struct vcap_rule_internal *ri,
                        vcap_copy_from_w32be(field->data.u128.mask,  mask,
                                             field_size, width);
                        break;
-               };
+               }
        } else {
                switch (field->ctrl.type) {
                case VCAP_FIELD_BIT:
@@ -1284,7 +1284,7 @@ static void vcap_copy_to_client_keyfield(struct vcap_rule_internal *ri,
                                                   value, mask,
                                                   width, field_size);
                        break;
-               };
+               }
        }
 }
 
index ee05240..e0916af 100644 (file)
@@ -592,6 +592,16 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
                return -EOPNOTSUPP;
        }
 
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+               struct flow_match_meta match;
+
+               flow_rule_match_meta(rule, &match);
+               if (match.mask->l2_miss) {
+                       NL_SET_ERR_MSG_MOD(extack, "Can't match on \"l2_miss\"");
+                       return -EOPNOTSUPP;
+               }
+       }
+
        /* For VCAP ES0 (egress rewriter) we can match on the ingress port */
        if (!ingress) {
                ret = ocelot_flower_parse_indev(ocelot, port, f, filter);
index bf6bae5..8c6954c 100644 (file)
@@ -311,8 +311,6 @@ nfp_devlink_flash_update(struct devlink *devlink,
 }
 
 const struct devlink_ops nfp_devlink_ops = {
-       .port_split             = nfp_devlink_port_split,
-       .port_unsplit           = nfp_devlink_port_unsplit,
        .sb_pool_get            = nfp_devlink_sb_pool_get,
        .sb_pool_set            = nfp_devlink_sb_pool_set,
        .eswitch_mode_get       = nfp_devlink_eswitch_mode_get,
@@ -321,6 +319,11 @@ const struct devlink_ops nfp_devlink_ops = {
        .flash_update           = nfp_devlink_flash_update,
 };
 
+static const struct devlink_port_ops nfp_devlink_port_ops = {
+       .port_split             = nfp_devlink_port_split,
+       .port_unsplit           = nfp_devlink_port_unsplit,
+};
+
 int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
 {
        struct devlink_port_attrs attrs = {};
@@ -351,7 +354,8 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
 
        devlink = priv_to_devlink(app->pf);
 
-       return devl_port_register(devlink, &port->dl_port, port->eth_id);
+       return devl_port_register_with_ops(devlink, &port->dl_port,
+                                          port->eth_id, &nfp_devlink_port_ops);
 }
 
 void nfp_devlink_port_unregister(struct nfp_port *port)
index 62f0bf9..b7cce74 100644 (file)
@@ -2418,6 +2418,8 @@ static void nfp_net_rss_init(struct nfp_net *nn)
        /* Enable IPv4/IPv6 TCP by default */
        nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
                      NFP_NET_CFG_RSS_IPV6_TCP |
+                     NFP_NET_CFG_RSS_IPV4_UDP |
+                     NFP_NET_CFG_RSS_IPV6_UDP |
                      FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) |
                      NFP_NET_CFG_RSS_MASK;
 }
index dfedb52..e75cbb2 100644 (file)
@@ -436,49 +436,41 @@ static void nfp_add_media_link_mode(struct nfp_port *port,
                                    struct nfp_eth_table_port *eth_port,
                                    struct ethtool_link_ksettings *cmd)
 {
-       u64 supported_modes[2], advertised_modes[2];
-       struct nfp_eth_media_buf ethm = {
-               .eth_index = eth_port->eth_index,
-       };
-       struct nfp_cpp *cpp = port->app->cpp;
-
-       if (nfp_eth_read_media(cpp, &ethm)) {
-               bitmap_fill(port->speed_bitmap, NFP_SUP_SPEED_NUMBER);
-               return;
-       }
-
        bitmap_zero(port->speed_bitmap, NFP_SUP_SPEED_NUMBER);
 
-       for (u32 i = 0; i < 2; i++) {
-               supported_modes[i] = le64_to_cpu(ethm.supported_modes[i]);
-               advertised_modes[i] = le64_to_cpu(ethm.advertised_modes[i]);
-       }
-
        for (u32 i = 0; i < NFP_MEDIA_LINK_MODES_NUMBER; i++) {
                if (i < 64) {
-                       if (supported_modes[0] & BIT_ULL(i)) {
+                       if (eth_port->link_modes_supp[0] & BIT_ULL(i)) {
                                __set_bit(nfp_eth_media_table[i].ethtool_link_mode,
                                          cmd->link_modes.supported);
                                __set_bit(nfp_eth_media_table[i].speed,
                                          port->speed_bitmap);
                        }
 
-                       if (advertised_modes[0] & BIT_ULL(i))
+                       if (eth_port->link_modes_ad[0] & BIT_ULL(i))
                                __set_bit(nfp_eth_media_table[i].ethtool_link_mode,
                                          cmd->link_modes.advertising);
                } else {
-                       if (supported_modes[1] & BIT_ULL(i - 64)) {
+                       if (eth_port->link_modes_supp[1] & BIT_ULL(i - 64)) {
                                __set_bit(nfp_eth_media_table[i].ethtool_link_mode,
                                          cmd->link_modes.supported);
                                __set_bit(nfp_eth_media_table[i].speed,
                                          port->speed_bitmap);
                        }
 
-                       if (advertised_modes[1] & BIT_ULL(i - 64))
+                       if (eth_port->link_modes_ad[1] & BIT_ULL(i - 64))
                                __set_bit(nfp_eth_media_table[i].ethtool_link_mode,
                                          cmd->link_modes.advertising);
                }
        }
+
+       /* We take all speeds as supported when it fails to read
+        * link modes due to old management firmware that doesn't
+        * support link modes reading or error occurring, so that
+        * speed change of this port is allowed.
+        */
+       if (bitmap_empty(port->speed_bitmap, NFP_SUP_SPEED_NUMBER))
+               bitmap_fill(port->speed_bitmap, NFP_SUP_SPEED_NUMBER);
 }
 
 /**
index 781edc4..6e044ac 100644 (file)
@@ -196,6 +196,9 @@ enum nfp_ethtool_link_mode_list {
  *                     subports)
  * @ports.is_split:    is interface part of a split port
  * @ports.fec_modes_supported: bitmap of FEC modes supported
+ *
+ * @ports.link_modes_supp:     bitmap of link modes supported
+ * @ports.link_modes_ad:       bitmap of link modes advertised
  */
 struct nfp_eth_table {
        unsigned int count;
@@ -235,6 +238,9 @@ struct nfp_eth_table {
                bool is_split;
 
                unsigned int fec_modes_supported;
+
+               u64 link_modes_supp[2];
+               u64 link_modes_ad[2];
        } ports[];
 };
 
@@ -313,7 +319,6 @@ struct nfp_eth_media_buf {
 };
 
 int nfp_nsp_read_media(struct nfp_nsp *state, void *buf, unsigned int size);
-int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm);
 
 #define NFP_NSP_VERSION_BUFSZ  1024 /* reasonable size, not in the ABI */
 
index 570ac1b..9d62085 100644 (file)
@@ -227,6 +227,30 @@ nfp_eth_calc_port_type(struct nfp_cpp *cpp, struct nfp_eth_table_port *entry)
                entry->port_type = PORT_DA;
 }
 
+static void
+nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_nsp *nsp, struct nfp_eth_table_port *entry)
+{
+       struct nfp_eth_media_buf ethm = {
+               .eth_index = entry->eth_index,
+       };
+       unsigned int i;
+       int ret;
+
+       if (!nfp_nsp_has_read_media(nsp))
+               return;
+
+       ret = nfp_nsp_read_media(nsp, &ethm, sizeof(ethm));
+       if (ret) {
+               nfp_err(cpp, "Reading media link modes failed: %d\n", ret);
+               return;
+       }
+
+       for (i = 0; i < 2; i++) {
+               entry->link_modes_supp[i] = le64_to_cpu(ethm.supported_modes[i]);
+               entry->link_modes_ad[i] = le64_to_cpu(ethm.advertised_modes[i]);
+       }
+}
+
 /**
  * nfp_eth_read_ports() - retrieve port information
  * @cpp:       NFP CPP handle
@@ -293,8 +317,10 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp)
                                               &table->ports[j++]);
 
        nfp_eth_calc_port_geometry(cpp, table);
-       for (i = 0; i < table->count; i++)
+       for (i = 0; i < table->count; i++) {
                nfp_eth_calc_port_type(cpp, &table->ports[i]);
+               nfp_eth_read_media(cpp, nsp, &table->ports[i]);
+       }
 
        kfree(entries);
 
@@ -647,29 +673,3 @@ int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes)
        return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES,
                                      lanes, NSP_ETH_CTRL_SET_LANES);
 }
-
-int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm)
-{
-       struct nfp_nsp *nsp;
-       int ret;
-
-       nsp = nfp_nsp_open(cpp);
-       if (IS_ERR(nsp)) {
-               nfp_err(cpp, "Failed to access the NSP: %pe\n", nsp);
-               return PTR_ERR(nsp);
-       }
-
-       if (!nfp_nsp_has_read_media(nsp)) {
-               nfp_warn(cpp, "Reading media link modes not supported. Please update flash\n");
-               ret = -EOPNOTSUPP;
-               goto exit_close_nsp;
-       }
-
-       ret = nfp_nsp_read_media(nsp, ethm, sizeof(*ethm));
-       if (ret)
-               nfp_err(cpp, "Reading media link modes failed: %pe\n", ERR_PTR(ret));
-
-exit_close_nsp:
-       nfp_nsp_close(nsp);
-       return ret;
-}
index 4b19803..9445f04 100644 (file)
@@ -5164,6 +5164,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        int jumbo_max, region, rc;
        enum mac_version chipset;
        struct net_device *dev;
+       u32 txconfig;
        u16 xid;
 
        dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
@@ -5195,38 +5196,35 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        /* enable device (incl. PCI PM wakeup and hotplug setup) */
        rc = pcim_enable_device(pdev);
-       if (rc < 0) {
-               dev_err(&pdev->dev, "enable failure\n");
-               return rc;
-       }
+       if (rc < 0)
+               return dev_err_probe(&pdev->dev, rc, "enable failure\n");
 
        if (pcim_set_mwi(pdev) < 0)
                dev_info(&pdev->dev, "Mem-Wr-Inval unavailable\n");
 
        /* use first MMIO region */
        region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1;
-       if (region < 0) {
-               dev_err(&pdev->dev, "no MMIO resource found\n");
-               return -ENODEV;
-       }
+       if (region < 0)
+               return dev_err_probe(&pdev->dev, -ENODEV, "no MMIO resource found\n");
 
        rc = pcim_iomap_regions(pdev, BIT(region), KBUILD_MODNAME);
-       if (rc < 0) {
-               dev_err(&pdev->dev, "cannot remap MMIO, aborting\n");
-               return rc;
-       }
+       if (rc < 0)
+               return dev_err_probe(&pdev->dev, rc, "cannot remap MMIO, aborting\n");
 
        tp->mmio_addr = pcim_iomap_table(pdev)[region];
 
-       xid = (RTL_R32(tp, TxConfig) >> 20) & 0xfcf;
+       txconfig = RTL_R32(tp, TxConfig);
+       if (txconfig == ~0U)
+               return dev_err_probe(&pdev->dev, -EIO, "PCI read failed\n");
+
+       xid = (txconfig >> 20) & 0xfcf;
 
        /* Identify chip attached to board */
        chipset = rtl8169_get_mac_version(xid, tp->supports_gmii);
-       if (chipset == RTL_GIGA_MAC_NONE) {
-               dev_err(&pdev->dev, "unknown chip XID %03x, contact r8169 maintainers (see MAINTAINERS file)\n", xid);
-               return -ENODEV;
-       }
-
+       if (chipset == RTL_GIGA_MAC_NONE)
+               return dev_err_probe(&pdev->dev, -ENODEV,
+                                    "unknown chip XID %03x, contact r8169 maintainers (see MAINTAINERS file)\n",
+                                    xid);
        tp->mac_version = chipset;
 
        tp->dash_type = rtl_check_dash(tp);
@@ -5246,10 +5244,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        rtl_hw_reset(tp);
 
        rc = rtl_alloc_irq(tp);
-       if (rc < 0) {
-               dev_err(&pdev->dev, "Can't allocate interrupt\n");
-               return rc;
-       }
+       if (rc < 0)
+               return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n");
+
        tp->irq = pci_irq_vector(pdev, 0);
 
        INIT_WORK(&tp->wk.work, rtl_task);
index 0f45107..d14e0cf 100644 (file)
@@ -511,7 +511,7 @@ struct sxgbe_priv_data {
 struct sxgbe_priv_data *sxgbe_drv_probe(struct device *device,
                                        struct sxgbe_plat_data *plat_dat,
                                        void __iomem *addr);
-int sxgbe_drv_remove(struct net_device *ndev);
+void sxgbe_drv_remove(struct net_device *ndev);
 void sxgbe_set_ethtool_ops(struct net_device *netdev);
 int sxgbe_mdio_unregister(struct net_device *ndev);
 int sxgbe_mdio_register(struct net_device *ndev);
index 9664f02..7143982 100644 (file)
@@ -2203,7 +2203,7 @@ error_free_netdev:
  * Description: this function resets the TX/RX processes, disables the MAC RX/TX
  * changes the link status, releases the DMA descriptor rings.
  */
-int sxgbe_drv_remove(struct net_device *ndev)
+void sxgbe_drv_remove(struct net_device *ndev)
 {
        struct sxgbe_priv_data *priv = netdev_priv(ndev);
        u8 queue_num;
@@ -2231,8 +2231,6 @@ int sxgbe_drv_remove(struct net_device *ndev)
        kfree(priv->hw);
 
        free_netdev(ndev);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM
index 4e55263..fb59ff9 100644 (file)
@@ -172,9 +172,10 @@ err_out:
 static int sxgbe_platform_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
-       int ret = sxgbe_drv_remove(ndev);
 
-       return ret;
+       sxgbe_drv_remove(ndev);
+
+       return 0;
 }
 
 #ifdef CONFIG_PM
index be395cd..274f3a2 100644 (file)
@@ -40,19 +40,26 @@ static int ef100_alloc_vis(struct efx_nic *efx, unsigned int *allocated_vis)
        unsigned int tx_vis = efx->n_tx_channels + efx->n_extra_tx_channels;
        unsigned int rx_vis = efx->n_rx_channels;
        unsigned int min_vis, max_vis;
+       int rc;
 
        EFX_WARN_ON_PARANOID(efx->tx_queues_per_channel != 1);
 
        tx_vis += efx->n_xdp_channels * efx->xdp_tx_per_channel;
 
        max_vis = max(rx_vis, tx_vis);
-       /* Currently don't handle resource starvation and only accept
-        * our maximum needs and no less.
+       /* We require at least a single complete TX channel worth of queues. */
+       min_vis = efx->tx_queues_per_channel;
+
+       rc = efx_mcdi_alloc_vis(efx, min_vis, max_vis,
+                               NULL, allocated_vis);
+
+       /* We retry allocating VIs by reallocating channels when we have not
+        * been able to allocate the maximum VIs.
         */
-       min_vis = max_vis;
+       if (!rc && *allocated_vis < max_vis)
+               rc = -EAGAIN;
 
-       return efx_mcdi_alloc_vis(efx, min_vis, max_vis,
-                                 NULL, allocated_vis);
+       return rc;
 }
 
 static int ef100_remap_bar(struct efx_nic *efx, int max_vis)
@@ -133,9 +140,41 @@ static int ef100_net_open(struct net_device *net_dev)
                goto fail;
 
        rc = ef100_alloc_vis(efx, &allocated_vis);
-       if (rc)
+       if (rc && rc != -EAGAIN)
                goto fail;
 
+       /* Try one more time but with the maximum number of channels
+        * equal to the allocated VIs, which would more likely succeed.
+        */
+       if (rc == -EAGAIN) {
+               rc = efx_mcdi_free_vis(efx);
+               if (rc)
+                       goto fail;
+
+               efx_remove_interrupts(efx);
+               efx->max_channels = allocated_vis;
+
+               rc = efx_probe_interrupts(efx);
+               if (rc)
+                       goto fail;
+
+               rc = efx_set_channels(efx);
+               if (rc)
+                       goto fail;
+
+               rc = ef100_alloc_vis(efx, &allocated_vis);
+               if (rc && rc != -EAGAIN)
+                       goto fail;
+
+               /* It should be very unlikely that we failed here again, but in
+                * such a case we return ENOSPC.
+                */
+               if (rc == -EAGAIN) {
+                       rc = -ENOSPC;
+                       goto fail;
+               }
+       }
+
        rc = efx_probe_channels(efx);
        if (rc)
                return rc;
index ef9971c..b82dad5 100644 (file)
@@ -25,40 +25,6 @@ struct efx_devlink {
 };
 
 #ifdef CONFIG_SFC_SRIOV
-static void efx_devlink_del_port(struct devlink_port *dl_port)
-{
-       if (!dl_port)
-               return;
-       devl_port_unregister(dl_port);
-}
-
-static int efx_devlink_add_port(struct efx_nic *efx,
-                               struct mae_mport_desc *mport)
-{
-       bool external = false;
-
-       if (!ef100_mport_on_local_intf(efx, mport))
-               external = true;
-
-       switch (mport->mport_type) {
-       case MAE_MPORT_DESC_MPORT_TYPE_VNIC:
-               if (mport->vf_idx != MAE_MPORT_DESC_VF_IDX_NULL)
-                       devlink_port_attrs_pci_vf_set(&mport->dl_port, 0, mport->pf_idx,
-                                                     mport->vf_idx,
-                                                     external);
-               else
-                       devlink_port_attrs_pci_pf_set(&mport->dl_port, 0, mport->pf_idx,
-                                                     external);
-               break;
-       default:
-               /* MAE_MPORT_DESC_MPORT_ALIAS and UNDEFINED */
-               return 0;
-       }
-
-       mport->dl_port.index = mport->mport_id;
-
-       return devl_port_register(efx->devlink, &mport->dl_port, mport->mport_id);
-}
 
 static int efx_devlink_port_addr_get(struct devlink_port *port, u8 *hw_addr,
                                     int *hw_addr_len,
@@ -158,6 +124,48 @@ static int efx_devlink_port_addr_set(struct devlink_port *port,
        return rc;
 }
 
+static const struct devlink_port_ops sfc_devlink_port_ops = {
+       .port_fn_hw_addr_get = efx_devlink_port_addr_get,
+       .port_fn_hw_addr_set = efx_devlink_port_addr_set,
+};
+
+static void efx_devlink_del_port(struct devlink_port *dl_port)
+{
+       if (!dl_port)
+               return;
+       devl_port_unregister(dl_port);
+}
+
+static int efx_devlink_add_port(struct efx_nic *efx,
+                               struct mae_mport_desc *mport)
+{
+       bool external = false;
+
+       if (!ef100_mport_on_local_intf(efx, mport))
+               external = true;
+
+       switch (mport->mport_type) {
+       case MAE_MPORT_DESC_MPORT_TYPE_VNIC:
+               if (mport->vf_idx != MAE_MPORT_DESC_VF_IDX_NULL)
+                       devlink_port_attrs_pci_vf_set(&mport->dl_port, 0, mport->pf_idx,
+                                                     mport->vf_idx,
+                                                     external);
+               else
+                       devlink_port_attrs_pci_pf_set(&mport->dl_port, 0, mport->pf_idx,
+                                                     external);
+               break;
+       default:
+               /* MAE_MPORT_DESC_MPORT_ALIAS and UNDEFINED */
+               return 0;
+       }
+
+       mport->dl_port.index = mport->mport_id;
+
+       return devl_port_register_with_ops(efx->devlink, &mport->dl_port,
+                                          mport->mport_id,
+                                          &sfc_devlink_port_ops);
+}
+
 #endif
 
 static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
@@ -609,10 +617,6 @@ static int efx_devlink_info_get(struct devlink *devlink,
 
 static const struct devlink_ops sfc_devlink_ops = {
        .info_get                       = efx_devlink_info_get,
-#ifdef CONFIG_SFC_SRIOV
-       .port_function_hw_addr_get      = efx_devlink_port_addr_get,
-       .port_function_hw_addr_set      = efx_devlink_port_addr_set,
-#endif
 };
 
 #ifdef CONFIG_SFC_SRIOV
index 49706a7..37a4c69 100644 (file)
@@ -482,12 +482,14 @@ int efx_mae_match_check_caps(struct efx_nic *efx,
        rc;                                                                    \
 })
 /* Checks that the fields needed for encap-rule matches are supported by the
- * MAE.  All the fields are exact-match.
+ * MAE.  All the fields are exact-match, except possibly ENC_IP_TOS.
  */
 int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
+                                  u8 ip_tos_mask, __be16 udp_sport_mask,
                                   struct netlink_ext_ack *extack)
 {
        u8 *supported_fields = efx->tc->caps->outer_rule_fields;
+       enum mask_type typ;
        int rc;
 
        if (CHECK(ENC_ETHER_TYPE))
@@ -504,6 +506,22 @@ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
        if (CHECK(ENC_L4_DPORT) ||
            CHECK(ENC_IP_PROTO))
                return rc;
+       typ = classify_mask((const u8 *)&udp_sport_mask, sizeof(udp_sport_mask));
+       rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_L4_SPORT],
+                                        typ);
+       if (rc) {
+               NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s",
+                                      mask_type_name(typ), "enc_src_port");
+               return rc;
+       }
+       typ = classify_mask(&ip_tos_mask, sizeof(ip_tos_mask));
+       rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_IP_TOS],
+                                        typ);
+       if (rc) {
+               NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s",
+                                      mask_type_name(typ), "enc_ip_tos");
+               return rc;
+       }
        return 0;
 }
 #undef CHECK
@@ -1001,8 +1019,16 @@ int efx_mae_register_encap_match(struct efx_nic *efx,
                                encap->udp_dport);
        MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK,
                                ~(__be16)0);
+       MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE,
+                               encap->udp_sport);
+       MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK,
+                               encap->udp_sport_mask);
        MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO, IPPROTO_UDP);
        MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO_MASK, ~0);
+       MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS,
+                            encap->ip_tos);
+       MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS_MASK,
+                            encap->ip_tos_mask);
        rc = efx_mcdi_rpc(efx, MC_CMD_MAE_OUTER_RULE_INSERT, inbuf,
                          sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
        if (rc)
index 9226219..1cf8dfe 100644 (file)
@@ -82,6 +82,7 @@ int efx_mae_match_check_caps(struct efx_nic *efx,
                             const struct efx_tc_match_fields *mask,
                             struct netlink_ext_ack *extack);
 int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
+                                  u8 ip_tos_mask, __be16 udp_sport_mask,
                                   struct netlink_ext_ack *extack);
 int efx_mae_check_encap_type_supported(struct efx_nic *efx,
                                       enum efx_encap_type typ);
index c004443..bb9ec1e 100644 (file)
@@ -132,23 +132,6 @@ static void efx_tc_free_action_set_list(struct efx_nic *efx,
        /* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
 }
 
-static void efx_tc_flow_free(void *ptr, void *arg)
-{
-       struct efx_tc_flow_rule *rule = ptr;
-       struct efx_nic *efx = arg;
-
-       netif_err(efx, drv, efx->net_dev,
-                 "tc rule %lx still present at teardown, removing\n",
-                 rule->cookie);
-
-       efx_mae_delete_rule(efx, rule->fw_id);
-
-       /* Release entries in subsidiary tables */
-       efx_tc_free_action_set_list(efx, &rule->acts, true);
-
-       kfree(rule);
-}
-
 /* Boilerplate for the simple 'copy a field' cases */
 #define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)      \
 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {           \
@@ -219,6 +202,7 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
              BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
              BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_TCP) |
@@ -363,20 +347,48 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
        return 0;
 }
 
+static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
+                                             struct efx_tc_encap_match *encap)
+{
+       int rc;
+
+       if (!refcount_dec_and_test(&encap->ref))
+               return; /* still in use */
+
+       if (encap->type == EFX_TC_EM_DIRECT) {
+               rc = efx_mae_unregister_encap_match(efx, encap);
+               if (rc)
+                       /* Display message but carry on and remove entry from our
+                        * SW tables, because there's not much we can do about it.
+                        */
+                       netif_err(efx, drv, efx->net_dev,
+                                 "Failed to release encap match %#x, rc %d\n",
+                                 encap->fw_id, rc);
+       }
+       rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
+                              efx_tc_encap_match_ht_params);
+       if (encap->pseudo)
+               efx_tc_flower_release_encap_match(efx, encap->pseudo);
+       kfree(encap);
+}
+
 static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
                                            struct efx_tc_match *match,
                                            enum efx_encap_type type,
+                                           enum efx_tc_em_pseudo_type em_type,
+                                           u8 child_ip_tos_mask,
+                                           __be16 child_udp_sport_mask,
                                            struct netlink_ext_ack *extack)
 {
-       struct efx_tc_encap_match *encap, *old;
+       struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
        bool ipv6 = false;
        int rc;
 
        /* We require that the socket-defining fields (IP addrs and UDP dest
-        * port) are present and exact-match.  Other fields are currently not
-        * allowed.  This meets what OVS will ask for, and means that we don't
-        * need to handle difficult checks for overlapping matches as could
-        * come up if we allowed masks or varying sets of match fields.
+        * port) are present and exact-match.  Other fields may only be used
+        * if the field-set (and any masks) are the same for all encap
+        * matches on the same <sip,dip,dport> tuple; this is enforced by
+        * pseudo encap matches.
         */
        if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
                if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
@@ -414,29 +426,42 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
                NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
                return -EOPNOTSUPP;
        }
-       if (match->mask.enc_sport) {
-               NL_SET_ERR_MSG_MOD(extack, "Egress encap match on src UDP port not supported");
-               return -EOPNOTSUPP;
-       }
-       if (match->mask.enc_ip_tos) {
-               NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP ToS not supported");
-               return -EOPNOTSUPP;
+       if (match->mask.enc_sport || match->mask.enc_ip_tos) {
+               struct efx_tc_match pmatch = *match;
+
+               if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
+                       NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
+                       return -EOPNOTSUPP;
+               }
+               pmatch.value.enc_ip_tos = 0;
+               pmatch.mask.enc_ip_tos = 0;
+               pmatch.value.enc_sport = 0;
+               pmatch.mask.enc_sport = 0;
+               rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
+                                                     EFX_TC_EM_PSEUDO_MASK,
+                                                     match->mask.enc_ip_tos,
+                                                     match->mask.enc_sport,
+                                                     extack);
+               if (rc)
+                       return rc;
+               pseudo = pmatch.encap;
        }
        if (match->mask.enc_ip_ttl) {
                NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
-               return -EOPNOTSUPP;
+               rc = -EOPNOTSUPP;
+               goto fail_pseudo;
        }
 
-       rc = efx_mae_check_encap_match_caps(efx, ipv6, extack);
-       if (rc) {
-               NL_SET_ERR_MSG_FMT_MOD(extack, "MAE hw reports no support for IPv%d encap matches",
-                                      ipv6 ? 6 : 4);
-               return -EOPNOTSUPP;
-       }
+       rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
+                                           match->mask.enc_sport, extack);
+       if (rc)
+               goto fail_pseudo;
 
        encap = kzalloc(sizeof(*encap), GFP_USER);
-       if (!encap)
-               return -ENOMEM;
+       if (!encap) {
+               rc = -ENOMEM;
+               goto fail_pseudo;
+       }
        encap->src_ip = match->value.enc_src_ip;
        encap->dst_ip = match->value.enc_dst_ip;
 #ifdef CONFIG_IPV6
@@ -445,12 +470,66 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
 #endif
        encap->udp_dport = match->value.enc_dport;
        encap->tun_type = type;
+       encap->ip_tos = match->value.enc_ip_tos;
+       encap->ip_tos_mask = match->mask.enc_ip_tos;
+       encap->child_ip_tos_mask = child_ip_tos_mask;
+       encap->udp_sport = match->value.enc_sport;
+       encap->udp_sport_mask = match->mask.enc_sport;
+       encap->child_udp_sport_mask = child_udp_sport_mask;
+       encap->type = em_type;
+       encap->pseudo = pseudo;
        old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
                                                &encap->linkage,
                                                efx_tc_encap_match_ht_params);
        if (old) {
                /* don't need our new entry */
                kfree(encap);
+               if (pseudo) /* don't need our new pseudo either */
+                       efx_tc_flower_release_encap_match(efx, pseudo);
+               /* check old and new em_types are compatible */
+               switch (old->type) {
+               case EFX_TC_EM_DIRECT:
+                       /* old EM is in hardware, so mustn't overlap with a
+                        * pseudo, but may be shared with another direct EM
+                        */
+                       if (em_type == EFX_TC_EM_DIRECT)
+                               break;
+                       NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
+                       return -EEXIST;
+               case EFX_TC_EM_PSEUDO_MASK:
+                       /* old EM is protecting a ToS- or src port-qualified
+                        * filter, so may only be shared with another pseudo
+                        * for the same ToS and src port masks.
+                        */
+                       if (em_type != EFX_TC_EM_PSEUDO_MASK) {
+                               NL_SET_ERR_MSG_FMT_MOD(extack,
+                                                      "%s encap match conflicts with existing pseudo(MASK) entry",
+                                                      em_type ? "Pseudo" : "Direct");
+                               return -EEXIST;
+                       }
+                       if (child_ip_tos_mask != old->child_ip_tos_mask) {
+                               NL_SET_ERR_MSG_FMT_MOD(extack,
+                                                      "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x",
+                                                      child_ip_tos_mask,
+                                                      old->child_ip_tos_mask);
+                               return -EEXIST;
+                       }
+                       if (child_udp_sport_mask != old->child_udp_sport_mask) {
+                               NL_SET_ERR_MSG_FMT_MOD(extack,
+                                                      "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x",
+                                                      child_udp_sport_mask,
+                                                      old->child_udp_sport_mask);
+                               return -EEXIST;
+                       }
+                       break;
+               default: /* Unrecognised pseudo-type.  Just say no */
+                       NL_SET_ERR_MSG_FMT_MOD(extack,
+                                              "%s encap match conflicts with existing pseudo(%d) entry",
+                                              em_type ? "Pseudo" : "Direct",
+                                              old->type);
+                       return -EEXIST;
+               }
+               /* check old and new tun_types are compatible */
                if (old->tun_type != type) {
                        NL_SET_ERR_MSG_FMT_MOD(extack,
                                               "Egress encap match with conflicting tun_type %u != %u",
@@ -462,10 +541,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
                /* existing entry found */
                encap = old;
        } else {
-               rc = efx_mae_register_encap_match(efx, encap);
-               if (rc) {
-                       NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
-                       goto fail;
+               if (em_type == EFX_TC_EM_DIRECT) {
+                       rc = efx_mae_register_encap_match(efx, encap);
+                       if (rc) {
+                               NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
+                               goto fail;
+                       }
                }
                refcount_set(&encap->ref, 1);
        }
@@ -475,30 +556,12 @@ fail:
        rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
                               efx_tc_encap_match_ht_params);
        kfree(encap);
+fail_pseudo:
+       if (pseudo)
+               efx_tc_flower_release_encap_match(efx, pseudo);
        return rc;
 }
 
-static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
-                                             struct efx_tc_encap_match *encap)
-{
-       int rc;
-
-       if (!refcount_dec_and_test(&encap->ref))
-               return; /* still in use */
-
-       rc = efx_mae_unregister_encap_match(efx, encap);
-       if (rc)
-               /* Display message but carry on and remove entry from our
-                * SW tables, because there's not much we can do about it.
-                */
-               netif_err(efx, drv, efx->net_dev,
-                         "Failed to release encap match %#x, rc %d\n",
-                         encap->fw_id, rc);
-       rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
-                              efx_tc_encap_match_ht_params);
-       kfree(encap);
-}
-
 static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
 {
        efx_mae_delete_rule(efx, rule->fw_id);
@@ -650,6 +713,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
                }
 
                rc = efx_tc_flower_record_encap_match(efx, &match, type,
+                                                     EFX_TC_EM_DIRECT, 0, 0,
                                                      extack);
                if (rc)
                        return rc;
@@ -1451,6 +1515,21 @@ static void efx_tc_encap_match_free(void *ptr, void *__unused)
        kfree(encap);
 }
 
+static void efx_tc_flow_free(void *ptr, void *arg)
+{
+       struct efx_tc_flow_rule *rule = ptr;
+       struct efx_nic *efx = arg;
+
+       netif_err(efx, drv, efx->net_dev,
+                 "tc rule %lx still present at teardown, removing\n",
+                 rule->cookie);
+
+       /* Also releases entries in subsidiary tables */
+       efx_tc_delete_rule(efx, rule);
+
+       kfree(rule);
+}
+
 int efx_init_struct_tc(struct efx_nic *efx)
 {
        int rc;
index 04cced6..24e9640 100644 (file)
@@ -74,14 +74,41 @@ static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask)
               mask->enc_ip_ttl || mask->enc_sport || mask->enc_dport;
 }
 
+/**
+ * enum efx_tc_em_pseudo_type - &struct efx_tc_encap_match pseudo type
+ *
+ * These are used to classify "pseudo" encap matches, which don't refer
+ * to an entry in hardware but rather indicate that a section of the
+ * match space is in use by another Outer Rule.
+ *
+ * @EFX_TC_EM_DIRECT: real HW entry in Outer Rule table; not a pseudo.
+ *     Hardware index in &struct efx_tc_encap_match.fw_id is valid.
+ * @EFX_TC_EM_PSEUDO_MASK: registered by an encap match which includes a
+ *     match on an optional field (currently ip_tos and/or udp_sport),
+ *     to prevent an overlapping encap match _without_ optional fields.
+ *     The pseudo encap match may be referenced again by an encap match
+ *     with different values for these fields, but all masks must match the
+ *     first (stored in our child_* fields).
+ */
+enum efx_tc_em_pseudo_type {
+       EFX_TC_EM_DIRECT,
+       EFX_TC_EM_PSEUDO_MASK,
+};
+
 struct efx_tc_encap_match {
        __be32 src_ip, dst_ip;
        struct in6_addr src_ip6, dst_ip6;
        __be16 udp_dport;
+       __be16 udp_sport, udp_sport_mask;
+       u8 ip_tos, ip_tos_mask;
        struct rhash_head linkage;
        enum efx_encap_type tun_type;
+       u8 child_ip_tos_mask;
+       __be16 child_udp_sport_mask;
        refcount_t ref;
+       enum efx_tc_em_pseudo_type type;
        u32 fw_id; /* index of this entry in firmware encap match table */
+       struct efx_tc_encap_match *pseudo; /* Referenced pseudo EM if needed */
 };
 
 struct efx_tc_match {
index 5f5a997..5583f0b 100644 (file)
@@ -158,6 +158,9 @@ config DWMAC_SOCFPGA
        default ARCH_INTEL_SOCFPGA
        depends on OF && (ARCH_INTEL_SOCFPGA || COMPILE_TEST)
        select MFD_SYSCON
+       select MDIO_REGMAP
+       select REGMAP_MMIO
+       select PCS_LYNX
        help
          Support for ethernet controller on Altera SOCFPGA
 
index 8738fdb..7dd3d38 100644 (file)
@@ -35,7 +35,7 @@ obj-$(CONFIG_DWMAC_IMX8)      += dwmac-imx.o
 obj-$(CONFIG_DWMAC_TEGRA)      += dwmac-tegra.o
 obj-$(CONFIG_DWMAC_VISCONTI)   += dwmac-visconti.o
 stmmac-platform-objs:= stmmac_platform.o
-dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o
+dwmac-altr-socfpga-objs := dwmac-socfpga.o
 
 obj-$(CONFIG_STMMAC_PCI)       += stmmac-pci.o
 obj-$(CONFIG_DWMAC_INTEL)      += dwmac-intel.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
deleted file mode 100644 (file)
index 00f6d34..0000000
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright Altera Corporation (C) 2016. All rights reserved.
- *
- * Author: Tien Hock Loh <thloh@altera.com>
- */
-
-#include <linux/mfd/syscon.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_net.h>
-#include <linux/phy.h>
-#include <linux/regmap.h>
-#include <linux/reset.h>
-#include <linux/stmmac.h>
-
-#include "stmmac.h"
-#include "stmmac_platform.h"
-#include "altr_tse_pcs.h"
-
-#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII       0
-#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII          BIT(1)
-#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII           BIT(2)
-#define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH               2
-#define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK                        GENMASK(1, 0)
-
-#define TSE_PCS_CONTROL_AN_EN_MASK                     BIT(12)
-#define TSE_PCS_CONTROL_REG                            0x00
-#define TSE_PCS_CONTROL_RESTART_AN_MASK                        BIT(9)
-#define TSE_PCS_CTRL_AUTONEG_SGMII                     0x1140
-#define TSE_PCS_IF_MODE_REG                            0x28
-#define TSE_PCS_LINK_TIMER_0_REG                       0x24
-#define TSE_PCS_LINK_TIMER_1_REG                       0x26
-#define TSE_PCS_SIZE                                   0x40
-#define TSE_PCS_STATUS_AN_COMPLETED_MASK               BIT(5)
-#define TSE_PCS_STATUS_LINK_MASK                       0x0004
-#define TSE_PCS_STATUS_REG                             0x02
-#define TSE_PCS_SGMII_SPEED_1000                       BIT(3)
-#define TSE_PCS_SGMII_SPEED_100                                BIT(2)
-#define TSE_PCS_SGMII_SPEED_10                         0x0
-#define TSE_PCS_SW_RST_MASK                            0x8000
-#define TSE_PCS_PARTNER_ABILITY_REG                    0x0A
-#define TSE_PCS_PARTNER_DUPLEX_FULL                    0x1000
-#define TSE_PCS_PARTNER_DUPLEX_HALF                    0x0000
-#define TSE_PCS_PARTNER_DUPLEX_MASK                    0x1000
-#define TSE_PCS_PARTNER_SPEED_MASK                     GENMASK(11, 10)
-#define TSE_PCS_PARTNER_SPEED_1000                     BIT(11)
-#define TSE_PCS_PARTNER_SPEED_100                      BIT(10)
-#define TSE_PCS_PARTNER_SPEED_10                       0x0000
-#define TSE_PCS_PARTNER_SPEED_1000                     BIT(11)
-#define TSE_PCS_PARTNER_SPEED_100                      BIT(10)
-#define TSE_PCS_PARTNER_SPEED_10                       0x0000
-#define TSE_PCS_SGMII_SPEED_MASK                       GENMASK(3, 2)
-#define TSE_PCS_SGMII_LINK_TIMER_0                     0x0D40
-#define TSE_PCS_SGMII_LINK_TIMER_1                     0x0003
-#define TSE_PCS_SW_RESET_TIMEOUT                       100
-#define TSE_PCS_USE_SGMII_AN_MASK                      BIT(1)
-#define TSE_PCS_USE_SGMII_ENA                          BIT(0)
-#define TSE_PCS_IF_USE_SGMII                           0x03
-
-#define AUTONEGO_LINK_TIMER                            20
-
-static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs)
-{
-       int counter = 0;
-       u16 val;
-
-       val = readw(base + TSE_PCS_CONTROL_REG);
-       val |= TSE_PCS_SW_RST_MASK;
-       writew(val, base + TSE_PCS_CONTROL_REG);
-
-       while (counter < TSE_PCS_SW_RESET_TIMEOUT) {
-               val = readw(base + TSE_PCS_CONTROL_REG);
-               val &= TSE_PCS_SW_RST_MASK;
-               if (val == 0)
-                       break;
-               counter++;
-               udelay(1);
-       }
-       if (counter >= TSE_PCS_SW_RESET_TIMEOUT) {
-               dev_err(pcs->dev, "PCS could not get out of sw reset\n");
-               return -ETIMEDOUT;
-       }
-
-       return 0;
-}
-
-int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs)
-{
-       int ret = 0;
-
-       writew(TSE_PCS_IF_USE_SGMII, base + TSE_PCS_IF_MODE_REG);
-
-       writew(TSE_PCS_CTRL_AUTONEG_SGMII, base + TSE_PCS_CONTROL_REG);
-
-       writew(TSE_PCS_SGMII_LINK_TIMER_0, base + TSE_PCS_LINK_TIMER_0_REG);
-       writew(TSE_PCS_SGMII_LINK_TIMER_1, base + TSE_PCS_LINK_TIMER_1_REG);
-
-       ret = tse_pcs_reset(base, pcs);
-       if (ret == 0)
-               writew(SGMII_ADAPTER_ENABLE,
-                      pcs->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
-
-       return ret;
-}
-
-static void pcs_link_timer_callback(struct tse_pcs *pcs)
-{
-       u16 val = 0;
-       void __iomem *tse_pcs_base = pcs->tse_pcs_base;
-       void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
-
-       val = readw(tse_pcs_base + TSE_PCS_STATUS_REG);
-       val &= TSE_PCS_STATUS_LINK_MASK;
-
-       if (val != 0) {
-               dev_dbg(pcs->dev, "Adapter: Link is established\n");
-               writew(SGMII_ADAPTER_ENABLE,
-                      sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
-       } else {
-               mod_timer(&pcs->aneg_link_timer, jiffies +
-                         msecs_to_jiffies(AUTONEGO_LINK_TIMER));
-       }
-}
-
-static void auto_nego_timer_callback(struct tse_pcs *pcs)
-{
-       u16 val = 0;
-       u16 speed = 0;
-       u16 duplex = 0;
-       void __iomem *tse_pcs_base = pcs->tse_pcs_base;
-       void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
-
-       val = readw(tse_pcs_base + TSE_PCS_STATUS_REG);
-       val &= TSE_PCS_STATUS_AN_COMPLETED_MASK;
-
-       if (val != 0) {
-               dev_dbg(pcs->dev, "Adapter: Auto Negotiation is completed\n");
-               val = readw(tse_pcs_base + TSE_PCS_PARTNER_ABILITY_REG);
-               speed = val & TSE_PCS_PARTNER_SPEED_MASK;
-               duplex = val & TSE_PCS_PARTNER_DUPLEX_MASK;
-
-               if (speed == TSE_PCS_PARTNER_SPEED_10 &&
-                   duplex == TSE_PCS_PARTNER_DUPLEX_FULL)
-                       dev_dbg(pcs->dev,
-                               "Adapter: Link Partner is Up - 10/Full\n");
-               else if (speed == TSE_PCS_PARTNER_SPEED_100 &&
-                        duplex == TSE_PCS_PARTNER_DUPLEX_FULL)
-                       dev_dbg(pcs->dev,
-                               "Adapter: Link Partner is Up - 100/Full\n");
-               else if (speed == TSE_PCS_PARTNER_SPEED_1000 &&
-                        duplex == TSE_PCS_PARTNER_DUPLEX_FULL)
-                       dev_dbg(pcs->dev,
-                               "Adapter: Link Partner is Up - 1000/Full\n");
-               else if (speed == TSE_PCS_PARTNER_SPEED_10 &&
-                        duplex == TSE_PCS_PARTNER_DUPLEX_HALF)
-                       dev_err(pcs->dev,
-                               "Adapter does not support Half Duplex\n");
-               else if (speed == TSE_PCS_PARTNER_SPEED_100 &&
-                        duplex == TSE_PCS_PARTNER_DUPLEX_HALF)
-                       dev_err(pcs->dev,
-                               "Adapter does not support Half Duplex\n");
-               else if (speed == TSE_PCS_PARTNER_SPEED_1000 &&
-                        duplex == TSE_PCS_PARTNER_DUPLEX_HALF)
-                       dev_err(pcs->dev,
-                               "Adapter does not support Half Duplex\n");
-               else
-                       dev_err(pcs->dev,
-                               "Adapter: Invalid Partner Speed and Duplex\n");
-
-               if (duplex == TSE_PCS_PARTNER_DUPLEX_FULL &&
-                   (speed == TSE_PCS_PARTNER_SPEED_10 ||
-                    speed == TSE_PCS_PARTNER_SPEED_100 ||
-                    speed == TSE_PCS_PARTNER_SPEED_1000))
-                       writew(SGMII_ADAPTER_ENABLE,
-                              sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
-       } else {
-               val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
-               val |= TSE_PCS_CONTROL_RESTART_AN_MASK;
-               writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
-
-               tse_pcs_reset(tse_pcs_base, pcs);
-               mod_timer(&pcs->aneg_link_timer, jiffies +
-                         msecs_to_jiffies(AUTONEGO_LINK_TIMER));
-       }
-}
-
-static void aneg_link_timer_callback(struct timer_list *t)
-{
-       struct tse_pcs *pcs = from_timer(pcs, t, aneg_link_timer);
-
-       if (pcs->autoneg == AUTONEG_ENABLE)
-               auto_nego_timer_callback(pcs);
-       else if (pcs->autoneg == AUTONEG_DISABLE)
-               pcs_link_timer_callback(pcs);
-}
-
-void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
-                          unsigned int speed)
-{
-       void __iomem *tse_pcs_base = pcs->tse_pcs_base;
-       u32 val;
-
-       pcs->autoneg = phy_dev->autoneg;
-
-       if (phy_dev->autoneg == AUTONEG_ENABLE) {
-               val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
-               val |= TSE_PCS_CONTROL_AN_EN_MASK;
-               writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
-
-               val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
-               val |= TSE_PCS_USE_SGMII_AN_MASK;
-               writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
-
-               val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
-               val |= TSE_PCS_CONTROL_RESTART_AN_MASK;
-
-               tse_pcs_reset(tse_pcs_base, pcs);
-
-               timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback,
-                           0);
-               mod_timer(&pcs->aneg_link_timer, jiffies +
-                         msecs_to_jiffies(AUTONEGO_LINK_TIMER));
-       } else if (phy_dev->autoneg == AUTONEG_DISABLE) {
-               val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
-               val &= ~TSE_PCS_CONTROL_AN_EN_MASK;
-               writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
-
-               val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
-               val &= ~TSE_PCS_USE_SGMII_AN_MASK;
-               writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
-
-               val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
-               val &= ~TSE_PCS_SGMII_SPEED_MASK;
-
-               switch (speed) {
-               case 1000:
-                       val |= TSE_PCS_SGMII_SPEED_1000;
-                       break;
-               case 100:
-                       val |= TSE_PCS_SGMII_SPEED_100;
-                       break;
-               case 10:
-                       val |= TSE_PCS_SGMII_SPEED_10;
-                       break;
-               default:
-                       return;
-               }
-               writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
-
-               tse_pcs_reset(tse_pcs_base, pcs);
-
-               timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback,
-                           0);
-               mod_timer(&pcs->aneg_link_timer, jiffies +
-                         msecs_to_jiffies(AUTONEGO_LINK_TIMER));
-       }
-}
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
deleted file mode 100644 (file)
index 694ac25..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright Altera Corporation (C) 2016. All rights reserved.
- *
- * Author: Tien Hock Loh <thloh@altera.com>
- */
-
-#ifndef __TSE_PCS_H__
-#define __TSE_PCS_H__
-
-#include <linux/phy.h>
-#include <linux/timer.h>
-
-#define SGMII_ADAPTER_CTRL_REG         0x00
-#define SGMII_ADAPTER_ENABLE           0x0000
-#define SGMII_ADAPTER_DISABLE          0x0001
-
-struct tse_pcs {
-       struct device *dev;
-       void __iomem *tse_pcs_base;
-       void __iomem *sgmii_adapter_base;
-       struct timer_list aneg_link_timer;
-       int autoneg;
-};
-
-int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs);
-void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
-                          unsigned int speed);
-
-#endif /* __TSE_PCS_H__ */
index 4ad692c..16e67c1 100644 (file)
@@ -519,6 +519,7 @@ struct mac_device_info {
        const struct stmmac_tc_ops *tc;
        const struct stmmac_mmc_ops *mmc;
        struct dw_xpcs *xpcs;
+       struct phylink_pcs *lynx_pcs; /* Lynx external PCS */
        struct mii_regs mii;    /* MII register Addresses */
        struct mac_link link;
        void __iomem *pcsr;     /* vpointer to device CSRs */
index 9354bf4..58a7f08 100644 (file)
@@ -141,7 +141,7 @@ MODULE_DEVICE_TABLE(of, anarion_dwmac_match);
 
 static struct platform_driver anarion_dwmac_driver = {
        .probe  = anarion_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = "anarion-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index 18acf7d..9f88530 100644 (file)
@@ -464,7 +464,7 @@ remove_config:
        return ret;
 }
 
-static int dwc_eth_dwmac_remove(struct platform_device *pdev)
+static void dwc_eth_dwmac_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct stmmac_priv *priv = netdev_priv(ndev);
@@ -477,8 +477,6 @@ static int dwc_eth_dwmac_remove(struct platform_device *pdev)
        data->remove(pdev);
 
        stmmac_remove_config_dt(pdev, priv->plat);
-
-       return 0;
 }
 
 static const struct of_device_id dwc_eth_dwmac_match[] = {
@@ -490,7 +488,7 @@ MODULE_DEVICE_TABLE(of, dwc_eth_dwmac_match);
 
 static struct platform_driver dwc_eth_dwmac_driver = {
        .probe  = dwc_eth_dwmac_probe,
-       .remove = dwc_eth_dwmac_remove,
+       .remove_new = dwc_eth_dwmac_remove,
        .driver = {
                .name           = "dwc-eth-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index ef8f3a9..ef10239 100644 (file)
@@ -87,7 +87,7 @@ MODULE_DEVICE_TABLE(of, dwmac_generic_match);
 
 static struct platform_driver dwmac_generic_driver = {
        .probe  = dwmac_generic_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = STMMAC_RESOURCE_NAME,
                .pm             = &stmmac_pltfr_pm_ops,
index 7c228bd..b9378a6 100644 (file)
@@ -376,7 +376,7 @@ MODULE_DEVICE_TABLE(of, imx_dwmac_match);
 
 static struct platform_driver imx_dwmac_driver = {
        .probe  = imx_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = "imx-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index 378b4dd..8063ba1 100644 (file)
@@ -386,7 +386,7 @@ MODULE_DEVICE_TABLE(of, ingenic_mac_of_matches);
 
 static struct platform_driver ingenic_mac_driver = {
        .probe          = ingenic_mac_probe,
-       .remove         = stmmac_pltfr_remove,
+       .remove_new     = stmmac_pltfr_remove,
        .driver         = {
                .name   = "ingenic-mac",
                .pm             = pm_ptr(&ingenic_mac_pm_ops),
index 06d287f..a5e639a 100644 (file)
@@ -169,20 +169,17 @@ err_remove_config_dt:
        return ret;
 }
 
-static int intel_eth_plat_remove(struct platform_device *pdev)
+static void intel_eth_plat_remove(struct platform_device *pdev)
 {
        struct intel_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
-       int ret;
 
-       ret = stmmac_pltfr_remove(pdev);
+       stmmac_pltfr_remove(pdev);
        clk_disable_unprepare(dwmac->tx_clk);
-
-       return ret;
 }
 
 static struct platform_driver intel_eth_plat_driver = {
        .probe  = intel_eth_plat_probe,
-       .remove = intel_eth_plat_remove,
+       .remove_new = intel_eth_plat_remove,
        .driver = {
                .name           = "intel-eth-plat",
                .pm             = &stmmac_pltfr_pm_ops,
index e888c8a..e39406d 100644 (file)
@@ -498,7 +498,7 @@ MODULE_DEVICE_TABLE(of, ipq806x_gmac_dwmac_match);
 
 static struct platform_driver ipq806x_gmac_dwmac_driver = {
        .probe = ipq806x_gmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = "ipq806x-gmac-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index 9d77c64..18e84ba 100644 (file)
@@ -83,7 +83,7 @@ MODULE_DEVICE_TABLE(of, lpc18xx_dwmac_match);
 
 static struct platform_driver lpc18xx_dwmac_driver = {
        .probe  = lpc18xx_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = "lpc18xx-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index 9ae31e3..73c1dfa 100644 (file)
@@ -678,15 +678,12 @@ err_remove_config_dt:
        return ret;
 }
 
-static int mediatek_dwmac_remove(struct platform_device *pdev)
+static void mediatek_dwmac_remove(struct platform_device *pdev)
 {
        struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev);
-       int ret;
 
-       ret = stmmac_pltfr_remove(pdev);
+       stmmac_pltfr_remove(pdev);
        mediatek_dwmac_clks_config(priv_plat, false);
-
-       return ret;
 }
 
 static const struct of_device_id mediatek_dwmac_match[] = {
@@ -701,7 +698,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match);
 
 static struct platform_driver mediatek_dwmac_driver = {
        .probe  = mediatek_dwmac_probe,
-       .remove = mediatek_dwmac_remove,
+       .remove_new = mediatek_dwmac_remove,
        .driver = {
                .name           = "dwmac-mediatek",
                .pm             = &stmmac_pltfr_pm_ops,
index 16fb66a..7aa5e6b 100644 (file)
@@ -91,7 +91,7 @@ MODULE_DEVICE_TABLE(of, meson6_dwmac_match);
 
 static struct platform_driver meson6_dwmac_driver = {
        .probe  = meson6_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = "meson6-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index f6754e3..92b1604 100644 (file)
@@ -539,7 +539,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match);
 
 static struct platform_driver meson8b_dwmac_driver = {
        .probe  = meson8b_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = "meson8b-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index 62a69a9..4295402 100644 (file)
@@ -231,7 +231,7 @@ MODULE_DEVICE_TABLE(of, oxnas_dwmac_match);
 
 static struct platform_driver oxnas_dwmac_driver = {
        .probe  = oxnas_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = "oxnas-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index f07905f..c801838 100644 (file)
@@ -666,19 +666,12 @@ err_mem:
        return ret;
 }
 
-static int qcom_ethqos_remove(struct platform_device *pdev)
+static void qcom_ethqos_remove(struct platform_device *pdev)
 {
-       struct qcom_ethqos *ethqos;
-       int ret;
-
-       ethqos = get_stmmac_bsp_priv(&pdev->dev);
-       if (!ethqos)
-               return -ENODEV;
+       struct qcom_ethqos *ethqos = get_stmmac_bsp_priv(&pdev->dev);
 
-       ret = stmmac_pltfr_remove(pdev);
+       stmmac_pltfr_remove(pdev);
        ethqos_clks_config(ethqos, false);
-
-       return ret;
 }
 
 static const struct of_device_id qcom_ethqos_match[] = {
@@ -691,7 +684,7 @@ MODULE_DEVICE_TABLE(of, qcom_ethqos_match);
 
 static struct platform_driver qcom_ethqos_driver = {
        .probe  = qcom_ethqos_probe,
-       .remove = qcom_ethqos_remove,
+       .remove_new = qcom_ethqos_remove,
        .driver = {
                .name           = "qcom-ethqos",
                .pm             = &stmmac_pltfr_pm_ops,
index 4ea31cc..d81591b 100644 (file)
@@ -1863,15 +1863,13 @@ err_remove_config_dt:
        return ret;
 }
 
-static int rk_gmac_remove(struct platform_device *pdev)
+static void rk_gmac_remove(struct platform_device *pdev)
 {
        struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev);
 
        stmmac_dvr_remove(&pdev->dev);
 
        rk_gmac_powerdown(bsp_priv);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1925,7 +1923,7 @@ MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match);
 
 static struct platform_driver rk_gmac_dwmac_driver = {
        .probe  = rk_gmac_probe,
-       .remove = rk_gmac_remove,
+       .remove_new = rk_gmac_remove,
        .driver = {
                .name           = "rk_gmac-dwmac",
                .pm             = &rk_gmac_pm_ops,
index 6b447d8..6267bcb 100644 (file)
 #include <linux/of_net.h>
 #include <linux/phy.h>
 #include <linux/regmap.h>
+#include <linux/mdio/mdio-regmap.h>
+#include <linux/pcs-lynx.h>
 #include <linux/reset.h>
 #include <linux/stmmac.h>
 
 #include "stmmac.h"
 #include "stmmac_platform.h"
 
-#include "altr_tse_pcs.h"
-
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2
 #define EMAC_SPLITTER_CTRL_SPEED_100           0x3
 #define EMAC_SPLITTER_CTRL_SPEED_1000          0x0
 
+#define SGMII_ADAPTER_CTRL_REG         0x00
+#define SGMII_ADAPTER_ENABLE           0x0000
+#define SGMII_ADAPTER_DISABLE          0x0001
+
 struct socfpga_dwmac;
 struct socfpga_dwmac_ops {
        int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv);
@@ -50,16 +54,18 @@ struct socfpga_dwmac {
        struct reset_control *stmmac_rst;
        struct reset_control *stmmac_ocp_rst;
        void __iomem *splitter_base;
+       void __iomem *tse_pcs_base;
+       void __iomem *sgmii_adapter_base;
        bool f2h_ptp_ref_clk;
-       struct tse_pcs pcs;
        const struct socfpga_dwmac_ops *ops;
+       struct mdio_device *pcs_mdiodev;
 };
 
 static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
 {
        struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv;
        void __iomem *splitter_base = dwmac->splitter_base;
-       void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base;
+       void __iomem *sgmii_adapter_base = dwmac->sgmii_adapter_base;
        struct device *dev = dwmac->dev;
        struct net_device *ndev = dev_get_drvdata(dev);
        struct phy_device *phy_dev = ndev->phydev;
@@ -89,11 +95,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
                writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
        }
 
-       if (phy_dev && sgmii_adapter_base) {
+       if (phy_dev && sgmii_adapter_base)
                writew(SGMII_ADAPTER_ENABLE,
                       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
-               tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed);
-       }
 }
 
 static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev)
@@ -183,11 +187,11 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *
                                goto err_node_put;
                        }
 
-                       dwmac->pcs.sgmii_adapter_base =
+                       dwmac->sgmii_adapter_base =
                            devm_ioremap_resource(dev, &res_sgmii_adapter);
 
-                       if (IS_ERR(dwmac->pcs.sgmii_adapter_base)) {
-                               ret = PTR_ERR(dwmac->pcs.sgmii_adapter_base);
+                       if (IS_ERR(dwmac->sgmii_adapter_base)) {
+                               ret = PTR_ERR(dwmac->sgmii_adapter_base);
                                goto err_node_put;
                        }
                }
@@ -205,11 +209,11 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *
                                goto err_node_put;
                        }
 
-                       dwmac->pcs.tse_pcs_base =
+                       dwmac->tse_pcs_base =
                            devm_ioremap_resource(dev, &res_tse_pcs);
 
-                       if (IS_ERR(dwmac->pcs.tse_pcs_base)) {
-                               ret = PTR_ERR(dwmac->pcs.tse_pcs_base);
+                       if (IS_ERR(dwmac->tse_pcs_base)) {
+                               ret = PTR_ERR(dwmac->tse_pcs_base);
                                goto err_node_put;
                        }
                }
@@ -235,6 +239,13 @@ static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac)
        return priv->plat->interface;
 }
 
+static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable)
+{
+       u16 val = enable ? SGMII_ADAPTER_ENABLE : SGMII_ADAPTER_DISABLE;
+
+       writew(val, dwmac->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+}
+
 static int socfpga_set_phy_mode_common(int phymode, u32 *val)
 {
        switch (phymode) {
@@ -310,12 +321,8 @@ static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac)
         */
        reset_control_deassert(dwmac->stmmac_ocp_rst);
        reset_control_deassert(dwmac->stmmac_rst);
-       if (phymode == PHY_INTERFACE_MODE_SGMII) {
-               if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) {
-                       dev_err(dwmac->dev, "Unable to initialize TSE PCS");
-                       return -EINVAL;
-               }
-       }
+       if (phymode == PHY_INTERFACE_MODE_SGMII)
+               socfpga_sgmii_config(dwmac, true);
 
        return 0;
 }
@@ -367,12 +374,8 @@ static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac)
         */
        reset_control_deassert(dwmac->stmmac_ocp_rst);
        reset_control_deassert(dwmac->stmmac_rst);
-       if (phymode == PHY_INTERFACE_MODE_SGMII) {
-               if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) {
-                       dev_err(dwmac->dev, "Unable to initialize TSE PCS");
-                       return -EINVAL;
-               }
-       }
+       if (phymode == PHY_INTERFACE_MODE_SGMII)
+               socfpga_sgmii_config(dwmac, true);
        return 0;
 }
 
@@ -443,6 +446,48 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
        if (ret)
                goto err_dvr_remove;
 
+       /* Create a regmap for the PCS so that it can be used by the PCS driver,
+        * if we have such a PCS
+        */
+       if (dwmac->tse_pcs_base) {
+               struct regmap_config pcs_regmap_cfg;
+               struct mdio_regmap_config mrc;
+               struct regmap *pcs_regmap;
+               struct mii_bus *pcs_bus;
+
+               memset(&pcs_regmap_cfg, 0, sizeof(pcs_regmap_cfg));
+               memset(&mrc, 0, sizeof(mrc));
+
+               pcs_regmap_cfg.reg_bits = 16;
+               pcs_regmap_cfg.val_bits = 16;
+               pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1);
+
+               pcs_regmap = devm_regmap_init_mmio(&pdev->dev, dwmac->tse_pcs_base,
+                                                  &pcs_regmap_cfg);
+               if (IS_ERR(pcs_regmap)) {
+                       ret = PTR_ERR(pcs_regmap);
+                       goto err_dvr_remove;
+               }
+
+               mrc.regmap = pcs_regmap;
+               mrc.parent = &pdev->dev;
+               mrc.valid_addr = 0x0;
+               mrc.autoscan = false;
+
+               snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name);
+               pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc);
+               if (IS_ERR(pcs_bus)) {
+                       ret = PTR_ERR(pcs_bus);
+                       goto err_dvr_remove;
+               }
+
+               stpriv->hw->lynx_pcs = lynx_pcs_create_mdiodev(pcs_bus, 0);
+               if (IS_ERR(stpriv->hw->lynx_pcs)) {
+                       ret = PTR_ERR(stpriv->hw->lynx_pcs);
+                       goto err_dvr_remove;
+               }
+       }
+
        return 0;
 
 err_dvr_remove:
@@ -453,6 +498,17 @@ err_remove_config_dt:
        return ret;
 }
 
+static void socfpga_dwmac_remove(struct platform_device *pdev)
+{
+       struct net_device *ndev = platform_get_drvdata(pdev);
+       struct stmmac_priv *priv = netdev_priv(ndev);
+       struct phylink_pcs *pcs = priv->hw->lynx_pcs;
+
+       stmmac_pltfr_remove(pdev);
+
+       lynx_pcs_destroy(pcs);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int socfpga_dwmac_resume(struct device *dev)
 {
@@ -524,7 +580,7 @@ MODULE_DEVICE_TABLE(of, socfpga_dwmac_match);
 
 static struct platform_driver socfpga_dwmac_driver = {
        .probe  = socfpga_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = socfpga_dwmac_remove,
        .driver = {
                .name           = "socfpga-dwmac",
                .pm             = &socfpga_dwmac_pm_ops,
index 4f51a78..d3a39d2 100644 (file)
@@ -156,7 +156,7 @@ MODULE_DEVICE_TABLE(of, starfive_dwmac_match);
 
 static struct platform_driver starfive_dwmac_driver = {
        .probe  = starfive_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name = "starfive-dwmac",
                .pm = &stmmac_pltfr_pm_ops,
index 465ce66..dcbb17c 100644 (file)
@@ -317,15 +317,13 @@ err_remove_config_dt:
        return ret;
 }
 
-static int sti_dwmac_remove(struct platform_device *pdev)
+static void sti_dwmac_remove(struct platform_device *pdev)
 {
        struct sti_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
 
        stmmac_dvr_remove(&pdev->dev);
 
        clk_disable_unprepare(dwmac->clk);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -365,7 +363,7 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match);
 
 static struct platform_driver sti_dwmac_driver = {
        .probe  = sti_dwmac_probe,
-       .remove = sti_dwmac_remove,
+       .remove_new = sti_dwmac_remove,
        .driver = {
                .name           = "sti-dwmac",
                .pm             = &sti_dwmac_pm_ops,
index 0616b3a..bdb4de5 100644 (file)
@@ -417,7 +417,7 @@ err_remove_config_dt:
        return ret;
 }
 
-static int stm32_dwmac_remove(struct platform_device *pdev)
+static void stm32_dwmac_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct stmmac_priv *priv = netdev_priv(ndev);
@@ -431,8 +431,6 @@ static int stm32_dwmac_remove(struct platform_device *pdev)
                dev_pm_clear_wake_irq(&pdev->dev);
                device_init_wakeup(&pdev->dev, false);
        }
-
-       return 0;
 }
 
 static int stm32mp1_suspend(struct stm32_dwmac *dwmac)
@@ -528,7 +526,7 @@ MODULE_DEVICE_TABLE(of, stm32_dwmac_match);
 
 static struct platform_driver stm32_dwmac_driver = {
        .probe  = stm32_dwmac_probe,
-       .remove = stm32_dwmac_remove,
+       .remove_new = stm32_dwmac_remove,
        .driver = {
                .name           = "stm32-dwmac",
                .pm             = &stm32_dwmac_pm_ops,
index c2c592b..1e71438 100644 (file)
@@ -1294,7 +1294,7 @@ dwmac_deconfig:
        return ret;
 }
 
-static int sun8i_dwmac_remove(struct platform_device *pdev)
+static void sun8i_dwmac_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct stmmac_priv *priv = netdev_priv(ndev);
@@ -1309,8 +1309,6 @@ static int sun8i_dwmac_remove(struct platform_device *pdev)
 
        stmmac_pltfr_remove(pdev);
        sun8i_dwmac_unset_syscon(gmac);
-
-       return 0;
 }
 
 static void sun8i_dwmac_shutdown(struct platform_device *pdev)
@@ -1341,7 +1339,7 @@ MODULE_DEVICE_TABLE(of, sun8i_dwmac_match);
 
 static struct platform_driver sun8i_dwmac_driver = {
        .probe  = sun8i_dwmac_probe,
-       .remove = sun8i_dwmac_remove,
+       .remove_new = sun8i_dwmac_remove,
        .shutdown = sun8i_dwmac_shutdown,
        .driver = {
                .name           = "dwmac-sun8i",
index fc3b0ac..50963e9 100644 (file)
@@ -179,7 +179,7 @@ MODULE_DEVICE_TABLE(of, sun7i_dwmac_match);
 
 static struct platform_driver sun7i_dwmac_driver = {
        .probe  = sun7i_gmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove_new = stmmac_pltfr_remove,
        .driver = {
                .name           = "sun7i-dwmac",
                .pm             = &stmmac_pltfr_pm_ops,
index bdf990c..f8367c5 100644 (file)
@@ -353,15 +353,13 @@ disable_clks:
        return err;
 }
 
-static int tegra_mgbe_remove(struct platform_device *pdev)
+static void tegra_mgbe_remove(struct platform_device *pdev)
 {
        struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(&pdev->dev);
 
        clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks);
 
        stmmac_pltfr_remove(pdev);
-
-       return 0;
 }
 
 static const struct of_device_id tegra_mgbe_match[] = {
@@ -374,7 +372,7 @@ static SIMPLE_DEV_PM_OPS(tegra_mgbe_pm_ops, tegra_mgbe_suspend, tegra_mgbe_resum
 
 static struct platform_driver tegra_mgbe_driver = {
        .probe = tegra_mgbe_probe,
-       .remove = tegra_mgbe_remove,
+       .remove_new = tegra_mgbe_remove,
        .driver = {
                .name = "tegra-mgbe",
                .pm             = &tegra_mgbe_pm_ops,
index c3f10a9..acbb284 100644 (file)
@@ -198,7 +198,7 @@ static int visconti_eth_clock_probe(struct platform_device *pdev,
        return 0;
 }
 
-static int visconti_eth_clock_remove(struct platform_device *pdev)
+static void visconti_eth_clock_remove(struct platform_device *pdev)
 {
        struct visconti_eth *dwmac = get_stmmac_bsp_priv(&pdev->dev);
        struct net_device *ndev = platform_get_drvdata(pdev);
@@ -206,8 +206,6 @@ static int visconti_eth_clock_remove(struct platform_device *pdev)
 
        clk_disable_unprepare(dwmac->phy_ref_clk);
        clk_disable_unprepare(priv->plat->stmmac_clk);
-
-       return 0;
 }
 
 static int visconti_eth_dwmac_probe(struct platform_device *pdev)
@@ -259,23 +257,16 @@ remove_config:
        return ret;
 }
 
-static int visconti_eth_dwmac_remove(struct platform_device *pdev)
+static void visconti_eth_dwmac_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct stmmac_priv *priv = netdev_priv(ndev);
-       int err;
 
-       err = stmmac_pltfr_remove(pdev);
-       if (err < 0)
-               dev_err(&pdev->dev, "failed to remove platform: %d\n", err);
+       stmmac_pltfr_remove(pdev);
 
-       err = visconti_eth_clock_remove(pdev);
-       if (err < 0)
-               dev_err(&pdev->dev, "failed to remove clock: %d\n", err);
+       visconti_eth_clock_remove(pdev);
 
        stmmac_remove_config_dt(pdev, priv->plat);
-
-       return err;
 }
 
 static const struct of_device_id visconti_eth_dwmac_match[] = {
@@ -286,7 +277,7 @@ MODULE_DEVICE_TABLE(of, visconti_eth_dwmac_match);
 
 static struct platform_driver visconti_eth_dwmac_driver = {
        .probe  = visconti_eth_dwmac_probe,
-       .remove = visconti_eth_dwmac_remove,
+       .remove_new = visconti_eth_dwmac_remove,
        .driver = {
                .name           = "visconti-eth-dwmac",
                .of_match_table = visconti_eth_dwmac_match,
index dfd5326..070bd91 100644 (file)
@@ -368,10 +368,12 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
 
                if (likely(intr_status & XGMAC_RI)) {
                        x->rx_normal_irq_n++;
+                       x->rxq_stats[chan].rx_normal_irq_n++;
                        ret |= handle_rx;
                }
                if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
                        x->tx_normal_irq_n++;
+                       x->txq_stats[chan].tx_normal_irq_n++;
                        ret |= handle_tx;
                }
        }
index 52cab9d..fa07b0d 100644 (file)
@@ -937,10 +937,13 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config,
 {
        struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
 
-       if (!priv->hw->xpcs)
-               return NULL;
+       if (priv->hw->xpcs)
+               return &priv->hw->xpcs->pcs;
+
+       if (priv->hw->lynx_pcs)
+               return priv->hw->lynx_pcs;
 
-       return &priv->hw->xpcs->pcs;
+       return NULL;
 }
 
 static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
@@ -3813,7 +3816,8 @@ static int __stmmac_open(struct net_device *dev,
        if (priv->hw->pcs != STMMAC_PCS_TBI &&
            priv->hw->pcs != STMMAC_PCS_RTBI &&
            (!priv->hw->xpcs ||
-            xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73)) {
+            xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73) &&
+           !priv->hw->lynx_pcs) {
                ret = stmmac_init_phy(dev);
                if (ret) {
                        netdev_err(priv->dev,
index 6807c4c..3db1cb0 100644 (file)
@@ -491,7 +491,6 @@ int stmmac_mdio_reset(struct mii_bus *bus)
 int stmmac_xpcs_setup(struct mii_bus *bus)
 {
        struct net_device *ndev = bus->priv;
-       struct mdio_device *mdiodev;
        struct stmmac_priv *priv;
        struct dw_xpcs *xpcs;
        int mode, addr;
@@ -501,16 +500,10 @@ int stmmac_xpcs_setup(struct mii_bus *bus)
 
        /* Try to probe the XPCS by scanning all addresses. */
        for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
-               mdiodev = mdio_device_create(bus, addr);
-               if (IS_ERR(mdiodev))
+               xpcs = xpcs_create_mdiodev(bus, addr, mode);
+               if (IS_ERR(xpcs))
                        continue;
 
-               xpcs = xpcs_create(mdiodev, mode);
-               if (IS_ERR_OR_NULL(xpcs)) {
-                       mdio_device_free(mdiodev);
-                       continue;
-               }
-
                priv->hw->xpcs = xpcs;
                break;
        }
@@ -669,10 +662,8 @@ int stmmac_mdio_unregister(struct net_device *ndev)
        if (!priv->mii)
                return 0;
 
-       if (priv->hw->xpcs) {
-               mdio_device_free(priv->hw->xpcs->mdiodev);
+       if (priv->hw->xpcs)
                xpcs_destroy(priv->hw->xpcs);
-       }
 
        mdiobus_unregister(priv->mii);
        priv->mii->priv = NULL;
index eb0b289..3c6b55b 100644 (file)
@@ -707,7 +707,7 @@ EXPORT_SYMBOL_GPL(stmmac_get_platform_resources);
  * Description: this function calls the main to free the net resources
  * and calls the platforms hook and release the resources (e.g. mem).
  */
-int stmmac_pltfr_remove(struct platform_device *pdev)
+void stmmac_pltfr_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct stmmac_priv *priv = netdev_priv(ndev);
@@ -719,8 +719,6 @@ int stmmac_pltfr_remove(struct platform_device *pdev)
                plat->exit(pdev, plat->bsp_priv);
 
        stmmac_remove_config_dt(pdev, plat);
-
-       return 0;
 }
 EXPORT_SYMBOL_GPL(stmmac_pltfr_remove);
 
index 3fff3f5..f7e4579 100644 (file)
@@ -19,7 +19,7 @@ void stmmac_remove_config_dt(struct platform_device *pdev,
 int stmmac_get_platform_resources(struct platform_device *pdev,
                                  struct stmmac_resources *stmmac_res);
 
-int stmmac_pltfr_remove(struct platform_device *pdev);
+void stmmac_pltfr_remove(struct platform_device *pdev);
 extern const struct dev_pm_ops stmmac_pltfr_pm_ops;
 
 static inline void *get_stmmac_bsp_priv(struct device *dev)
index 9d55226..ac41ef4 100644 (file)
@@ -966,8 +966,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
                return -EOPNOTSUPP;
        }
 
-       if (!qopt->enable)
+       if (qopt->cmd == TAPRIO_CMD_DESTROY)
                goto disable;
+       else if (qopt->cmd != TAPRIO_CMD_REPLACE)
+               return -EOPNOTSUPP;
+
        if (qopt->num_entries >= dep)
                return -EINVAL;
        if (!qopt->cycle_time)
@@ -988,7 +991,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
 
        mutex_lock(&priv->plat->est->lock);
        priv->plat->est->gcl_size = size;
-       priv->plat->est->enable = qopt->enable;
+       priv->plat->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE;
        mutex_unlock(&priv->plat->est->lock);
 
        for (i = 0; i < size; i++) {
index d61dfa2..b317b94 100644 (file)
@@ -1998,10 +1998,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
                skb->truesize += hlen - swivel;
                skb->len      += hlen - swivel;
 
-               __skb_frag_set_page(frag, page->buffer);
+               skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
                __skb_frag_ref(frag);
-               skb_frag_off_set(frag, off);
-               skb_frag_size_set(frag, hlen - swivel);
 
                /* any more data? */
                if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
@@ -2024,10 +2022,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
                        skb->len      += hlen;
                        frag++;
 
-                       __skb_frag_set_page(frag, page->buffer);
+                       skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
                        __skb_frag_ref(frag);
-                       skb_frag_off_set(frag, 0);
-                       skb_frag_size_set(frag, hlen);
                        RX_USED_ADD(page, hlen + cp->crc_size);
                }
 
index 3a908db..eced87f 100644 (file)
@@ -450,7 +450,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev,
 
        am65_cpsw_est_update_state(ndev);
 
-       if (!est_new->taprio.enable) {
+       if (est_new->taprio.cmd == TAPRIO_CMD_DESTROY) {
                am65_cpsw_stop_est(ndev);
                return ret;
        }
@@ -476,7 +476,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev,
        am65_cpsw_est_set_sched_list(ndev, est_new);
        am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf);
 
-       am65_cpsw_est_set(ndev, est_new->taprio.enable);
+       am65_cpsw_est_set(ndev, est_new->taprio.cmd == TAPRIO_CMD_REPLACE);
 
        if (tact == TACT_PROG) {
                ret = am65_cpsw_timer_set(ndev, est_new);
@@ -520,7 +520,7 @@ static int am65_cpsw_set_taprio(struct net_device *ndev, void *type_data)
        am65_cpsw_cp_taprio(taprio, &est_new->taprio);
        ret = am65_cpsw_configure_taprio(ndev, est_new);
        if (!ret) {
-               if (taprio->enable) {
+               if (taprio->cmd == TAPRIO_CMD_REPLACE) {
                        devm_kfree(&ndev->dev, port->qos.est_admin);
 
                        port->qos.est_admin = est_new;
@@ -564,8 +564,13 @@ purge_est:
 static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data)
 {
        struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+       struct tc_taprio_qopt_offload *taprio = type_data;
        struct am65_cpsw_common *common = port->common;
 
+       if (taprio->cmd != TAPRIO_CMD_REPLACE &&
+           taprio->cmd != TAPRIO_CMD_DESTROY)
+               return -EOPNOTSUPP;
+
        if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS))
                return -ENODEV;
 
index c9d8867..39596cd 100644 (file)
@@ -40,6 +40,16 @@ config NGBE
 config TXGBE
        tristate "Wangxun(R) 10GbE PCI Express adapters support"
        depends on PCI
+       depends on COMMON_CLK
+       select REGMAP
+       select I2C
+       select I2C_DESIGNWARE_PLATFORM
+       select PHYLINK
+       select HWMON if TXGBE=y
+       select SFP
+       select GPIOLIB
+       select GPIOLIB_IRQCHIP
+       select PCS_XPCS
        select LIBWX
        help
          This driver supports Wangxun(R) 10GbE PCI Express family of
index ca409b4..39a9aee 100644 (file)
@@ -1182,12 +1182,28 @@ static void wx_enable_sec_rx_path(struct wx *wx)
        WX_WRITE_FLUSH(wx);
 }
 
+static void wx_vlan_strip_control(struct wx *wx, bool enable)
+{
+       int i, j;
+
+       for (i = 0; i < wx->num_rx_queues; i++) {
+               struct wx_ring *ring = wx->rx_ring[i];
+
+               j = ring->reg_idx;
+               wr32m(wx, WX_PX_RR_CFG(j), WX_PX_RR_CFG_VLAN,
+                     enable ? WX_PX_RR_CFG_VLAN : 0);
+       }
+}
+
 void wx_set_rx_mode(struct net_device *netdev)
 {
        struct wx *wx = netdev_priv(netdev);
+       netdev_features_t features;
        u32 fctrl, vmolr, vlnctrl;
        int count;
 
+       features = netdev->features;
+
        /* Check for Promiscuous and All Multicast modes */
        fctrl = rd32(wx, WX_PSR_CTL);
        fctrl &= ~(WX_PSR_CTL_UPE | WX_PSR_CTL_MPE);
@@ -1254,6 +1270,13 @@ void wx_set_rx_mode(struct net_device *netdev)
        wr32(wx, WX_PSR_VLAN_CTL, vlnctrl);
        wr32(wx, WX_PSR_CTL, fctrl);
        wr32(wx, WX_PSR_VM_L2CTL(0), vmolr);
+
+       if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
+           (features & NETIF_F_HW_VLAN_STAG_RX))
+               wx_vlan_strip_control(wx, true);
+       else
+               wx_vlan_strip_control(wx, false);
+
 }
 EXPORT_SYMBOL(wx_set_rx_mode);
 
@@ -1462,6 +1485,16 @@ static void wx_configure_tx(struct wx *wx)
              WX_MAC_TX_CFG_TE, WX_MAC_TX_CFG_TE);
 }
 
+static void wx_restore_vlan(struct wx *wx)
+{
+       u16 vid = 1;
+
+       wx_vlan_rx_add_vid(wx->netdev, htons(ETH_P_8021Q), 0);
+
+       for_each_set_bit_from(vid, wx->active_vlans, VLAN_N_VID)
+               wx_vlan_rx_add_vid(wx->netdev, htons(ETH_P_8021Q), vid);
+}
+
 /**
  * wx_configure_rx - Configure Receive Unit after Reset
  * @wx: pointer to private structure
@@ -1527,7 +1560,7 @@ void wx_configure(struct wx *wx)
        wx_configure_port(wx);
 
        wx_set_rx_mode(wx->netdev);
-
+       wx_restore_vlan(wx);
        wx_enable_sec_rx_path(wx);
 
        wx_configure_tx(wx);
@@ -1727,4 +1760,241 @@ int wx_sw_init(struct wx *wx)
 }
 EXPORT_SYMBOL(wx_sw_init);
 
+/**
+ *  wx_find_vlvf_slot - find the vlanid or the first empty slot
+ *  @wx: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *
+ *  return the VLVF index where this VLAN id should be placed
+ *
+ **/
+static int wx_find_vlvf_slot(struct wx *wx, u32 vlan)
+{
+       u32 bits = 0, first_empty_slot = 0;
+       int regindex;
+
+       /* short cut the special case */
+       if (vlan == 0)
+               return 0;
+
+       /* Search for the vlan id in the VLVF entries. Save off the first empty
+        * slot found along the way
+        */
+       for (regindex = 1; regindex < WX_PSR_VLAN_SWC_ENTRIES; regindex++) {
+               wr32(wx, WX_PSR_VLAN_SWC_IDX, regindex);
+               bits = rd32(wx, WX_PSR_VLAN_SWC);
+               if (!bits && !(first_empty_slot))
+                       first_empty_slot = regindex;
+               else if ((bits & 0x0FFF) == vlan)
+                       break;
+       }
+
+       if (regindex >= WX_PSR_VLAN_SWC_ENTRIES) {
+               if (first_empty_slot)
+                       regindex = first_empty_slot;
+               else
+                       regindex = -ENOMEM;
+       }
+
+       return regindex;
+}
+
+/**
+ *  wx_set_vlvf - Set VLAN Pool Filter
+ *  @wx: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VLVFB
+ *  @vlan_on: boolean flag to turn on/off VLAN in VLVF
+ *  @vfta_changed: pointer to boolean flag which indicates whether VFTA
+ *                 should be changed
+ *
+ *  Turn on/off specified bit in VLVF table.
+ **/
+static int wx_set_vlvf(struct wx *wx, u32 vlan, u32 vind, bool vlan_on,
+                      bool *vfta_changed)
+{
+       int vlvf_index;
+       u32 vt, bits;
+
+       /* If VT Mode is set
+        *   Either vlan_on
+        *     make sure the vlan is in VLVF
+        *     set the vind bit in the matching VLVFB
+        *   Or !vlan_on
+        *     clear the pool bit and possibly the vind
+        */
+       vt = rd32(wx, WX_CFG_PORT_CTL);
+       if (!(vt & WX_CFG_PORT_CTL_NUM_VT_MASK))
+               return 0;
+
+       vlvf_index = wx_find_vlvf_slot(wx, vlan);
+       if (vlvf_index < 0)
+               return vlvf_index;
+
+       wr32(wx, WX_PSR_VLAN_SWC_IDX, vlvf_index);
+       if (vlan_on) {
+               /* set the pool bit */
+               if (vind < 32) {
+                       bits = rd32(wx, WX_PSR_VLAN_SWC_VM_L);
+                       bits |= (1 << vind);
+                       wr32(wx, WX_PSR_VLAN_SWC_VM_L, bits);
+               } else {
+                       bits = rd32(wx, WX_PSR_VLAN_SWC_VM_H);
+                       bits |= (1 << (vind - 32));
+                       wr32(wx, WX_PSR_VLAN_SWC_VM_H, bits);
+               }
+       } else {
+               /* clear the pool bit */
+               if (vind < 32) {
+                       bits = rd32(wx, WX_PSR_VLAN_SWC_VM_L);
+                       bits &= ~(1 << vind);
+                       wr32(wx, WX_PSR_VLAN_SWC_VM_L, bits);
+                       bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_H);
+               } else {
+                       bits = rd32(wx, WX_PSR_VLAN_SWC_VM_H);
+                       bits &= ~(1 << (vind - 32));
+                       wr32(wx, WX_PSR_VLAN_SWC_VM_H, bits);
+                       bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_L);
+               }
+       }
+
+       if (bits) {
+               wr32(wx, WX_PSR_VLAN_SWC, (WX_PSR_VLAN_SWC_VIEN | vlan));
+               if (!vlan_on && vfta_changed)
+                       *vfta_changed = false;
+       } else {
+               wr32(wx, WX_PSR_VLAN_SWC, 0);
+       }
+
+       return 0;
+}
+
+/**
+ *  wx_set_vfta - Set VLAN filter table
+ *  @wx: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VLVFB
+ *  @vlan_on: boolean flag to turn on/off VLAN in VLVF
+ *
+ *  Turn on/off specified VLAN in the VLAN filter table.
+ **/
+static int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on)
+{
+       u32 bitindex, vfta, targetbit;
+       bool vfta_changed = false;
+       int regindex, ret;
+
+       /* this is a 2 part operation - first the VFTA, then the
+        * VLVF and VLVFB if VT Mode is set
+        * We don't write the VFTA until we know the VLVF part succeeded.
+        */
+
+       /* Part 1
+        * The VFTA is a bitstring made up of 128 32-bit registers
+        * that enable the particular VLAN id, much like the MTA:
+        *    bits[11-5]: which register
+        *    bits[4-0]:  which bit in the register
+        */
+       regindex = (vlan >> 5) & 0x7F;
+       bitindex = vlan & 0x1F;
+       targetbit = (1 << bitindex);
+       /* errata 5 */
+       vfta = wx->mac.vft_shadow[regindex];
+       if (vlan_on) {
+               if (!(vfta & targetbit)) {
+                       vfta |= targetbit;
+                       vfta_changed = true;
+               }
+       } else {
+               if ((vfta & targetbit)) {
+                       vfta &= ~targetbit;
+                       vfta_changed = true;
+               }
+       }
+       /* Part 2
+        * Call wx_set_vlvf to set VLVFB and VLVF
+        */
+       ret = wx_set_vlvf(wx, vlan, vind, vlan_on, &vfta_changed);
+       if (ret != 0)
+               return ret;
+
+       if (vfta_changed)
+               wr32(wx, WX_PSR_VLAN_TBL(regindex), vfta);
+       wx->mac.vft_shadow[regindex] = vfta;
+
+       return 0;
+}
+
+/**
+ *  wx_clear_vfta - Clear VLAN filter table
+ *  @wx: pointer to hardware structure
+ *
+ *  Clears the VLAN filter table, and the VMDq index associated with the filter
+ **/
+static void wx_clear_vfta(struct wx *wx)
+{
+       u32 offset;
+
+       for (offset = 0; offset < wx->mac.vft_size; offset++) {
+               wr32(wx, WX_PSR_VLAN_TBL(offset), 0);
+               wx->mac.vft_shadow[offset] = 0;
+       }
+
+       for (offset = 0; offset < WX_PSR_VLAN_SWC_ENTRIES; offset++) {
+               wr32(wx, WX_PSR_VLAN_SWC_IDX, offset);
+               wr32(wx, WX_PSR_VLAN_SWC, 0);
+               wr32(wx, WX_PSR_VLAN_SWC_VM_L, 0);
+               wr32(wx, WX_PSR_VLAN_SWC_VM_H, 0);
+       }
+}
+
+int wx_vlan_rx_add_vid(struct net_device *netdev,
+                      __be16 proto, u16 vid)
+{
+       struct wx *wx = netdev_priv(netdev);
+
+       /* add VID to filter table */
+       wx_set_vfta(wx, vid, VMDQ_P(0), true);
+       set_bit(vid, wx->active_vlans);
+
+       return 0;
+}
+EXPORT_SYMBOL(wx_vlan_rx_add_vid);
+
+int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
+{
+       struct wx *wx = netdev_priv(netdev);
+
+       /* remove VID from filter table */
+       if (vid)
+               wx_set_vfta(wx, vid, VMDQ_P(0), false);
+       clear_bit(vid, wx->active_vlans);
+
+       return 0;
+}
+EXPORT_SYMBOL(wx_vlan_rx_kill_vid);
+
+/**
+ *  wx_start_hw - Prepare hardware for Tx/Rx
+ *  @wx: pointer to hardware structure
+ *
+ *  Starts the hardware using the generic start_hw function
+ *  and the generation-specific start_hw function.
+ *  Then performs revision-specific operations, if any.
+ **/
+void wx_start_hw(struct wx *wx)
+{
+       int i;
+
+       /* Clear the VLAN filter table */
+       wx_clear_vfta(wx);
+       WX_WRITE_FLUSH(wx);
+       /* Clear the rate limiters */
+       for (i = 0; i < wx->mac.max_tx_queues; i++) {
+               wr32(wx, WX_TDM_RP_IDX, i);
+               wr32(wx, WX_TDM_RP_RATE, 0);
+       }
+}
+EXPORT_SYMBOL(wx_start_hw);
+
 MODULE_LICENSE("GPL");
index c173c56..1f93ca3 100644 (file)
@@ -26,10 +26,13 @@ void wx_set_rx_mode(struct net_device *netdev);
 int wx_change_mtu(struct net_device *netdev, int new_mtu);
 void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring);
 void wx_configure(struct wx *wx);
+void wx_start_hw(struct wx *wx);
 int wx_disable_pcie_master(struct wx *wx);
 int wx_stop_adapter(struct wx *wx);
 void wx_reset_misc(struct wx *wx);
 int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count);
 int wx_sw_init(struct wx *wx);
+int wx_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid);
+int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid);
 
 #endif /* _WX_HW_H_ */
index 1e8d8b7..2c3f08b 100644 (file)
 /* Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd. */
 
 #include <linux/etherdevice.h>
+#include <net/ip6_checksum.h>
 #include <net/page_pool.h>
+#include <net/inet_ecn.h>
 #include <linux/iopoll.h>
+#include <linux/sctp.h>
 #include <linux/pci.h>
+#include <net/tcp.h>
+#include <net/ip.h>
 
 #include "wx_type.h"
 #include "wx_lib.h"
 #include "wx_hw.h"
 
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+static struct wx_dec_ptype wx_ptype_lookup[256] = {
+       /* L2: mac */
+       [0x11] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2),
+       [0x12] = WX_PTT(L2, NONE, NONE, NONE, TS,   PAY2),
+       [0x13] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2),
+       [0x14] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2),
+       [0x15] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE),
+       [0x16] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2),
+       [0x17] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE),
+
+       /* L2: ethertype filter */
+       [0x18 ... 0x1F] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE),
+
+       /* L3: ip non-tunnel */
+       [0x21] = WX_PTT(IP, FGV4, NONE, NONE, NONE, PAY3),
+       [0x22] = WX_PTT(IP, IPV4, NONE, NONE, NONE, PAY3),
+       [0x23] = WX_PTT(IP, IPV4, NONE, NONE, UDP,  PAY4),
+       [0x24] = WX_PTT(IP, IPV4, NONE, NONE, TCP,  PAY4),
+       [0x25] = WX_PTT(IP, IPV4, NONE, NONE, SCTP, PAY4),
+       [0x29] = WX_PTT(IP, FGV6, NONE, NONE, NONE, PAY3),
+       [0x2A] = WX_PTT(IP, IPV6, NONE, NONE, NONE, PAY3),
+       [0x2B] = WX_PTT(IP, IPV6, NONE, NONE, UDP,  PAY3),
+       [0x2C] = WX_PTT(IP, IPV6, NONE, NONE, TCP,  PAY4),
+       [0x2D] = WX_PTT(IP, IPV6, NONE, NONE, SCTP, PAY4),
+
+       /* L2: fcoe */
+       [0x30 ... 0x34] = WX_PTT(FCOE, NONE, NONE, NONE, NONE, PAY3),
+       [0x38 ... 0x3C] = WX_PTT(FCOE, NONE, NONE, NONE, NONE, PAY3),
+
+       /* IPv4 --> IPv4/IPv6 */
+       [0x81] = WX_PTT(IP, IPV4, IPIP, FGV4, NONE, PAY3),
+       [0x82] = WX_PTT(IP, IPV4, IPIP, IPV4, NONE, PAY3),
+       [0x83] = WX_PTT(IP, IPV4, IPIP, IPV4, UDP,  PAY4),
+       [0x84] = WX_PTT(IP, IPV4, IPIP, IPV4, TCP,  PAY4),
+       [0x85] = WX_PTT(IP, IPV4, IPIP, IPV4, SCTP, PAY4),
+       [0x89] = WX_PTT(IP, IPV4, IPIP, FGV6, NONE, PAY3),
+       [0x8A] = WX_PTT(IP, IPV4, IPIP, IPV6, NONE, PAY3),
+       [0x8B] = WX_PTT(IP, IPV4, IPIP, IPV6, UDP,  PAY4),
+       [0x8C] = WX_PTT(IP, IPV4, IPIP, IPV6, TCP,  PAY4),
+       [0x8D] = WX_PTT(IP, IPV4, IPIP, IPV6, SCTP, PAY4),
+
+       /* IPv4 --> GRE/NAT --> NONE/IPv4/IPv6 */
+       [0x90] = WX_PTT(IP, IPV4, IG, NONE, NONE, PAY3),
+       [0x91] = WX_PTT(IP, IPV4, IG, FGV4, NONE, PAY3),
+       [0x92] = WX_PTT(IP, IPV4, IG, IPV4, NONE, PAY3),
+       [0x93] = WX_PTT(IP, IPV4, IG, IPV4, UDP,  PAY4),
+       [0x94] = WX_PTT(IP, IPV4, IG, IPV4, TCP,  PAY4),
+       [0x95] = WX_PTT(IP, IPV4, IG, IPV4, SCTP, PAY4),
+       [0x99] = WX_PTT(IP, IPV4, IG, FGV6, NONE, PAY3),
+       [0x9A] = WX_PTT(IP, IPV4, IG, IPV6, NONE, PAY3),
+       [0x9B] = WX_PTT(IP, IPV4, IG, IPV6, UDP,  PAY4),
+       [0x9C] = WX_PTT(IP, IPV4, IG, IPV6, TCP,  PAY4),
+       [0x9D] = WX_PTT(IP, IPV4, IG, IPV6, SCTP, PAY4),
+
+       /* IPv4 --> GRE/NAT --> MAC --> NONE/IPv4/IPv6 */
+       [0xA0] = WX_PTT(IP, IPV4, IGM, NONE, NONE, PAY3),
+       [0xA1] = WX_PTT(IP, IPV4, IGM, FGV4, NONE, PAY3),
+       [0xA2] = WX_PTT(IP, IPV4, IGM, IPV4, NONE, PAY3),
+       [0xA3] = WX_PTT(IP, IPV4, IGM, IPV4, UDP,  PAY4),
+       [0xA4] = WX_PTT(IP, IPV4, IGM, IPV4, TCP,  PAY4),
+       [0xA5] = WX_PTT(IP, IPV4, IGM, IPV4, SCTP, PAY4),
+       [0xA9] = WX_PTT(IP, IPV4, IGM, FGV6, NONE, PAY3),
+       [0xAA] = WX_PTT(IP, IPV4, IGM, IPV6, NONE, PAY3),
+       [0xAB] = WX_PTT(IP, IPV4, IGM, IPV6, UDP,  PAY4),
+       [0xAC] = WX_PTT(IP, IPV4, IGM, IPV6, TCP,  PAY4),
+       [0xAD] = WX_PTT(IP, IPV4, IGM, IPV6, SCTP, PAY4),
+
+       /* IPv4 --> GRE/NAT --> MAC+VLAN --> NONE/IPv4/IPv6 */
+       [0xB0] = WX_PTT(IP, IPV4, IGMV, NONE, NONE, PAY3),
+       [0xB1] = WX_PTT(IP, IPV4, IGMV, FGV4, NONE, PAY3),
+       [0xB2] = WX_PTT(IP, IPV4, IGMV, IPV4, NONE, PAY3),
+       [0xB3] = WX_PTT(IP, IPV4, IGMV, IPV4, UDP,  PAY4),
+       [0xB4] = WX_PTT(IP, IPV4, IGMV, IPV4, TCP,  PAY4),
+       [0xB5] = WX_PTT(IP, IPV4, IGMV, IPV4, SCTP, PAY4),
+       [0xB9] = WX_PTT(IP, IPV4, IGMV, FGV6, NONE, PAY3),
+       [0xBA] = WX_PTT(IP, IPV4, IGMV, IPV6, NONE, PAY3),
+       [0xBB] = WX_PTT(IP, IPV4, IGMV, IPV6, UDP,  PAY4),
+       [0xBC] = WX_PTT(IP, IPV4, IGMV, IPV6, TCP,  PAY4),
+       [0xBD] = WX_PTT(IP, IPV4, IGMV, IPV6, SCTP, PAY4),
+
+       /* IPv6 --> IPv4/IPv6 */
+       [0xC1] = WX_PTT(IP, IPV6, IPIP, FGV4, NONE, PAY3),
+       [0xC2] = WX_PTT(IP, IPV6, IPIP, IPV4, NONE, PAY3),
+       [0xC3] = WX_PTT(IP, IPV6, IPIP, IPV4, UDP,  PAY4),
+       [0xC4] = WX_PTT(IP, IPV6, IPIP, IPV4, TCP,  PAY4),
+       [0xC5] = WX_PTT(IP, IPV6, IPIP, IPV4, SCTP, PAY4),
+       [0xC9] = WX_PTT(IP, IPV6, IPIP, FGV6, NONE, PAY3),
+       [0xCA] = WX_PTT(IP, IPV6, IPIP, IPV6, NONE, PAY3),
+       [0xCB] = WX_PTT(IP, IPV6, IPIP, IPV6, UDP,  PAY4),
+       [0xCC] = WX_PTT(IP, IPV6, IPIP, IPV6, TCP,  PAY4),
+       [0xCD] = WX_PTT(IP, IPV6, IPIP, IPV6, SCTP, PAY4),
+
+       /* IPv6 --> GRE/NAT -> NONE/IPv4/IPv6 */
+       [0xD0] = WX_PTT(IP, IPV6, IG, NONE, NONE, PAY3),
+       [0xD1] = WX_PTT(IP, IPV6, IG, FGV4, NONE, PAY3),
+       [0xD2] = WX_PTT(IP, IPV6, IG, IPV4, NONE, PAY3),
+       [0xD3] = WX_PTT(IP, IPV6, IG, IPV4, UDP,  PAY4),
+       [0xD4] = WX_PTT(IP, IPV6, IG, IPV4, TCP,  PAY4),
+       [0xD5] = WX_PTT(IP, IPV6, IG, IPV4, SCTP, PAY4),
+       [0xD9] = WX_PTT(IP, IPV6, IG, FGV6, NONE, PAY3),
+       [0xDA] = WX_PTT(IP, IPV6, IG, IPV6, NONE, PAY3),
+       [0xDB] = WX_PTT(IP, IPV6, IG, IPV6, UDP,  PAY4),
+       [0xDC] = WX_PTT(IP, IPV6, IG, IPV6, TCP,  PAY4),
+       [0xDD] = WX_PTT(IP, IPV6, IG, IPV6, SCTP, PAY4),
+
+       /* IPv6 --> GRE/NAT -> MAC -> NONE/IPv4/IPv6 */
+       [0xE0] = WX_PTT(IP, IPV6, IGM, NONE, NONE, PAY3),
+       [0xE1] = WX_PTT(IP, IPV6, IGM, FGV4, NONE, PAY3),
+       [0xE2] = WX_PTT(IP, IPV6, IGM, IPV4, NONE, PAY3),
+       [0xE3] = WX_PTT(IP, IPV6, IGM, IPV4, UDP,  PAY4),
+       [0xE4] = WX_PTT(IP, IPV6, IGM, IPV4, TCP,  PAY4),
+       [0xE5] = WX_PTT(IP, IPV6, IGM, IPV4, SCTP, PAY4),
+       [0xE9] = WX_PTT(IP, IPV6, IGM, FGV6, NONE, PAY3),
+       [0xEA] = WX_PTT(IP, IPV6, IGM, IPV6, NONE, PAY3),
+       [0xEB] = WX_PTT(IP, IPV6, IGM, IPV6, UDP,  PAY4),
+       [0xEC] = WX_PTT(IP, IPV6, IGM, IPV6, TCP,  PAY4),
+       [0xED] = WX_PTT(IP, IPV6, IGM, IPV6, SCTP, PAY4),
+
+       /* IPv6 --> GRE/NAT -> MAC+VLAN -> NONE/IPv4/IPv6 */
+       [0xF0] = WX_PTT(IP, IPV6, IGMV, NONE, NONE, PAY3),
+       [0xF1] = WX_PTT(IP, IPV6, IGMV, FGV4, NONE, PAY3),
+       [0xF2] = WX_PTT(IP, IPV6, IGMV, IPV4, NONE, PAY3),
+       [0xF3] = WX_PTT(IP, IPV6, IGMV, IPV4, UDP,  PAY4),
+       [0xF4] = WX_PTT(IP, IPV6, IGMV, IPV4, TCP,  PAY4),
+       [0xF5] = WX_PTT(IP, IPV6, IGMV, IPV4, SCTP, PAY4),
+       [0xF9] = WX_PTT(IP, IPV6, IGMV, FGV6, NONE, PAY3),
+       [0xFA] = WX_PTT(IP, IPV6, IGMV, IPV6, NONE, PAY3),
+       [0xFB] = WX_PTT(IP, IPV6, IGMV, IPV6, UDP,  PAY4),
+       [0xFC] = WX_PTT(IP, IPV6, IGMV, IPV6, TCP,  PAY4),
+       [0xFD] = WX_PTT(IP, IPV6, IGMV, IPV6, SCTP, PAY4),
+};
+
+static struct wx_dec_ptype wx_decode_ptype(const u8 ptype)
+{
+       return wx_ptype_lookup[ptype];
+}
+
 /* wx_test_staterr - tests bits in Rx descriptor status and error fields */
 static __le32 wx_test_staterr(union wx_rx_desc *rx_desc,
                              const u32 stat_err_bits)
@@ -419,6 +562,116 @@ static bool wx_cleanup_headers(struct wx_ring *rx_ring,
        return false;
 }
 
+static void wx_rx_hash(struct wx_ring *ring,
+                      union wx_rx_desc *rx_desc,
+                      struct sk_buff *skb)
+{
+       u16 rss_type;
+
+       if (!(ring->netdev->features & NETIF_F_RXHASH))
+               return;
+
+       rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) &
+                              WX_RXD_RSSTYPE_MASK;
+
+       if (!rss_type)
+               return;
+
+       skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+                    (WX_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
+                    PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+}
+
+/**
+ * wx_rx_checksum - indicate in skb if hw indicated a good cksum
+ * @ring: structure containing ring specific data
+ * @rx_desc: current Rx descriptor being processed
+ * @skb: skb currently being received and modified
+ **/
+static void wx_rx_checksum(struct wx_ring *ring,
+                          union wx_rx_desc *rx_desc,
+                          struct sk_buff *skb)
+{
+       struct wx_dec_ptype dptype = wx_decode_ptype(WX_RXD_PKTTYPE(rx_desc));
+
+       skb_checksum_none_assert(skb);
+       /* Rx csum disabled */
+       if (!(ring->netdev->features & NETIF_F_RXCSUM))
+               return;
+
+       /* if IPv4 header checksum error */
+       if ((wx_test_staterr(rx_desc, WX_RXD_STAT_IPCS) &&
+            wx_test_staterr(rx_desc, WX_RXD_ERR_IPE)) ||
+           (wx_test_staterr(rx_desc, WX_RXD_STAT_OUTERIPCS) &&
+            wx_test_staterr(rx_desc, WX_RXD_ERR_OUTERIPER))) {
+               ring->rx_stats.csum_err++;
+               return;
+       }
+
+       /* L4 checksum offload flag must set for the below code to work */
+       if (!wx_test_staterr(rx_desc, WX_RXD_STAT_L4CS))
+               return;
+
+       /* Hardware can't guarantee csum if IPv6 Dest Header found */
+       if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP && WX_RXD_IPV6EX(rx_desc))
+               return;
+
+       /* if L4 checksum error */
+       if (wx_test_staterr(rx_desc, WX_RXD_ERR_TCPE)) {
+               ring->rx_stats.csum_err++;
+               return;
+       }
+
+       /* It must be a TCP or UDP or SCTP packet with a valid checksum */
+       skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+       /* If there is an outer header present that might contain a checksum
+        * we need to bump the checksum level by 1 to reflect the fact that
+        * we are indicating we validated the inner checksum.
+        */
+       if (dptype.etype >= WX_DEC_PTYPE_ETYPE_IG)
+               __skb_incr_checksum_unnecessary(skb);
+       ring->rx_stats.csum_good_cnt++;
+}
+
+static void wx_rx_vlan(struct wx_ring *ring, union wx_rx_desc *rx_desc,
+                      struct sk_buff *skb)
+{
+       u16 ethertype;
+       u8 idx = 0;
+
+       if ((ring->netdev->features &
+            (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) &&
+           wx_test_staterr(rx_desc, WX_RXD_STAT_VP)) {
+               idx = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) &
+                      0x1c0) >> 6;
+               ethertype = ring->q_vector->wx->tpid[idx];
+               __vlan_hwaccel_put_tag(skb, htons(ethertype),
+                                      le16_to_cpu(rx_desc->wb.upper.vlan));
+       }
+}
+
+/**
+ * wx_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, protocol, and
+ * other fields within the skb.
+ **/
+static void wx_process_skb_fields(struct wx_ring *rx_ring,
+                                 union wx_rx_desc *rx_desc,
+                                 struct sk_buff *skb)
+{
+       wx_rx_hash(rx_ring, rx_desc, skb);
+       wx_rx_checksum(rx_ring, rx_desc, skb);
+       wx_rx_vlan(rx_ring, rx_desc, skb);
+       skb_record_rx_queue(skb, rx_ring->queue_index);
+       skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
 /**
  * wx_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @q_vector: structure containing interrupt and ring information
@@ -486,8 +739,8 @@ static int wx_clean_rx_irq(struct wx_q_vector *q_vector,
                /* probably a little skewed due to removing CRC */
                total_rx_bytes += skb->len;
 
-               skb_record_rx_queue(skb, rx_ring->queue_index);
-               skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+               /* populate checksum, timestamp, VLAN, and protocol */
+               wx_process_skb_fields(rx_ring, rx_desc, skb);
                napi_gro_receive(&q_vector->napi, skb);
 
                /* update budget accounting */
@@ -707,11 +960,50 @@ static int wx_maybe_stop_tx(struct wx_ring *tx_ring, u16 size)
        return 0;
 }
 
+static u32 wx_tx_cmd_type(u32 tx_flags)
+{
+       /* set type for advanced descriptor with frame checksum insertion */
+       u32 cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS;
+
+       /* set HW vlan bit if vlan is present */
+       cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_HW_VLAN, WX_TXD_VLE);
+       /* set segmentation enable bits for TSO/FSO */
+       cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_TSO, WX_TXD_TSE);
+       /* set timestamp bit if present */
+       cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_TSTAMP, WX_TXD_MAC_TSTAMP);
+       cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_LINKSEC, WX_TXD_LINKSEC);
+
+       return cmd_type;
+}
+
+static void wx_tx_olinfo_status(union wx_tx_desc *tx_desc,
+                               u32 tx_flags, unsigned int paylen)
+{
+       u32 olinfo_status = paylen << WX_TXD_PAYLEN_SHIFT;
+
+       /* enable L4 checksum for TSO and TX checksum offload */
+       olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_CSUM, WX_TXD_L4CS);
+       /* enable IPv4 checksum for TSO */
+       olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_IPV4, WX_TXD_IIPCS);
+       /* enable outer IPv4 checksum for TSO */
+       olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_OUTER_IPV4,
+                                    WX_TXD_EIPCS);
+       /* Check Context must be set if Tx switch is enabled, which it
+        * always is for case where virtual functions are running
+        */
+       olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_CC, WX_TXD_CC);
+       olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_IPSEC,
+                                    WX_TXD_IPSEC);
+       tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
 static void wx_tx_map(struct wx_ring *tx_ring,
-                     struct wx_tx_buffer *first)
+                     struct wx_tx_buffer *first,
+                     const u8 hdr_len)
 {
        struct sk_buff *skb = first->skb;
        struct wx_tx_buffer *tx_buffer;
+       u32 tx_flags = first->tx_flags;
        u16 i = tx_ring->next_to_use;
        unsigned int data_len, size;
        union wx_tx_desc *tx_desc;
@@ -719,10 +1011,9 @@ static void wx_tx_map(struct wx_ring *tx_ring,
        dma_addr_t dma;
        u32 cmd_type;
 
-       cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS;
+       cmd_type = wx_tx_cmd_type(tx_flags);
        tx_desc = WX_TX_DESC(tx_ring, i);
-
-       tx_desc->read.olinfo_status = cpu_to_le32(skb->len << WX_TXD_PAYLEN_SHIFT);
+       wx_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len);
 
        size = skb_headlen(skb);
        data_len = skb->data_len;
@@ -838,12 +1129,399 @@ dma_error:
        tx_ring->next_to_use = i;
 }
 
+static void wx_tx_ctxtdesc(struct wx_ring *tx_ring, u32 vlan_macip_lens,
+                          u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx)
+{
+       struct wx_tx_context_desc *context_desc;
+       u16 i = tx_ring->next_to_use;
+
+       context_desc = WX_TX_CTXTDESC(tx_ring, i);
+       i++;
+       tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+       /* set bits to identify this as an advanced context descriptor */
+       type_tucmd |= WX_TXD_DTYP_CTXT;
+       context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
+       context_desc->seqnum_seed       = cpu_to_le32(fcoe_sof_eof);
+       context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
+       context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
+}
+
+static void wx_get_ipv6_proto(struct sk_buff *skb, int offset, u8 *nexthdr)
+{
+       struct ipv6hdr *hdr = (struct ipv6hdr *)(skb->data + offset);
+
+       *nexthdr = hdr->nexthdr;
+       offset += sizeof(struct ipv6hdr);
+       while (ipv6_ext_hdr(*nexthdr)) {
+               struct ipv6_opt_hdr _hdr, *hp;
+
+               if (*nexthdr == NEXTHDR_NONE)
+                       return;
+               hp = skb_header_pointer(skb, offset, sizeof(_hdr), &_hdr);
+               if (!hp)
+                       return;
+               if (*nexthdr == NEXTHDR_FRAGMENT)
+                       break;
+               *nexthdr = hp->nexthdr;
+       }
+}
+
+union network_header {
+       struct iphdr *ipv4;
+       struct ipv6hdr *ipv6;
+       void *raw;
+};
+
+static u8 wx_encode_tx_desc_ptype(const struct wx_tx_buffer *first)
+{
+       u8 tun_prot = 0, l4_prot = 0, ptype = 0;
+       struct sk_buff *skb = first->skb;
+
+       if (skb->encapsulation) {
+               union network_header hdr;
+
+               switch (first->protocol) {
+               case htons(ETH_P_IP):
+                       tun_prot = ip_hdr(skb)->protocol;
+                       ptype = WX_PTYPE_TUN_IPV4;
+                       break;
+               case htons(ETH_P_IPV6):
+                       wx_get_ipv6_proto(skb, skb_network_offset(skb), &tun_prot);
+                       ptype = WX_PTYPE_TUN_IPV6;
+                       break;
+               default:
+                       return ptype;
+               }
+
+               if (tun_prot == IPPROTO_IPIP) {
+                       hdr.raw = (void *)inner_ip_hdr(skb);
+                       ptype |= WX_PTYPE_PKT_IPIP;
+               } else if (tun_prot == IPPROTO_UDP) {
+                       hdr.raw = (void *)inner_ip_hdr(skb);
+                       if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+                           skb->inner_protocol != htons(ETH_P_TEB)) {
+                               ptype |= WX_PTYPE_PKT_IG;
+                       } else {
+                               if (((struct ethhdr *)skb_inner_mac_header(skb))->h_proto
+                                    == htons(ETH_P_8021Q))
+                                       ptype |= WX_PTYPE_PKT_IGMV;
+                               else
+                                       ptype |= WX_PTYPE_PKT_IGM;
+                       }
+
+               } else if (tun_prot == IPPROTO_GRE) {
+                       hdr.raw = (void *)inner_ip_hdr(skb);
+                       if (skb->inner_protocol ==  htons(ETH_P_IP) ||
+                           skb->inner_protocol ==  htons(ETH_P_IPV6)) {
+                               ptype |= WX_PTYPE_PKT_IG;
+                       } else {
+                               if (((struct ethhdr *)skb_inner_mac_header(skb))->h_proto
+                                   == htons(ETH_P_8021Q))
+                                       ptype |= WX_PTYPE_PKT_IGMV;
+                               else
+                                       ptype |= WX_PTYPE_PKT_IGM;
+                       }
+               } else {
+                       return ptype;
+               }
+
+               switch (hdr.ipv4->version) {
+               case IPVERSION:
+                       l4_prot = hdr.ipv4->protocol;
+                       break;
+               case 6:
+                       wx_get_ipv6_proto(skb, skb_inner_network_offset(skb), &l4_prot);
+                       ptype |= WX_PTYPE_PKT_IPV6;
+                       break;
+               default:
+                       return ptype;
+               }
+       } else {
+               switch (first->protocol) {
+               case htons(ETH_P_IP):
+                       l4_prot = ip_hdr(skb)->protocol;
+                       ptype = WX_PTYPE_PKT_IP;
+                       break;
+               case htons(ETH_P_IPV6):
+                       wx_get_ipv6_proto(skb, skb_network_offset(skb), &l4_prot);
+                       ptype = WX_PTYPE_PKT_IP | WX_PTYPE_PKT_IPV6;
+                       break;
+               default:
+                       return WX_PTYPE_PKT_MAC | WX_PTYPE_TYP_MAC;
+               }
+       }
+       switch (l4_prot) {
+       case IPPROTO_TCP:
+               ptype |= WX_PTYPE_TYP_TCP;
+               break;
+       case IPPROTO_UDP:
+               ptype |= WX_PTYPE_TYP_UDP;
+               break;
+       case IPPROTO_SCTP:
+               ptype |= WX_PTYPE_TYP_SCTP;
+               break;
+       default:
+               ptype |= WX_PTYPE_TYP_IP;
+               break;
+       }
+
+       return ptype;
+}
+
+static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first,
+                 u8 *hdr_len, u8 ptype)
+{
+       u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
+       struct net_device *netdev = tx_ring->netdev;
+       u32 l4len, tunhdr_eiplen_tunlen = 0;
+       struct sk_buff *skb = first->skb;
+       bool enc = skb->encapsulation;
+       struct ipv6hdr *ipv6h;
+       struct tcphdr *tcph;
+       struct iphdr *iph;
+       u8 tun_prot = 0;
+       int err;
+
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               return 0;
+
+       if (!skb_is_gso(skb))
+               return 0;
+
+       err = skb_cow_head(skb, 0);
+       if (err < 0)
+               return err;
+
+       /* indicates the inner headers in the skbuff are valid. */
+       iph = enc ? inner_ip_hdr(skb) : ip_hdr(skb);
+       if (iph->version == 4) {
+               tcph = enc ? inner_tcp_hdr(skb) : tcp_hdr(skb);
+               iph->tot_len = 0;
+               iph->check = 0;
+               tcph->check = ~csum_tcpudp_magic(iph->saddr,
+                                                iph->daddr, 0,
+                                                IPPROTO_TCP, 0);
+               first->tx_flags |= WX_TX_FLAGS_TSO |
+                                  WX_TX_FLAGS_CSUM |
+                                  WX_TX_FLAGS_IPV4 |
+                                  WX_TX_FLAGS_CC;
+       } else if (iph->version == 6 && skb_is_gso_v6(skb)) {
+               ipv6h = enc ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
+               tcph = enc ? inner_tcp_hdr(skb) : tcp_hdr(skb);
+               ipv6h->payload_len = 0;
+               tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+                                              &ipv6h->daddr, 0,
+                                              IPPROTO_TCP, 0);
+               first->tx_flags |= WX_TX_FLAGS_TSO |
+                                  WX_TX_FLAGS_CSUM |
+                                  WX_TX_FLAGS_CC;
+       }
+
+       /* compute header lengths */
+       l4len = enc ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
+       *hdr_len = enc ? (skb_inner_transport_header(skb) - skb->data) :
+                        skb_transport_offset(skb);
+       *hdr_len += l4len;
+
+       /* update gso size and bytecount with header size */
+       first->gso_segs = skb_shinfo(skb)->gso_segs;
+       first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+       /* mss_l4len_id: use 0 as index for TSO */
+       mss_l4len_idx = l4len << WX_TXD_L4LEN_SHIFT;
+       mss_l4len_idx |= skb_shinfo(skb)->gso_size << WX_TXD_MSS_SHIFT;
+
+       /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
+       if (enc) {
+               switch (first->protocol) {
+               case htons(ETH_P_IP):
+                       tun_prot = ip_hdr(skb)->protocol;
+                       first->tx_flags |= WX_TX_FLAGS_OUTER_IPV4;
+                       break;
+               case htons(ETH_P_IPV6):
+                       tun_prot = ipv6_hdr(skb)->nexthdr;
+                       break;
+               default:
+                       break;
+               }
+               switch (tun_prot) {
+               case IPPROTO_UDP:
+                       tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_UDP;
+                       tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) <<
+                                                WX_TXD_OUTER_IPLEN_SHIFT) |
+                                               (((skb_inner_mac_header(skb) -
+                                               skb_transport_header(skb)) >> 1) <<
+                                               WX_TXD_TUNNEL_LEN_SHIFT);
+                       break;
+               case IPPROTO_GRE:
+                       tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_GRE;
+                       tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) <<
+                                                WX_TXD_OUTER_IPLEN_SHIFT) |
+                                               (((skb_inner_mac_header(skb) -
+                                               skb_transport_header(skb)) >> 1) <<
+                                               WX_TXD_TUNNEL_LEN_SHIFT);
+                       break;
+               case IPPROTO_IPIP:
+                       tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb) -
+                                               (char *)ip_hdr(skb)) >> 2) <<
+                                               WX_TXD_OUTER_IPLEN_SHIFT;
+                       break;
+               default:
+                       break;
+               }
+               vlan_macip_lens = skb_inner_network_header_len(skb) >> 1;
+       } else {
+               vlan_macip_lens = skb_network_header_len(skb) >> 1;
+       }
+
+       vlan_macip_lens |= skb_network_offset(skb) << WX_TXD_MACLEN_SHIFT;
+       vlan_macip_lens |= first->tx_flags & WX_TX_FLAGS_VLAN_MASK;
+
+       type_tucmd = ptype << 24;
+       if (skb->vlan_proto == htons(ETH_P_8021AD) &&
+           netdev->features & NETIF_F_HW_VLAN_STAG_TX)
+               type_tucmd |= WX_SET_FLAG(first->tx_flags,
+                                         WX_TX_FLAGS_HW_VLAN,
+                                         0x1 << WX_TXD_TAG_TPID_SEL_SHIFT);
+       wx_tx_ctxtdesc(tx_ring, vlan_macip_lens, tunhdr_eiplen_tunlen,
+                      type_tucmd, mss_l4len_idx);
+
+       return 1;
+}
+
/**
 * wx_tx_csum - build the checksum context descriptor for one Tx frame
 * @tx_ring: ring the context descriptor will be written to
 * @first: first wx_tx_buffer of the frame; tx_flags and protocol are
 *         already populated by the caller (wx_xmit_frame_ring)
 * @ptype: hardware packet-type code, placed in bits 31:24 of type_tucmd
 *
 * Called on the transmit path for frames that were not handled by TSO.
 * Derives vlan_macip_lens / tunhdr_eiplen_tunlen / mss_l4len_idx from the
 * skb headers and emits one context descriptor via wx_tx_ctxtdesc().
 * For skbs without CHECKSUM_PARTIAL, a descriptor is only emitted when a
 * HW VLAN tag or the context-check (CC) flag still requires one.
 */
static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first,
		       u8 ptype)
{
	u32 tunhdr_eiplen_tunlen = 0, vlan_macip_lens = 0;
	struct net_device *netdev = tx_ring->netdev;
	u32 mss_l4len_idx = 0, type_tucmd;
	struct sk_buff *skb = first->skb;
	u8 tun_prot = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* No checksum offload requested: skip the descriptor
		 * entirely unless VLAN insertion or CC still needs one.
		 */
		if (!(first->tx_flags & WX_TX_FLAGS_HW_VLAN) &&
		    !(first->tx_flags & WX_TX_FLAGS_CC))
			return;
		vlan_macip_lens = skb_network_offset(skb) <<
				  WX_TXD_MACLEN_SHIFT;
	} else {
		u8 l4_prot = 0;
		/* Overlaid views of the (possibly inner) network and
		 * transport headers, so IPv4/IPv6 can share one pointer.
		 */
		union {
			struct iphdr *ipv4;
			struct ipv6hdr *ipv6;
			u8 *raw;
		} network_hdr;
		union {
			struct tcphdr *tcphdr;
			u8 *raw;
		} transport_hdr;

		if (skb->encapsulation) {
			/* Tunnelled frame: checksum the inner headers and
			 * describe the outer IP + tunnel headers separately.
			 */
			network_hdr.raw = skb_inner_network_header(skb);
			transport_hdr.raw = skb_inner_transport_header(skb);
			vlan_macip_lens = skb_network_offset(skb) <<
					  WX_TXD_MACLEN_SHIFT;
			switch (first->protocol) {
			case htons(ETH_P_IP):
				tun_prot = ip_hdr(skb)->protocol;
				break;
			case htons(ETH_P_IPV6):
				tun_prot = ipv6_hdr(skb)->nexthdr;
				break;
			default:
				/* Unknown outer protocol: cannot offload. */
				return;
			}
			switch (tun_prot) {
			case IPPROTO_UDP:
				/* Outer IP header length in 4-byte units
				 * (>> 2), tunnel header length in 2-byte
				 * units (>> 1): inner MAC minus outer
				 * transport header.
				 */
				tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_UDP;
				tunhdr_eiplen_tunlen |=
					((skb_network_header_len(skb) >> 2) <<
					WX_TXD_OUTER_IPLEN_SHIFT) |
					(((skb_inner_mac_header(skb) -
					skb_transport_header(skb)) >> 1) <<
					WX_TXD_TUNNEL_LEN_SHIFT);
				break;
			case IPPROTO_GRE:
				tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_GRE;
				tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) <<
							 WX_TXD_OUTER_IPLEN_SHIFT) |
							 (((skb_inner_mac_header(skb) -
							    skb_transport_header(skb)) >> 1) <<
							  WX_TXD_TUNNEL_LEN_SHIFT);
				break;
			case IPPROTO_IPIP:
				/* IP-in-IP: only the outer header length
				 * (inner IP minus outer IP, in 4-byte units)
				 * is reported; no tunnel header field.
				 */
				tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb) -
							(char *)ip_hdr(skb)) >> 2) <<
							WX_TXD_OUTER_IPLEN_SHIFT;
				break;
			default:
				break;
			}

		} else {
			network_hdr.raw = skb_network_header(skb);
			transport_hdr.raw = skb_transport_header(skb);
			vlan_macip_lens = skb_network_offset(skb) <<
					  WX_TXD_MACLEN_SHIFT;
		}

		/* (Inner) IP header length in 2-byte units, and the L4
		 * protocol number used to size the L4 header below.
		 */
		switch (network_hdr.ipv4->version) {
		case IPVERSION:
			vlan_macip_lens |= (transport_hdr.raw - network_hdr.raw) >> 1;
			l4_prot = network_hdr.ipv4->protocol;
			break;
		case 6:
			vlan_macip_lens |= (transport_hdr.raw - network_hdr.raw) >> 1;
			l4_prot = network_hdr.ipv6->nexthdr;
			break;
		default:
			break;
		}

		/* L4 header length for the checksum engine; TCP length comes
		 * from the data offset field, UDP/SCTP are fixed size.
		 */
		switch (l4_prot) {
		case IPPROTO_TCP:
			mss_l4len_idx = (transport_hdr.tcphdr->doff * 4) <<
					WX_TXD_L4LEN_SHIFT;
			break;
		case IPPROTO_SCTP:
			mss_l4len_idx = sizeof(struct sctphdr) <<
					WX_TXD_L4LEN_SHIFT;
			break;
		case IPPROTO_UDP:
			mss_l4len_idx = sizeof(struct udphdr) <<
					WX_TXD_L4LEN_SHIFT;
			break;
		default:
			break;
		}

		/* update TX checksum flag */
		first->tx_flags |= WX_TX_FLAGS_CSUM;
	}
	first->tx_flags |= WX_TX_FLAGS_CC;
	/* vlan_macip_lens: MACLEN, VLAN tag */
	vlan_macip_lens |= first->tx_flags & WX_TX_FLAGS_VLAN_MASK;

	type_tucmd = ptype << 24;
	/* Select the 802.1AD TPID for the inserted tag when stag Tx offload
	 * is active and the skb carries an 802.1AD VLAN.
	 */
	if (skb->vlan_proto == htons(ETH_P_8021AD) &&
	    netdev->features & NETIF_F_HW_VLAN_STAG_TX)
		type_tucmd |= WX_SET_FLAG(first->tx_flags,
					  WX_TX_FLAGS_HW_VLAN,
					  0x1 << WX_TXD_TAG_TPID_SEL_SHIFT);
	wx_tx_ctxtdesc(tx_ring, vlan_macip_lens, tunhdr_eiplen_tunlen,
		       type_tucmd, mss_l4len_idx);
}
+
 static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb,
                                      struct wx_ring *tx_ring)
 {
        u16 count = TXD_USE_COUNT(skb_headlen(skb));
        struct wx_tx_buffer *first;
+       u8 hdr_len = 0, ptype;
        unsigned short f;
+       u32 tx_flags = 0;
+       int tso;
 
        /* need: 1 descriptor per page * PAGE_SIZE/WX_MAX_DATA_PER_TXD,
         *       + 1 desc for skb_headlen/WX_MAX_DATA_PER_TXD,
@@ -864,7 +1542,29 @@ static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb,
        first->bytecount = skb->len;
        first->gso_segs = 1;
 
-       wx_tx_map(tx_ring, first);
+       /* if we have a HW VLAN tag being added default to the HW one */
+       if (skb_vlan_tag_present(skb)) {
+               tx_flags |= skb_vlan_tag_get(skb) << WX_TX_FLAGS_VLAN_SHIFT;
+               tx_flags |= WX_TX_FLAGS_HW_VLAN;
+       }
+
+       /* record initial flags and protocol */
+       first->tx_flags = tx_flags;
+       first->protocol = vlan_get_protocol(skb);
+
+       ptype = wx_encode_tx_desc_ptype(first);
+
+       tso = wx_tso(tx_ring, first, &hdr_len, ptype);
+       if (tso < 0)
+               goto out_drop;
+       else if (!tso)
+               wx_tx_csum(tx_ring, first, ptype);
+       wx_tx_map(tx_ring, first, hdr_len);
+
+       return NETDEV_TX_OK;
+out_drop:
+       dev_kfree_skb_any(first->skb);
+       first->skb = NULL;
 
        return NETDEV_TX_OK;
 }
@@ -1348,7 +2048,8 @@ void wx_free_irq(struct wx *wx)
                free_irq(entry->vector, q_vector);
        }
 
-       free_irq(wx->msix_entries[vector].vector, wx);
+       if (wx->mac.type == wx_mac_em)
+               free_irq(wx->msix_entries[vector].vector, wx);
 }
 EXPORT_SYMBOL(wx_free_irq);
 
@@ -2004,4 +2705,24 @@ void wx_get_stats64(struct net_device *netdev,
 }
 EXPORT_SYMBOL(wx_get_stats64);
 
+int wx_set_features(struct net_device *netdev, netdev_features_t features)
+{
+       netdev_features_t changed = netdev->features ^ features;
+       struct wx *wx = netdev_priv(netdev);
+
+       if (changed & NETIF_F_RXHASH)
+               wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN,
+                     WX_RDB_RA_CTL_RSS_EN);
+       else
+               wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, 0);
+
+       if (changed &
+           (NETIF_F_HW_VLAN_CTAG_RX |
+            NETIF_F_HW_VLAN_STAG_RX))
+               wx_set_rx_mode(netdev);
+
+       return 1;
+}
+EXPORT_SYMBOL(wx_set_features);
+
 MODULE_LICENSE("GPL");
index 50ee41f..df1f4a5 100644 (file)
@@ -28,5 +28,6 @@ void wx_free_resources(struct wx *wx);
 int wx_setup_resources(struct wx *wx);
 void wx_get_stats64(struct net_device *netdev,
                    struct rtnl_link_stats64 *stats);
+int wx_set_features(struct net_device *netdev, netdev_features_t features);
 
 #endif /* _NGBE_LIB_H_ */
index 32f952d..29dfb56 100644 (file)
@@ -6,6 +6,8 @@
 
 #include <linux/bitfield.h>
 #include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
 
 #define WX_NCSI_SUP                             0x8000
 #define WX_NCSI_MASK                            0x8000
@@ -64,6 +66,8 @@
 #define WX_CFG_PORT_CTL_QINQ         BIT(2)
 #define WX_CFG_PORT_CTL_D_VLAN       BIT(0) /* double vlan*/
 #define WX_CFG_TAG_TPID(_i)          (0x14430 + ((_i) * 4))
+#define WX_CFG_PORT_CTL_NUM_VT_MASK  GENMASK(13, 12) /* number of TVs */
+
 
 /* GPIO Registers */
 #define WX_GPIO_DR                   0x14800
@@ -79,7 +83,9 @@
 #define WX_GPIO_INTMASK              0x14834
 #define WX_GPIO_INTTYPE_LEVEL        0x14838
 #define WX_GPIO_POLARITY             0x1483C
+#define WX_GPIO_INTSTATUS            0x14844
 #define WX_GPIO_EOI                  0x1484C
+#define WX_GPIO_EXT                  0x14850
 
 /*********************** Transmit DMA registers **************************/
 /* transmit global control */
@@ -87,6 +93,8 @@
 /* TDM CTL BIT */
 #define WX_TDM_CTL_TE                BIT(0) /* Transmit Enable */
 #define WX_TDM_PB_THRE(_i)           (0x18020 + ((_i) * 4))
+#define WX_TDM_RP_IDX                0x1820C
+#define WX_TDM_RP_RATE               0x18404
 
 /***************************** RDB registers *********************************/
 /* receive packet buffer */
 #define WX_RDB_PL_CFG_L2HDR          BIT(3)
 #define WX_RDB_PL_CFG_TUN_TUNHDR     BIT(4)
 #define WX_RDB_PL_CFG_TUN_OUTL2HDR   BIT(5)
+#define WX_RDB_RA_CTL                0x194F4
+#define WX_RDB_RA_CTL_RSS_EN         BIT(2) /* RSS Enable */
 
 /******************************* PSR Registers *******************************/
 /* psr control */
 #define WX_PSR_LAN_FLEX_DW_H(_i)     (0x15C04 + ((_i) * 16))
 #define WX_PSR_LAN_FLEX_MSK(_i)      (0x15C08 + ((_i) * 16))
 
+/* vlan tbl */
+#define WX_PSR_VLAN_TBL(_i)          (0x16000 + ((_i) * 4))
+
 /* mac switcher */
 #define WX_PSR_MAC_SWC_AD_L          0x16200
 #define WX_PSR_MAC_SWC_AD_H          0x16204
 #define WX_PSR_MAC_SWC_IDX           0x16210
 #define WX_CLEAR_VMDQ_ALL            0xFFFFFFFFU
 
+/* vlan switch */
+#define WX_PSR_VLAN_SWC              0x16220
+#define WX_PSR_VLAN_SWC_VM_L         0x16224
+#define WX_PSR_VLAN_SWC_VM_H         0x16228
+#define WX_PSR_VLAN_SWC_IDX          0x16230         /* 64 vlan entries */
+/* VLAN pool filtering masks */
+#define WX_PSR_VLAN_SWC_VIEN         BIT(31)  /* filter is valid */
+#define WX_PSR_VLAN_SWC_ENTRIES      64
+
 /********************************* RSEC **************************************/
 /* general rsec */
 #define WX_RSC_CTL                   0x17000
 #define WX_PX_RR_RP(_i)              (0x0100C + ((_i) * 0x40))
 #define WX_PX_RR_CFG(_i)             (0x01010 + ((_i) * 0x40))
 /* PX_RR_CFG bit definitions */
+#define WX_PX_RR_CFG_VLAN            BIT(31)
 #define WX_PX_RR_CFG_SPLIT_MODE      BIT(26)
 #define WX_PX_RR_CFG_RR_THER_SHIFT   16
 #define WX_PX_RR_CFG_RR_HDR_SZ       GENMASK(15, 12)
 #define WX_MAX_TXD                   8192
 
 #define WX_MAX_JUMBO_FRAME_SIZE      9432 /* max payload 9414 */
+#define VMDQ_P(p)                    p
 
 /* Supported Rx Buffer Sizes */
 #define WX_RXBUFFER_256      256    /* Used for skb receive header */
 #define TXD_USE_COUNT(S)     DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD)
 #define DESC_NEEDED          (MAX_SKB_FRAGS + 4)
 
-/* Ether Types */
-#define WX_ETH_P_CNM                 0x22E7
-
 #define WX_CFG_PORT_ST               0x14404
 
 /******************* Receive Descriptor bit definitions **********************/
 #define WX_RXD_STAT_DD               BIT(0) /* Done */
 #define WX_RXD_STAT_EOP              BIT(1) /* End of Packet */
+#define WX_RXD_STAT_VP               BIT(5) /* IEEE VLAN Pkt */
+#define WX_RXD_STAT_L4CS             BIT(7) /* L4 xsum calculated */
+#define WX_RXD_STAT_IPCS             BIT(8) /* IP xsum calculated */
+#define WX_RXD_STAT_OUTERIPCS        BIT(10) /* Cloud IP xsum calculated*/
 
+#define WX_RXD_ERR_OUTERIPER         BIT(26) /* CRC IP Header error */
 #define WX_RXD_ERR_RXE               BIT(29) /* Any MAC Error */
-
+#define WX_RXD_ERR_TCPE              BIT(30) /* TCP/UDP Checksum Error */
+#define WX_RXD_ERR_IPE               BIT(31) /* IP Checksum Error */
+
+/* RSS Hash results */
+#define WX_RXD_RSSTYPE_MASK          GENMASK(3, 0)
+#define WX_RXD_RSSTYPE_IPV4_TCP      0x00000001U
+#define WX_RXD_RSSTYPE_IPV6_TCP      0x00000003U
+#define WX_RXD_RSSTYPE_IPV4_SCTP     0x00000004U
+#define WX_RXD_RSSTYPE_IPV6_SCTP     0x00000006U
+#define WX_RXD_RSSTYPE_IPV4_UDP      0x00000007U
+#define WX_RXD_RSSTYPE_IPV6_UDP      0x00000008U
+
+#define WX_RSS_L4_TYPES_MASK \
+       ((1ul << WX_RXD_RSSTYPE_IPV4_TCP) | \
+        (1ul << WX_RXD_RSSTYPE_IPV4_UDP) | \
+        (1ul << WX_RXD_RSSTYPE_IPV4_SCTP) | \
+        (1ul << WX_RXD_RSSTYPE_IPV6_TCP) | \
+        (1ul << WX_RXD_RSSTYPE_IPV6_UDP) | \
+        (1ul << WX_RXD_RSSTYPE_IPV6_SCTP))
+/* TUN */
+#define WX_PTYPE_TUN_IPV4            0x80
+#define WX_PTYPE_TUN_IPV6            0xC0
+
+/* PKT for TUN */
+#define WX_PTYPE_PKT_IPIP            0x00 /* IP+IP */
+#define WX_PTYPE_PKT_IG              0x10 /* IP+GRE */
+#define WX_PTYPE_PKT_IGM             0x20 /* IP+GRE+MAC */
+#define WX_PTYPE_PKT_IGMV            0x30 /* IP+GRE+MAC+VLAN */
+/* PKT for !TUN */
+#define WX_PTYPE_PKT_MAC             0x10
+#define WX_PTYPE_PKT_IP              0x20
+
+/* TYP for PKT=mac */
+#define WX_PTYPE_TYP_MAC             0x01
+/* TYP for PKT=ip */
+#define WX_PTYPE_PKT_IPV6            0x08
+#define WX_PTYPE_TYP_IPFRAG          0x01
+#define WX_PTYPE_TYP_IP              0x02
+#define WX_PTYPE_TYP_UDP             0x03
+#define WX_PTYPE_TYP_TCP             0x04
+#define WX_PTYPE_TYP_SCTP            0x05
+
+#define WX_RXD_PKTTYPE(_rxd) \
+       ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 9) & 0xFF)
+#define WX_RXD_IPV6EX(_rxd) \
+       ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 6) & 0x1)
 /*********************** Transmit Descriptor Config Masks ****************/
 #define WX_TXD_STAT_DD               BIT(0)  /* Descriptor Done */
 #define WX_TXD_DTYP_DATA             0       /* Adv Data Descriptor */
 #define WX_TXD_IFCS                  BIT(25) /* Insert FCS */
 #define WX_TXD_RS                    BIT(27) /* Report Status */
 
+/*********************** Adv Transmit Descriptor Config Masks ****************/
+#define WX_TXD_MAC_TSTAMP            BIT(19) /* IEEE1588 time stamp */
+#define WX_TXD_DTYP_CTXT             BIT(20) /* Adv Context Desc */
+#define WX_TXD_LINKSEC               BIT(26) /* enable linksec */
+#define WX_TXD_VLE                   BIT(30) /* VLAN pkt enable */
+#define WX_TXD_TSE                   BIT(31) /* TCP Seg enable */
+#define WX_TXD_CC                    BIT(7) /* Check Context */
+#define WX_TXD_IPSEC                 BIT(8) /* enable ipsec esp */
+#define WX_TXD_L4CS                  BIT(9)
+#define WX_TXD_IIPCS                 BIT(10)
+#define WX_TXD_EIPCS                 BIT(11)
+#define WX_TXD_PAYLEN_SHIFT          13 /* Adv desc PAYLEN shift */
+#define WX_TXD_MACLEN_SHIFT          9  /* Adv ctxt desc mac len shift */
+#define WX_TXD_TAG_TPID_SEL_SHIFT    11
+
+#define WX_TXD_L4LEN_SHIFT           8  /* Adv ctxt L4LEN shift */
+#define WX_TXD_MSS_SHIFT             16  /* Adv ctxt MSS shift */
+
+#define WX_TXD_OUTER_IPLEN_SHIFT     12 /* Adv ctxt OUTERIPLEN shift */
+#define WX_TXD_TUNNEL_LEN_SHIFT      21 /* Adv ctxt TUNNELLEN shift */
+#define WX_TXD_TUNNEL_TYPE_SHIFT     11 /* Adv Tx Desc Tunnel Type shift */
+#define WX_TXD_TUNNEL_UDP            FIELD_PREP(BIT(WX_TXD_TUNNEL_TYPE_SHIFT), 0)
+#define WX_TXD_TUNNEL_GRE            FIELD_PREP(BIT(WX_TXD_TUNNEL_TYPE_SHIFT), 1)
+
+enum wx_tx_flags {
+       /* cmd_type flags */
+       WX_TX_FLAGS_HW_VLAN     = 0x01,
+       WX_TX_FLAGS_TSO         = 0x02,
+       WX_TX_FLAGS_TSTAMP      = 0x04,
+
+       /* olinfo flags */
+       WX_TX_FLAGS_CC          = 0x08,
+       WX_TX_FLAGS_IPV4        = 0x10,
+       WX_TX_FLAGS_CSUM        = 0x20,
+       WX_TX_FLAGS_OUTER_IPV4  = 0x100,
+       WX_TX_FLAGS_LINKSEC     = 0x200,
+       WX_TX_FLAGS_IPSEC       = 0x400,
+};
+
+/* VLAN info */
+#define WX_TX_FLAGS_VLAN_MASK                  GENMASK(31, 16)
+#define WX_TX_FLAGS_VLAN_SHIFT                 16
+
+/* wx_dec_ptype.mac: outer mac */
+enum wx_dec_ptype_mac {
+       WX_DEC_PTYPE_MAC_IP     = 0,
+       WX_DEC_PTYPE_MAC_L2     = 2,
+       WX_DEC_PTYPE_MAC_FCOE   = 3,
+};
+
+/* wx_dec_ptype.[e]ip: outer&encaped ip */
+#define WX_DEC_PTYPE_IP_FRAG   0x4
+enum wx_dec_ptype_ip {
+       WX_DEC_PTYPE_IP_NONE = 0,
+       WX_DEC_PTYPE_IP_IPV4 = 1,
+       WX_DEC_PTYPE_IP_IPV6 = 2,
+       WX_DEC_PTYPE_IP_FGV4 = WX_DEC_PTYPE_IP_FRAG | WX_DEC_PTYPE_IP_IPV4,
+       WX_DEC_PTYPE_IP_FGV6 = WX_DEC_PTYPE_IP_FRAG | WX_DEC_PTYPE_IP_IPV6,
+};
+
+/* wx_dec_ptype.etype: encaped type */
+enum wx_dec_ptype_etype {
+       WX_DEC_PTYPE_ETYPE_NONE = 0,
+       WX_DEC_PTYPE_ETYPE_IPIP = 1,    /* IP+IP */
+       WX_DEC_PTYPE_ETYPE_IG   = 2,    /* IP+GRE */
+       WX_DEC_PTYPE_ETYPE_IGM  = 3,    /* IP+GRE+MAC */
+       WX_DEC_PTYPE_ETYPE_IGMV = 4,    /* IP+GRE+MAC+VLAN */
+};
+
+/* wx_dec_ptype.proto: payload proto */
+enum wx_dec_ptype_prot {
+       WX_DEC_PTYPE_PROT_NONE  = 0,
+       WX_DEC_PTYPE_PROT_UDP   = 1,
+       WX_DEC_PTYPE_PROT_TCP   = 2,
+       WX_DEC_PTYPE_PROT_SCTP  = 3,
+       WX_DEC_PTYPE_PROT_ICMP  = 4,
+       WX_DEC_PTYPE_PROT_TS    = 5,    /* time sync */
+};
+
+/* wx_dec_ptype.layer: payload layer */
+enum wx_dec_ptype_layer {
+       WX_DEC_PTYPE_LAYER_NONE = 0,
+       WX_DEC_PTYPE_LAYER_PAY2 = 1,
+       WX_DEC_PTYPE_LAYER_PAY3 = 2,
+       WX_DEC_PTYPE_LAYER_PAY4 = 3,
+};
+
/* Decoded Rx packet type: unpacked view of the hardware ptype code,
 * built from the wx_dec_ptype_* enums above via the WX_PTT() macro.
 */
struct wx_dec_ptype {
	u32 known:1;	/* entry is valid/recognized */
	u32 mac:2;	/* outer mac */
	u32 ip:3;	/* outer ip*/
	u32 etype:3;	/* encaped type */
	u32 eip:3;	/* encaped ip */
	u32 prot:4;	/* payload proto */
	u32 layer:3;	/* payload layer */
};
+
+/* macro to make the table lines short */
+#define WX_PTT(mac, ip, etype, eip, proto, layer)\
+             {1, \
+              WX_DEC_PTYPE_MAC_##mac,          /* mac */\
+              WX_DEC_PTYPE_IP_##ip,            /* ip */ \
+              WX_DEC_PTYPE_ETYPE_##etype,      /* etype */\
+              WX_DEC_PTYPE_IP_##eip,           /* eip */\
+              WX_DEC_PTYPE_PROT_##proto,       /* proto */\
+              WX_DEC_PTYPE_LAYER_##layer       /* layer */}
+
 /* Host Interface Command Structures */
 struct wx_hic_hdr {
        u8 cmd;
@@ -412,6 +590,8 @@ struct wx_mac_info {
        u32 mta_shadow[128];
        s32 mc_filter_type;
        u32 mcft_size;
+       u32 vft_shadow[128];
+       u32 vft_size;
        u32 num_rar_entries;
        u32 rx_pb_size;
        u32 tx_pb_size;
@@ -508,10 +688,25 @@ union wx_rx_desc {
        } wb;  /* writeback */
 };
 
/* Tx context descriptor layout (little-endian, as written to the ring by
 * wx_tx_ctxtdesc()); field contents are assembled in wx_tx_csum()/wx_tso().
 */
struct wx_tx_context_desc {
	__le32 vlan_macip_lens;		/* VLAN tag, MAC and IP header lengths */
	__le32 seqnum_seed;		/* outer IP/tunnel header lengths */
	__le32 type_tucmd_mlhl;		/* ptype and descriptor type/command */
	__le32 mss_l4len_idx;		/* MSS and L4 header length */
};
+
/* if _flag is in _input, return _result */
/* Branch-free translation of one flag into another value: when _flag is
 * set in _input, yields _result, else 0.  Scales by multiplying when
 * _result is the larger power of two and dividing when _flag is larger,
 * so the ratio is always an exact integer.  Assumes _flag and _result
 * are single-bit masks — NOTE(review): confirm for any new call sites.
 */
#define WX_SET_FLAG(_input, _flag, _result) \
	(((_flag) <= (_result)) ? \
	 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \
	 ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
+
 #define WX_RX_DESC(R, i)     \
        (&(((union wx_rx_desc *)((R)->desc))[i]))
 #define WX_TX_DESC(R, i)     \
        (&(((union wx_tx_desc *)((R)->desc))[i]))
+#define WX_TX_CTXTDESC(R, i) \
+       (&(((struct wx_tx_context_desc *)((R)->desc))[i]))
 
 /* wrapper around a pointer to a socket buffer,
  * so a DMA handle can be stored along with the buffer
@@ -523,6 +718,8 @@ struct wx_tx_buffer {
        unsigned short gso_segs;
        DEFINE_DMA_UNMAP_ADDR(dma);
        DEFINE_DMA_UNMAP_LEN(len);
+       __be16 protocol;
+       u32 tx_flags;
 };
 
 struct wx_rx_buffer {
@@ -539,6 +736,11 @@ struct wx_queue_stats {
        u64 bytes;
 };
 
+struct wx_rx_queue_stats {
+       u64 csum_good_cnt;
+       u64 csum_err;
+};
+
 /* iterator for handling rings in ring container */
 #define wx_for_each_ring(posm, headm) \
        for (posm = (headm).ring; posm; posm = posm->next)
@@ -550,7 +752,6 @@ struct wx_ring_container {
        u8 count;                       /* total number of rings in vector */
        u8 itr;                         /* current ITR setting for ring */
 };
-
 struct wx_ring {
        struct wx_ring *next;           /* pointer to next ring in q_vector */
        struct wx_q_vector *q_vector;   /* backpointer to host q_vector */
@@ -580,6 +781,9 @@ struct wx_ring {
 
        struct wx_queue_stats stats;
        struct u64_stats_sync syncp;
+       union {
+               struct wx_rx_queue_stats rx_stats;
+       };
 } ____cacheline_internodealigned_in_smp;
 
 struct wx_q_vector {
@@ -598,7 +802,7 @@ struct wx_q_vector {
        char name[IFNAMSIZ + 17];
 
        /* for dynamic allocation of rings associated with this q_vector */
-       struct wx_ring ring[0] ____cacheline_internodealigned_in_smp;
+       struct wx_ring ring[] ____cacheline_internodealigned_in_smp;
 };
 
 enum wx_isb_idx {
@@ -610,6 +814,9 @@ enum wx_isb_idx {
 };
 
 struct wx {
+       unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+
+       void *priv;
        u8 __iomem *hw_addr;
        struct pci_dev *pdev;
        struct net_device *netdev;
@@ -642,6 +849,7 @@ struct wx {
        bool wol_enabled;
        bool ncsi_enabled;
        bool gpio_ctrl;
+       raw_spinlock_t gpio_lock;
 
        /* Tx fast path data */
        int num_tx_queues;
index df6b870..c99a5d3 100644 (file)
@@ -115,6 +115,7 @@ static int ngbe_sw_init(struct wx *wx)
        wx->mac.max_rx_queues = NGBE_MAX_RX_QUEUES;
        wx->mac.max_tx_queues = NGBE_MAX_TX_QUEUES;
        wx->mac.mcft_size = NGBE_MC_TBL_SIZE;
+       wx->mac.vft_size = NGBE_SP_VFT_TBL_SIZE;
        wx->mac.rx_pb_size = NGBE_RX_PB_SIZE;
        wx->mac.tx_pb_size = NGBE_TDB_PB_SZ;
 
@@ -473,9 +474,12 @@ static const struct net_device_ops ngbe_netdev_ops = {
        .ndo_change_mtu         = wx_change_mtu,
        .ndo_start_xmit         = wx_xmit_frame,
        .ndo_set_rx_mode        = wx_set_rx_mode,
+       .ndo_set_features       = wx_set_features,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = wx_set_mac,
        .ndo_get_stats64        = wx_get_stats64,
+       .ndo_vlan_rx_add_vid    = wx_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid   = wx_vlan_rx_kill_vid,
 };
 
 /**
@@ -551,12 +555,18 @@ static int ngbe_probe(struct pci_dev *pdev,
        ngbe_set_ethtool_ops(netdev);
        netdev->netdev_ops = &ngbe_netdev_ops;
 
-       netdev->features |= NETIF_F_HIGHDMA;
-       netdev->features = NETIF_F_SG;
-
+       netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM |
+                          NETIF_F_TSO | NETIF_F_TSO6 |
+                          NETIF_F_RXHASH | NETIF_F_RXCSUM;
+       netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_TSO_MANGLEID;
+       netdev->vlan_features |= netdev->features;
+       netdev->features |= NETIF_F_IPV6_CSUM | NETIF_F_VLAN_FEATURES;
        /* copy netdev features into list of user selectable features */
-       netdev->hw_features |= netdev->features |
-                              NETIF_F_RXALL;
+       netdev->hw_features |= netdev->features | NETIF_F_RXALL;
+       netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+       netdev->features |= NETIF_F_HIGHDMA;
+       netdev->hw_features |= NETIF_F_GRO;
+       netdev->features |= NETIF_F_GRO;
 
        netdev->priv_flags |= IFF_UNICAST_FLT;
        netdev->priv_flags |= IFF_SUPP_NOFCS;
index 373d5af..b70eca3 100644 (file)
@@ -136,6 +136,7 @@ enum NGBE_MSCA_CMD_value {
 #define NGBE_RAR_ENTRIES                       32
 #define NGBE_RX_PB_SIZE                                42
 #define NGBE_MC_TBL_SIZE                       128
+#define NGBE_SP_VFT_TBL_SIZE                   128
 #define NGBE_TDB_PB_SZ                         (20 * 1024) /* 160KB Packet Buffer */
 
 /* TX/RX descriptor defines */
index 6db14a2..7507f76 100644 (file)
@@ -8,4 +8,5 @@ obj-$(CONFIG_TXGBE) += txgbe.o
 
 txgbe-objs := txgbe_main.o \
               txgbe_hw.o \
+              txgbe_phy.o \
               txgbe_ethtool.o
index d914e9a..859da11 100644 (file)
@@ -6,11 +6,39 @@
 #include <linux/netdevice.h>
 
 #include "../libwx/wx_ethtool.h"
+#include "../libwx/wx_type.h"
+#include "txgbe_type.h"
 #include "txgbe_ethtool.h"
 
+static int txgbe_nway_reset(struct net_device *netdev)
+{
+       struct txgbe *txgbe = netdev_to_txgbe(netdev);
+
+       return phylink_ethtool_nway_reset(txgbe->phylink);
+}
+
+static int txgbe_get_link_ksettings(struct net_device *netdev,
+                                   struct ethtool_link_ksettings *cmd)
+{
+       struct txgbe *txgbe = netdev_to_txgbe(netdev);
+
+       return phylink_ethtool_ksettings_get(txgbe->phylink, cmd);
+}
+
+static int txgbe_set_link_ksettings(struct net_device *netdev,
+                                   const struct ethtool_link_ksettings *cmd)
+{
+       struct txgbe *txgbe = netdev_to_txgbe(netdev);
+
+       return phylink_ethtool_ksettings_set(txgbe->phylink, cmd);
+}
+
/* ethtool entry points: drvinfo/link from common libwx helpers, link
 * control delegated to phylink via the txgbe_* wrappers above.
 */
static const struct ethtool_ops txgbe_ethtool_ops = {
	.get_drvinfo		= wx_get_drvinfo,
	.nway_reset		= txgbe_nway_reset,
	.get_link		= ethtool_op_get_link,
	.get_link_ksettings	= txgbe_get_link_ksettings,
	.set_link_ksettings	= txgbe_set_link_ksettings,
};
 
 void txgbe_set_ethtool_ops(struct net_device *netdev)
index 5b8a121..46eba6d 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/netdevice.h>
 #include <linux/string.h>
 #include <linux/etherdevice.h>
+#include <linux/phylink.h>
 #include <net/ip.h>
 #include <linux/if_vlan.h>
 
@@ -15,6 +16,7 @@
 #include "../libwx/wx_hw.h"
 #include "txgbe_type.h"
 #include "txgbe_hw.h"
+#include "txgbe_phy.h"
 #include "txgbe_ethtool.h"
 
 char txgbe_driver_name[] = "txgbe";
@@ -81,6 +83,8 @@ static int txgbe_enumerate_functions(struct wx *wx)
  **/
 static void txgbe_irq_enable(struct wx *wx, bool queues)
 {
+       wr32(wx, WX_PX_MISC_IEN, TXGBE_PX_MISC_IEN_MASK);
+
        /* unmask interrupt */
        wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
        if (queues)
@@ -128,17 +132,6 @@ static irqreturn_t txgbe_intr(int __always_unused irq, void *data)
        return IRQ_HANDLED;
 }
 
-static irqreturn_t txgbe_msix_other(int __always_unused irq, void *data)
-{
-       struct wx *wx = data;
-
-       /* re-enable the original interrupt state */
-       if (netif_running(wx->netdev))
-               txgbe_irq_enable(wx, false);
-
-       return IRQ_HANDLED;
-}
-
 /**
  * txgbe_request_msix_irqs - Initialize MSI-X interrupts
  * @wx: board private structure
@@ -170,13 +163,6 @@ static int txgbe_request_msix_irqs(struct wx *wx)
                }
        }
 
-       err = request_irq(wx->msix_entries[vector].vector,
-                         txgbe_msix_other, 0, netdev->name, wx);
-       if (err) {
-               wx_err(wx, "request_irq for msix_other failed: %d\n", err);
-               goto free_queue_irqs;
-       }
-
        return 0;
 
 free_queue_irqs:
@@ -219,7 +205,8 @@ static int txgbe_request_irq(struct wx *wx)
 
 static void txgbe_up_complete(struct wx *wx)
 {
-       u32 reg;
+       struct net_device *netdev = wx->netdev;
+       struct txgbe *txgbe;
 
        wx_control_hw(wx, true);
        wx_configure_vectors(wx);
@@ -228,24 +215,17 @@ static void txgbe_up_complete(struct wx *wx)
        smp_mb__before_atomic();
        wx_napi_enable_all(wx);
 
+       txgbe = netdev_to_txgbe(netdev);
+       phylink_start(txgbe->phylink);
+
        /* clear any pending interrupts, may auto mask */
        rd32(wx, WX_PX_IC(0));
        rd32(wx, WX_PX_IC(1));
        rd32(wx, WX_PX_MISC_IC);
        txgbe_irq_enable(wx, true);
 
-       /* Configure MAC Rx and Tx when link is up */
-       reg = rd32(wx, WX_MAC_RX_CFG);
-       wr32(wx, WX_MAC_RX_CFG, reg);
-       wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR);
-       reg = rd32(wx, WX_MAC_WDG_TIMEOUT);
-       wr32(wx, WX_MAC_WDG_TIMEOUT, reg);
-       reg = rd32(wx, WX_MAC_TX_CFG);
-       wr32(wx, WX_MAC_TX_CFG, (reg & ~WX_MAC_TX_CFG_SPEED_MASK) | WX_MAC_TX_CFG_SPEED_10G);
-
        /* enable transmits */
-       netif_tx_start_all_queues(wx->netdev);
-       netif_carrier_on(wx->netdev);
+       netif_tx_start_all_queues(netdev);
 }
 
 static void txgbe_reset(struct wx *wx)
@@ -258,6 +238,7 @@ static void txgbe_reset(struct wx *wx)
        if (err != 0)
                wx_err(wx, "Hardware Error: %d\n", err);
 
+       wx_start_hw(wx);
        /* do not flush user set addresses */
        memcpy(old_addr, &wx->mac_table[0].addr, netdev->addr_len);
        wx_flush_sw_mac_table(wx);
@@ -279,7 +260,6 @@ static void txgbe_disable_device(struct wx *wx)
                wx_disable_rx_queue(wx, wx->rx_ring[i]);
 
        netif_tx_stop_all_queues(netdev);
-       netif_carrier_off(netdev);
        netif_tx_disable(netdev);
 
        wx_irq_disable(wx);
@@ -310,8 +290,11 @@ static void txgbe_disable_device(struct wx *wx)
 
 static void txgbe_down(struct wx *wx)
 {
+       struct txgbe *txgbe = netdev_to_txgbe(wx->netdev);
+
        txgbe_disable_device(wx);
        txgbe_reset(wx);
+       phylink_stop(txgbe->phylink);
 
        wx_clean_all_tx_rings(wx);
        wx_clean_all_rx_rings(wx);
@@ -330,6 +313,7 @@ static int txgbe_sw_init(struct wx *wx)
        wx->mac.max_tx_queues = TXGBE_SP_MAX_TX_QUEUES;
        wx->mac.max_rx_queues = TXGBE_SP_MAX_RX_QUEUES;
        wx->mac.mcft_size = TXGBE_SP_MC_TBL_SIZE;
+       wx->mac.vft_size = TXGBE_SP_VFT_TBL_SIZE;
        wx->mac.rx_pb_size = TXGBE_SP_RX_PB_SIZE;
        wx->mac.tx_pb_size = TXGBE_SP_TDB_PB_SZ;
 
@@ -455,7 +439,7 @@ static int txgbe_close(struct net_device *netdev)
        return 0;
 }
 
-static void txgbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake)
+static void txgbe_dev_shutdown(struct pci_dev *pdev)
 {
        struct wx *wx = pci_get_drvdata(pdev);
        struct net_device *netdev;
@@ -475,12 +459,10 @@ static void txgbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake)
 
 static void txgbe_shutdown(struct pci_dev *pdev)
 {
-       bool wake;
-
-       txgbe_dev_shutdown(pdev, &wake);
+       txgbe_dev_shutdown(pdev);
 
        if (system_state == SYSTEM_POWER_OFF) {
-               pci_wake_from_d3(pdev, wake);
+               pci_wake_from_d3(pdev, false);
                pci_set_power_state(pdev, PCI_D3hot);
        }
 }
@@ -491,9 +473,12 @@ static const struct net_device_ops txgbe_netdev_ops = {
        .ndo_change_mtu         = wx_change_mtu,
        .ndo_start_xmit         = wx_xmit_frame,
        .ndo_set_rx_mode        = wx_set_rx_mode,
+       .ndo_set_features       = wx_set_features,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = wx_set_mac,
        .ndo_get_stats64        = wx_get_stats64,
+       .ndo_vlan_rx_add_vid    = wx_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid   = wx_vlan_rx_kill_vid,
 };
 
 /**
@@ -513,6 +498,7 @@ static int txgbe_probe(struct pci_dev *pdev,
        struct net_device *netdev;
        int err, expected_gts;
        struct wx *wx = NULL;
+       struct txgbe *txgbe;
 
        u16 eeprom_verh = 0, eeprom_verl = 0, offset = 0;
        u16 eeprom_cfg_blkh = 0, eeprom_cfg_blkl = 0;
@@ -596,11 +582,25 @@ static int txgbe_probe(struct pci_dev *pdev,
                goto err_free_mac_table;
        }
 
-       netdev->features |= NETIF_F_HIGHDMA;
-       netdev->features = NETIF_F_SG;
-
+       netdev->features = NETIF_F_SG |
+                          NETIF_F_TSO |
+                          NETIF_F_TSO6 |
+                          NETIF_F_RXHASH |
+                          NETIF_F_RXCSUM |
+                          NETIF_F_HW_CSUM;
+
+       netdev->gso_partial_features =  NETIF_F_GSO_ENCAP_ALL;
+       netdev->features |= netdev->gso_partial_features;
+       netdev->features |= NETIF_F_SCTP_CRC;
+       netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
+       netdev->hw_enc_features |= netdev->vlan_features;
+       netdev->features |= NETIF_F_VLAN_FEATURES;
        /* copy netdev features into list of user selectable features */
        netdev->hw_features |= netdev->features | NETIF_F_RXALL;
+       netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+       netdev->features |= NETIF_F_HIGHDMA;
+       netdev->hw_features |= NETIF_F_GRO;
+       netdev->features |= NETIF_F_GRO;
 
        netdev->priv_flags |= IFF_UNICAST_FLT;
        netdev->priv_flags |= IFF_SUPP_NOFCS;
@@ -663,10 +663,23 @@ static int txgbe_probe(struct pci_dev *pdev,
                         "0x%08x", etrack_id);
        }
 
-       err = register_netdev(netdev);
+       txgbe = devm_kzalloc(&pdev->dev, sizeof(*txgbe), GFP_KERNEL);
+       if (!txgbe) {
+               err = -ENOMEM;
+               goto err_release_hw;
+       }
+
+       txgbe->wx = wx;
+       wx->priv = txgbe;
+
+       err = txgbe_init_phy(txgbe);
        if (err)
                goto err_release_hw;
 
+       err = register_netdev(netdev);
+       if (err)
+               goto err_remove_phy;
+
        pci_set_drvdata(pdev, wx);
 
        netif_tx_stop_all_queues(netdev);
@@ -694,6 +707,8 @@ static int txgbe_probe(struct pci_dev *pdev,
 
        return 0;
 
+err_remove_phy:
+       txgbe_remove_phy(txgbe);
 err_release_hw:
        wx_clear_interrupt_scheme(wx);
        wx_control_hw(wx, false);
@@ -719,11 +734,14 @@ err_pci_disable_dev:
 static void txgbe_remove(struct pci_dev *pdev)
 {
        struct wx *wx = pci_get_drvdata(pdev);
+       struct txgbe *txgbe = wx->priv;
        struct net_device *netdev;
 
        netdev = wx->netdev;
        unregister_netdev(netdev);
 
+       txgbe_remove_phy(txgbe);
+
        pci_release_selected_regions(pdev,
                                     pci_select_bars(pdev, IORESOURCE_MEM));
 
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
new file mode 100644 (file)
index 0000000..8779645
--- /dev/null
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2015 - 2023 Beijing WangXun Technology Co., Ltd. */
+
+#include <linux/gpio/machine.h>
+#include <linux/gpio/driver.h>
+#include <linux/gpio/property.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/i2c.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/pcs/pcs-xpcs.h>
+#include <linux/phylink.h>
+
+#include "../libwx/wx_type.h"
+#include "../libwx/wx_lib.h"
+#include "../libwx/wx_hw.h"
+#include "txgbe_type.h"
+#include "txgbe_phy.h"
+
+static int txgbe_swnodes_register(struct txgbe *txgbe)
+{
+       struct txgbe_nodes *nodes = &txgbe->nodes;
+       struct pci_dev *pdev = txgbe->wx->pdev;
+       struct software_node *swnodes;
+       u32 id;
+
+       id = (pdev->bus->number << 8) | pdev->devfn;
+
+       snprintf(nodes->gpio_name, sizeof(nodes->gpio_name), "txgbe_gpio-%x", id);
+       snprintf(nodes->i2c_name, sizeof(nodes->i2c_name), "txgbe_i2c-%x", id);
+       snprintf(nodes->sfp_name, sizeof(nodes->sfp_name), "txgbe_sfp-%x", id);
+       snprintf(nodes->phylink_name, sizeof(nodes->phylink_name), "txgbe_phylink-%x", id);
+
+       swnodes = nodes->swnodes;
+
+       /* GPIO 0: tx fault
+        * GPIO 1: tx disable
+        * GPIO 2: sfp module absent
+        * GPIO 3: rx signal lost
+        * GPIO 4: rate select, 1G(0) 10G(1)
+        * GPIO 5: rate select, 1G(0) 10G(1)
+        */
+       nodes->gpio_props[0] = PROPERTY_ENTRY_STRING("pinctrl-names", "default");
+       swnodes[SWNODE_GPIO] = NODE_PROP(nodes->gpio_name, nodes->gpio_props);
+       nodes->gpio0_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 0, GPIO_ACTIVE_HIGH);
+       nodes->gpio1_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 1, GPIO_ACTIVE_HIGH);
+       nodes->gpio2_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 2, GPIO_ACTIVE_LOW);
+       nodes->gpio3_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 3, GPIO_ACTIVE_HIGH);
+       nodes->gpio4_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 4, GPIO_ACTIVE_HIGH);
+       nodes->gpio5_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 5, GPIO_ACTIVE_HIGH);
+
+       nodes->i2c_props[0] = PROPERTY_ENTRY_STRING("compatible", "snps,designware-i2c");
+       nodes->i2c_props[1] = PROPERTY_ENTRY_BOOL("wx,i2c-snps-model");
+       nodes->i2c_props[2] = PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_STANDARD_MODE_FREQ);
+       swnodes[SWNODE_I2C] = NODE_PROP(nodes->i2c_name, nodes->i2c_props);
+       nodes->i2c_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_I2C]);
+
+       nodes->sfp_props[0] = PROPERTY_ENTRY_STRING("compatible", "sff,sfp");
+       nodes->sfp_props[1] = PROPERTY_ENTRY_REF_ARRAY("i2c-bus", nodes->i2c_ref);
+       nodes->sfp_props[2] = PROPERTY_ENTRY_REF_ARRAY("tx-fault-gpios", nodes->gpio0_ref);
+       nodes->sfp_props[3] = PROPERTY_ENTRY_REF_ARRAY("tx-disable-gpios", nodes->gpio1_ref);
+       nodes->sfp_props[4] = PROPERTY_ENTRY_REF_ARRAY("mod-def0-gpios", nodes->gpio2_ref);
+       nodes->sfp_props[5] = PROPERTY_ENTRY_REF_ARRAY("los-gpios", nodes->gpio3_ref);
+       nodes->sfp_props[6] = PROPERTY_ENTRY_REF_ARRAY("rate-select1-gpios", nodes->gpio4_ref);
+       nodes->sfp_props[7] = PROPERTY_ENTRY_REF_ARRAY("rate-select0-gpios", nodes->gpio5_ref);
+       swnodes[SWNODE_SFP] = NODE_PROP(nodes->sfp_name, nodes->sfp_props);
+       nodes->sfp_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_SFP]);
+
+       nodes->phylink_props[0] = PROPERTY_ENTRY_STRING("managed", "in-band-status");
+       nodes->phylink_props[1] = PROPERTY_ENTRY_REF_ARRAY("sfp", nodes->sfp_ref);
+       swnodes[SWNODE_PHYLINK] = NODE_PROP(nodes->phylink_name, nodes->phylink_props);
+
+       nodes->group[SWNODE_GPIO] = &swnodes[SWNODE_GPIO];
+       nodes->group[SWNODE_I2C] = &swnodes[SWNODE_I2C];
+       nodes->group[SWNODE_SFP] = &swnodes[SWNODE_SFP];
+       nodes->group[SWNODE_PHYLINK] = &swnodes[SWNODE_PHYLINK];
+
+       return software_node_register_node_group(nodes->group);
+}
+
+static int txgbe_pcs_read(struct mii_bus *bus, int addr, int devnum, int regnum)
+{
+       struct wx *wx  = bus->priv;
+       u32 offset, val;
+
+       if (addr)
+               return -EOPNOTSUPP;
+
+       offset = devnum << 16 | regnum;
+
+       /* Set the LAN port indicator to IDA_ADDR */
+       wr32(wx, TXGBE_XPCS_IDA_ADDR, offset);
+
+       /* Read the data from IDA_DATA register */
+       val = rd32(wx, TXGBE_XPCS_IDA_DATA);
+
+       return (u16)val;
+}
+
+static int txgbe_pcs_write(struct mii_bus *bus, int addr, int devnum, int regnum, u16 val)
+{
+       struct wx *wx = bus->priv;
+       u32 offset;
+
+       if (addr)
+               return -EOPNOTSUPP;
+
+       offset = devnum << 16 | regnum;
+
+       /* Set the LAN port indicator to IDA_ADDR */
+       wr32(wx, TXGBE_XPCS_IDA_ADDR, offset);
+
+       /* Write the data to IDA_DATA register */
+       wr32(wx, TXGBE_XPCS_IDA_DATA, val);
+
+       return 0;
+}
+
+static int txgbe_mdio_pcs_init(struct txgbe *txgbe)
+{
+       struct mii_bus *mii_bus;
+       struct dw_xpcs *xpcs;
+       struct pci_dev *pdev;
+       struct wx *wx;
+       int ret = 0;
+
+       wx = txgbe->wx;
+       pdev = wx->pdev;
+
+       mii_bus = devm_mdiobus_alloc(&pdev->dev);
+       if (!mii_bus)
+               return -ENOMEM;
+
+       mii_bus->name = "txgbe_pcs_mdio_bus";
+       mii_bus->read_c45 = &txgbe_pcs_read;
+       mii_bus->write_c45 = &txgbe_pcs_write;
+       mii_bus->parent = &pdev->dev;
+       mii_bus->phy_mask = ~0;
+       mii_bus->priv = wx;
+       snprintf(mii_bus->id, MII_BUS_ID_SIZE, "txgbe_pcs-%x",
+                (pdev->bus->number << 8) | pdev->devfn);
+
+       ret = devm_mdiobus_register(&pdev->dev, mii_bus);
+       if (ret)
+               return ret;
+
+       xpcs = xpcs_create_mdiodev(mii_bus, 0, PHY_INTERFACE_MODE_10GBASER);
+       if (IS_ERR(xpcs))
+               return PTR_ERR(xpcs);
+
+       txgbe->xpcs = xpcs;
+
+       return 0;
+}
+
+static struct phylink_pcs *txgbe_phylink_mac_select(struct phylink_config *config,
+                                                   phy_interface_t interface)
+{
+       struct txgbe *txgbe = netdev_to_txgbe(to_net_dev(config->dev));
+
+       return &txgbe->xpcs->pcs;
+}
+
+static void txgbe_mac_config(struct phylink_config *config, unsigned int mode,
+                            const struct phylink_link_state *state)
+{
+}
+
+static void txgbe_mac_link_down(struct phylink_config *config,
+                               unsigned int mode, phy_interface_t interface)
+{
+       struct wx *wx = netdev_priv(to_net_dev(config->dev));
+
+       wr32m(wx, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0);
+}
+
+static void txgbe_mac_link_up(struct phylink_config *config,
+                             struct phy_device *phy,
+                             unsigned int mode, phy_interface_t interface,
+                             int speed, int duplex,
+                             bool tx_pause, bool rx_pause)
+{
+       struct wx *wx = netdev_priv(to_net_dev(config->dev));
+       u32 txcfg, wdg;
+
+       txcfg = rd32(wx, WX_MAC_TX_CFG);
+       txcfg &= ~WX_MAC_TX_CFG_SPEED_MASK;
+
+       switch (speed) {
+       case SPEED_10000:
+               txcfg |= WX_MAC_TX_CFG_SPEED_10G;
+               break;
+       case SPEED_1000:
+       case SPEED_100:
+       case SPEED_10:
+               txcfg |= WX_MAC_TX_CFG_SPEED_1G;
+               break;
+       default:
+               break;
+       }
+
+       wr32(wx, WX_MAC_TX_CFG, txcfg | WX_MAC_TX_CFG_TE);
+
+       /* Reconfigure MAC Rx */
+       wr32m(wx, WX_MAC_RX_CFG, WX_MAC_RX_CFG_RE, WX_MAC_RX_CFG_RE);
+       wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR);
+       wdg = rd32(wx, WX_MAC_WDG_TIMEOUT);
+       wr32(wx, WX_MAC_WDG_TIMEOUT, wdg);
+}
+
+static const struct phylink_mac_ops txgbe_mac_ops = {
+       .mac_select_pcs = txgbe_phylink_mac_select,
+       .mac_config = txgbe_mac_config,
+       .mac_link_down = txgbe_mac_link_down,
+       .mac_link_up = txgbe_mac_link_up,
+};
+
+static int txgbe_phylink_init(struct txgbe *txgbe)
+{
+       struct phylink_config *config;
+       struct fwnode_handle *fwnode;
+       struct wx *wx = txgbe->wx;
+       phy_interface_t phy_mode;
+       struct phylink *phylink;
+
+       config = devm_kzalloc(&wx->pdev->dev, sizeof(*config), GFP_KERNEL);
+       if (!config)
+               return -ENOMEM;
+
+       config->dev = &wx->netdev->dev;
+       config->type = PHYLINK_NETDEV;
+       config->mac_capabilities = MAC_10000FD | MAC_1000FD | MAC_SYM_PAUSE | MAC_ASYM_PAUSE;
+       phy_mode = PHY_INTERFACE_MODE_10GBASER;
+       __set_bit(PHY_INTERFACE_MODE_10GBASER, config->supported_interfaces);
+       fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_PHYLINK]);
+       phylink = phylink_create(config, fwnode, phy_mode, &txgbe_mac_ops);
+       if (IS_ERR(phylink))
+               return PTR_ERR(phylink);
+
+       txgbe->phylink = phylink;
+
+       return 0;
+}
+
+static int txgbe_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+       struct wx *wx = gpiochip_get_data(chip);
+       int val;
+
+       val = rd32m(wx, WX_GPIO_EXT, BIT(offset));
+
+       return !!(val & BIT(offset));
+}
+
+static int txgbe_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
+{
+       struct wx *wx = gpiochip_get_data(chip);
+       u32 val;
+
+       val = rd32(wx, WX_GPIO_DDR);
+       if (BIT(offset) & val)
+               return GPIO_LINE_DIRECTION_OUT;
+
+       return GPIO_LINE_DIRECTION_IN;
+}
+
+static int txgbe_gpio_direction_in(struct gpio_chip *chip, unsigned int offset)
+{
+       struct wx *wx = gpiochip_get_data(chip);
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&wx->gpio_lock, flags);
+       wr32m(wx, WX_GPIO_DDR, BIT(offset), 0);
+       raw_spin_unlock_irqrestore(&wx->gpio_lock, flags);
+
+       return 0;
+}
+
+static int txgbe_gpio_direction_out(struct gpio_chip *chip, unsigned int offset,
+                                   int val)
+{
+       struct wx *wx = gpiochip_get_data(chip);
+       unsigned long flags;
+       u32 set;
+
+       set = val ? BIT(offset) : 0;
+
+       raw_spin_lock_irqsave(&wx->gpio_lock, flags);
+       wr32m(wx, WX_GPIO_DR, BIT(offset), set);
+       wr32m(wx, WX_GPIO_DDR, BIT(offset), BIT(offset));
+       raw_spin_unlock_irqrestore(&wx->gpio_lock, flags);
+
+       return 0;
+}
+
+static void txgbe_gpio_irq_ack(struct irq_data *d)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       irq_hw_number_t hwirq = irqd_to_hwirq(d);
+       struct wx *wx = gpiochip_get_data(gc);
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&wx->gpio_lock, flags);
+       wr32(wx, WX_GPIO_EOI, BIT(hwirq));
+       raw_spin_unlock_irqrestore(&wx->gpio_lock, flags);
+}
+
+static void txgbe_gpio_irq_mask(struct irq_data *d)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       irq_hw_number_t hwirq = irqd_to_hwirq(d);
+       struct wx *wx = gpiochip_get_data(gc);
+       unsigned long flags;
+
+       gpiochip_disable_irq(gc, hwirq);
+
+       raw_spin_lock_irqsave(&wx->gpio_lock, flags);
+       wr32m(wx, WX_GPIO_INTMASK, BIT(hwirq), BIT(hwirq));
+       raw_spin_unlock_irqrestore(&wx->gpio_lock, flags);
+}
+
+static void txgbe_gpio_irq_unmask(struct irq_data *d)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       irq_hw_number_t hwirq = irqd_to_hwirq(d);
+       struct wx *wx = gpiochip_get_data(gc);
+       unsigned long flags;
+
+       gpiochip_enable_irq(gc, hwirq);
+
+       raw_spin_lock_irqsave(&wx->gpio_lock, flags);
+       wr32m(wx, WX_GPIO_INTMASK, BIT(hwirq), 0);
+       raw_spin_unlock_irqrestore(&wx->gpio_lock, flags);
+}
+
+static void txgbe_toggle_trigger(struct gpio_chip *gc, unsigned int offset)
+{
+       struct wx *wx = gpiochip_get_data(gc);
+       u32 pol, val;
+
+       pol = rd32(wx, WX_GPIO_POLARITY);
+       val = rd32(wx, WX_GPIO_EXT);
+
+       if (val & BIT(offset))
+               pol &= ~BIT(offset);
+       else
+               pol |= BIT(offset);
+
+       wr32(wx, WX_GPIO_POLARITY, pol);
+}
+
+static int txgbe_gpio_set_type(struct irq_data *d, unsigned int type)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       irq_hw_number_t hwirq = irqd_to_hwirq(d);
+       struct wx *wx = gpiochip_get_data(gc);
+       u32 level, polarity, mask;
+       unsigned long flags;
+
+       mask = BIT(hwirq);
+
+       if (type & IRQ_TYPE_LEVEL_MASK) {
+               level = 0;
+               irq_set_handler_locked(d, handle_level_irq);
+       } else {
+               level = mask;
+               irq_set_handler_locked(d, handle_edge_irq);
+       }
+
+       if (type == IRQ_TYPE_EDGE_RISING || type == IRQ_TYPE_LEVEL_HIGH)
+               polarity = mask;
+       else
+               polarity = 0;
+
+       raw_spin_lock_irqsave(&wx->gpio_lock, flags);
+
+       wr32m(wx, WX_GPIO_INTEN, mask, mask);
+       wr32m(wx, WX_GPIO_INTTYPE_LEVEL, mask, level);
+       if (type == IRQ_TYPE_EDGE_BOTH)
+               txgbe_toggle_trigger(gc, hwirq);
+       else
+               wr32m(wx, WX_GPIO_POLARITY, mask, polarity);
+
+       raw_spin_unlock_irqrestore(&wx->gpio_lock, flags);
+
+       return 0;
+}
+
+static const struct irq_chip txgbe_gpio_irq_chip = {
+       .name = "txgbe_gpio_irq",
+       .irq_ack = txgbe_gpio_irq_ack,
+       .irq_mask = txgbe_gpio_irq_mask,
+       .irq_unmask = txgbe_gpio_irq_unmask,
+       .irq_set_type = txgbe_gpio_set_type,
+       .flags = IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
+static void txgbe_irq_handler(struct irq_desc *desc)
+{
+       struct irq_chip *chip = irq_desc_get_chip(desc);
+       struct wx *wx = irq_desc_get_handler_data(desc);
+       struct txgbe *txgbe = wx->priv;
+       irq_hw_number_t hwirq;
+       unsigned long gpioirq;
+       struct gpio_chip *gc;
+       unsigned long flags;
+       u32 eicr;
+
+       eicr = wx_misc_isb(wx, WX_ISB_MISC);
+
+       chained_irq_enter(chip, desc);
+
+       gpioirq = rd32(wx, WX_GPIO_INTSTATUS);
+
+       gc = txgbe->gpio;
+       for_each_set_bit(hwirq, &gpioirq, gc->ngpio) {
+               int gpio = irq_find_mapping(gc->irq.domain, hwirq);
+               u32 irq_type = irq_get_trigger_type(gpio);
+
+               generic_handle_domain_irq(gc->irq.domain, hwirq);
+
+               if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) {
+                       raw_spin_lock_irqsave(&wx->gpio_lock, flags);
+                       txgbe_toggle_trigger(gc, hwirq);
+                       raw_spin_unlock_irqrestore(&wx->gpio_lock, flags);
+               }
+       }
+
+       chained_irq_exit(chip, desc);
+
+       if (eicr & (TXGBE_PX_MISC_ETH_LK | TXGBE_PX_MISC_ETH_LKDN)) {
+               u32 reg = rd32(wx, TXGBE_CFG_PORT_ST);
+
+               phylink_mac_change(txgbe->phylink, !!(reg & TXGBE_CFG_PORT_ST_LINK_UP));
+       }
+
+       /* unmask interrupt */
+       wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
+}
+
+static int txgbe_gpio_init(struct txgbe *txgbe)
+{
+       struct gpio_irq_chip *girq;
+       struct gpio_chip *gc;
+       struct device *dev;
+       struct wx *wx;
+       int ret;
+
+       wx = txgbe->wx;
+       dev = &wx->pdev->dev;
+
+       raw_spin_lock_init(&wx->gpio_lock);
+
+       gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL);
+       if (!gc)
+               return -ENOMEM;
+
+       gc->label = devm_kasprintf(dev, GFP_KERNEL, "txgbe_gpio-%x",
+                                  (wx->pdev->bus->number << 8) | wx->pdev->devfn);
+       if (!gc->label)
+               return -ENOMEM;
+
+       gc->base = -1;
+       gc->ngpio = 6;
+       gc->owner = THIS_MODULE;
+       gc->parent = dev;
+       gc->fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_GPIO]);
+       gc->get = txgbe_gpio_get;
+       gc->get_direction = txgbe_gpio_get_direction;
+       gc->direction_input = txgbe_gpio_direction_in;
+       gc->direction_output = txgbe_gpio_direction_out;
+
+       girq = &gc->irq;
+       gpio_irq_chip_set_chip(girq, &txgbe_gpio_irq_chip);
+       girq->parent_handler = txgbe_irq_handler;
+       girq->parent_handler_data = wx;
+       girq->num_parents = 1;
+       girq->parents = devm_kcalloc(dev, girq->num_parents,
+                                    sizeof(*girq->parents), GFP_KERNEL);
+       if (!girq->parents)
+               return -ENOMEM;
+       girq->parents[0] = wx->msix_entries[wx->num_q_vectors].vector;
+       girq->default_type = IRQ_TYPE_NONE;
+       girq->handler = handle_bad_irq;
+
+       ret = devm_gpiochip_add_data(dev, gc, wx);
+       if (ret)
+               return ret;
+
+       txgbe->gpio = gc;
+
+       return 0;
+}
+
+static int txgbe_clock_register(struct txgbe *txgbe)
+{
+       struct pci_dev *pdev = txgbe->wx->pdev;
+       struct clk_lookup *clock;
+       char clk_name[32];
+       struct clk *clk;
+
+       snprintf(clk_name, sizeof(clk_name), "i2c_designware.%d",
+                (pdev->bus->number << 8) | pdev->devfn);
+
+       clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000);
+       if (IS_ERR(clk))
+               return PTR_ERR(clk);
+
+       clock = clkdev_create(clk, NULL, clk_name);
+       if (!clock) {
+               clk_unregister(clk);
+               return -ENOMEM;
+       }
+
+       txgbe->clk = clk;
+       txgbe->clock = clock;
+
+       return 0;
+}
+
+static int txgbe_i2c_read(void *context, unsigned int reg, unsigned int *val)
+{
+       struct wx *wx = context;
+
+       *val = rd32(wx, reg + TXGBE_I2C_BASE);
+
+       return 0;
+}
+
+static int txgbe_i2c_write(void *context, unsigned int reg, unsigned int val)
+{
+       struct wx *wx = context;
+
+       wr32(wx, reg + TXGBE_I2C_BASE, val);
+
+       return 0;
+}
+
+static const struct regmap_config i2c_regmap_config = {
+       .reg_bits = 32,
+       .val_bits = 32,
+       .reg_read = txgbe_i2c_read,
+       .reg_write = txgbe_i2c_write,
+       .fast_io = true,
+};
+
+static int txgbe_i2c_register(struct txgbe *txgbe)
+{
+       struct platform_device_info info = {};
+       struct platform_device *i2c_dev;
+       struct regmap *i2c_regmap;
+       struct pci_dev *pdev;
+       struct wx *wx;
+
+       wx = txgbe->wx;
+       pdev = wx->pdev;
+       i2c_regmap = devm_regmap_init(&pdev->dev, NULL, wx, &i2c_regmap_config);
+       if (IS_ERR(i2c_regmap)) {
+               wx_err(wx, "failed to init I2C regmap\n");
+               return PTR_ERR(i2c_regmap);
+       }
+
+       info.parent = &pdev->dev;
+       info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]);
+       info.name = "i2c_designware";
+       info.id = (pdev->bus->number << 8) | pdev->devfn;
+
+       info.res = &DEFINE_RES_IRQ(pdev->irq);
+       info.num_res = 1;
+       i2c_dev = platform_device_register_full(&info);
+       if (IS_ERR(i2c_dev))
+               return PTR_ERR(i2c_dev);
+
+       txgbe->i2c_dev = i2c_dev;
+
+       return 0;
+}
+
+static int txgbe_sfp_register(struct txgbe *txgbe)
+{
+       struct pci_dev *pdev = txgbe->wx->pdev;
+       struct platform_device_info info = {};
+       struct platform_device *sfp_dev;
+
+       info.parent = &pdev->dev;
+       info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_SFP]);
+       info.name = "sfp";
+       info.id = (pdev->bus->number << 8) | pdev->devfn;
+       sfp_dev = platform_device_register_full(&info);
+       if (IS_ERR(sfp_dev))
+               return PTR_ERR(sfp_dev);
+
+       txgbe->sfp_dev = sfp_dev;
+
+       return 0;
+}
+
+int txgbe_init_phy(struct txgbe *txgbe)
+{
+       int ret;
+
+       ret = txgbe_swnodes_register(txgbe);
+       if (ret) {
+               wx_err(txgbe->wx, "failed to register software nodes\n");
+               return ret;
+       }
+
+       ret = txgbe_mdio_pcs_init(txgbe);
+       if (ret) {
+               wx_err(txgbe->wx, "failed to init mdio pcs: %d\n", ret);
+               goto err_unregister_swnode;
+       }
+
+       ret = txgbe_phylink_init(txgbe);
+       if (ret) {
+               wx_err(txgbe->wx, "failed to init phylink\n");
+               goto err_destroy_xpcs;
+       }
+
+       ret = txgbe_gpio_init(txgbe);
+       if (ret) {
+               wx_err(txgbe->wx, "failed to init gpio\n");
+               goto err_destroy_phylink;
+       }
+
+       ret = txgbe_clock_register(txgbe);
+       if (ret) {
+               wx_err(txgbe->wx, "failed to register clock: %d\n", ret);
+               goto err_destroy_phylink;
+       }
+
+       ret = txgbe_i2c_register(txgbe);
+       if (ret) {
+               wx_err(txgbe->wx, "failed to init i2c interface: %d\n", ret);
+               goto err_unregister_clk;
+       }
+
+       ret = txgbe_sfp_register(txgbe);
+       if (ret) {
+               wx_err(txgbe->wx, "failed to register sfp\n");
+               goto err_unregister_i2c;
+       }
+
+       return 0;
+
+err_unregister_i2c:
+       platform_device_unregister(txgbe->i2c_dev);
+err_unregister_clk:
+       clkdev_drop(txgbe->clock);
+       clk_unregister(txgbe->clk);
+err_destroy_phylink:
+       phylink_destroy(txgbe->phylink);
+err_destroy_xpcs:
+       xpcs_destroy(txgbe->xpcs);
+err_unregister_swnode:
+       software_node_unregister_node_group(txgbe->nodes.group);
+
+       return ret;
+}
+
+void txgbe_remove_phy(struct txgbe *txgbe)
+{
+       platform_device_unregister(txgbe->sfp_dev);
+       platform_device_unregister(txgbe->i2c_dev);
+       clkdev_drop(txgbe->clock);
+       clk_unregister(txgbe->clk);
+       phylink_destroy(txgbe->phylink);
+       xpcs_destroy(txgbe->xpcs);
+       software_node_unregister_node_group(txgbe->nodes.group);
+}
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h
new file mode 100644 (file)
index 0000000..1ab5921
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2023 Beijing WangXun Technology Co., Ltd. */
+
+#ifndef _TXGBE_PHY_H_
+#define _TXGBE_PHY_H_
+
+int txgbe_init_phy(struct txgbe *txgbe);
+void txgbe_remove_phy(struct txgbe *txgbe);
+
+#endif /* _TXGBE_PHY_H_ */
index 63a1c73..51199c3 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef _TXGBE_TYPE_H_
 #define _TXGBE_TYPE_H_
 
+#include <linux/property.h>
+
 /* Device IDs */
 #define TXGBE_DEV_ID_SP1000                     0x1001
 #define TXGBE_DEV_ID_WX1820                     0x2001
 #define TXGBE_TS_CTL                            0x10300
 #define TXGBE_TS_CTL_EVAL_MD                    BIT(31)
 
+/* GPIO register bit */
+#define TXGBE_GPIOBIT_0                         BIT(0) /* I:tx fault */
+#define TXGBE_GPIOBIT_1                         BIT(1) /* O:tx disabled */
+#define TXGBE_GPIOBIT_2                         BIT(2) /* I:sfp module absent */
+#define TXGBE_GPIOBIT_3                         BIT(3) /* I:rx signal lost */
+#define TXGBE_GPIOBIT_4                         BIT(4) /* O:rate select, 1G(0) 10G(1) */
+#define TXGBE_GPIOBIT_5                         BIT(5) /* O:rate select, 1G(0) 10G(1) */
+
+/* Extended Interrupt Enable Set */
+#define TXGBE_PX_MISC_ETH_LKDN                  BIT(8)
+#define TXGBE_PX_MISC_DEV_RST                   BIT(10)
+#define TXGBE_PX_MISC_ETH_EVENT                 BIT(17)
+#define TXGBE_PX_MISC_ETH_LK                    BIT(18)
+#define TXGBE_PX_MISC_ETH_AN                    BIT(19)
+#define TXGBE_PX_MISC_INT_ERR                   BIT(20)
+#define TXGBE_PX_MISC_GPIO                      BIT(26)
+#define TXGBE_PX_MISC_IEN_MASK                            \
+       (TXGBE_PX_MISC_ETH_LKDN | TXGBE_PX_MISC_DEV_RST | \
+        TXGBE_PX_MISC_ETH_EVENT | TXGBE_PX_MISC_ETH_LK | \
+        TXGBE_PX_MISC_ETH_AN | TXGBE_PX_MISC_INT_ERR |   \
+        TXGBE_PX_MISC_GPIO)
+
+/* Port cfg registers */
+#define TXGBE_CFG_PORT_ST                       0x14404
+#define TXGBE_CFG_PORT_ST_LINK_UP               BIT(0)
+
+/* I2C registers */
+#define TXGBE_I2C_BASE                          0x14900
+
+/************************************** ETH PHY ******************************/
+#define TXGBE_XPCS_IDA_ADDR                     0x13000
+#define TXGBE_XPCS_IDA_DATA                     0x13004
+
 /* Part Number String Length */
 #define TXGBE_PBANUM_LENGTH                     32
 
 #define TXGBE_SP_MAX_RX_QUEUES  128
 #define TXGBE_SP_RAR_ENTRIES    128
 #define TXGBE_SP_MC_TBL_SIZE    128
+#define TXGBE_SP_VFT_TBL_SIZE   128
 #define TXGBE_SP_RX_PB_SIZE     512
 #define TXGBE_SP_TDB_PB_SZ      (160 * 1024) /* 160KB Packet Buffer */
 
 
 extern char txgbe_driver_name[];
 
+static inline struct txgbe *netdev_to_txgbe(struct net_device *netdev)
+{
+       struct wx *wx = netdev_priv(netdev);
+
+       return wx->priv;
+}
+
+#define NODE_PROP(_NAME, _PROP)                        \
+       (const struct software_node) {          \
+               .name = _NAME,                  \
+               .properties = _PROP,            \
+       }
+
+enum txgbe_swnodes {
+       SWNODE_GPIO = 0,
+       SWNODE_I2C,
+       SWNODE_SFP,
+       SWNODE_PHYLINK,
+       SWNODE_MAX
+};
+
+struct txgbe_nodes {
+       char gpio_name[32];
+       char i2c_name[32];
+       char sfp_name[32];
+       char phylink_name[32];
+       struct property_entry gpio_props[1];
+       struct property_entry i2c_props[3];
+       struct property_entry sfp_props[8];
+       struct property_entry phylink_props[2];
+       struct software_node_ref_args i2c_ref[1];
+       struct software_node_ref_args gpio0_ref[1];
+       struct software_node_ref_args gpio1_ref[1];
+       struct software_node_ref_args gpio2_ref[1];
+       struct software_node_ref_args gpio3_ref[1];
+       struct software_node_ref_args gpio4_ref[1];
+       struct software_node_ref_args gpio5_ref[1];
+       struct software_node_ref_args sfp_ref[1];
+       struct software_node swnodes[SWNODE_MAX];
+       const struct software_node *group[SWNODE_MAX + 1];
+};
+
+struct txgbe {
+       struct wx *wx;
+       struct txgbe_nodes nodes;
+       struct dw_xpcs *xpcs;
+       struct phylink *phylink;
+       struct platform_device *sfp_dev;
+       struct platform_device *i2c_dev;
+       struct clk_lookup *clock;
+       struct clk *clk;
+       struct gpio_chip *gpio;
+};
+
 #endif /* _TXGBE_TYPE_H_ */
index 33d51e3..c9dd69d 100644 (file)
@@ -74,6 +74,7 @@ struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
 #define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40
 
 #define ITAB_NUM 128
+#define ITAB_NUM_MAX 256
 
 struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
        struct ndis_obj_header hdr;
@@ -1034,7 +1035,9 @@ struct net_device_context {
 
        u32 tx_table[VRSS_SEND_TAB_SIZE];
 
-       u16 rx_table[ITAB_NUM];
+       u16 *rx_table;
+
+       u32 rx_table_sz;
 
        /* Ethtool settings */
        u8 duplex;
index 0103ff9..3ba3c8f 100644 (file)
@@ -1747,7 +1747,9 @@ static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
 
 static u32 netvsc_rss_indir_size(struct net_device *dev)
 {
-       return ITAB_NUM;
+       struct net_device_context *ndc = netdev_priv(dev);
+
+       return ndc->rx_table_sz;
 }
 
 static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
@@ -1766,7 +1768,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 
        rndis_dev = ndev->extension;
        if (indir) {
-               for (i = 0; i < ITAB_NUM; i++)
+               for (i = 0; i < ndc->rx_table_sz; i++)
                        indir[i] = ndc->rx_table[i];
        }
 
@@ -1792,11 +1794,11 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
 
        rndis_dev = ndev->extension;
        if (indir) {
-               for (i = 0; i < ITAB_NUM; i++)
+               for (i = 0; i < ndc->rx_table_sz; i++)
                        if (indir[i] >= ndev->num_chn)
                                return -EINVAL;
 
-               for (i = 0; i < ITAB_NUM; i++)
+               for (i = 0; i < ndc->rx_table_sz; i++)
                        ndc->rx_table[i] = indir[i];
        }
 
index eea777e..af95947 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/ucs2_string.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 
 #include "hyperv_net.h"
 #include "netvsc_trace.h"
@@ -927,7 +928,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
        struct rndis_set_request *set;
        struct rndis_set_complete *set_complete;
        u32 extlen = sizeof(struct ndis_recv_scale_param) +
-                    4 * ITAB_NUM + NETVSC_HASH_KEYLEN;
+                    4 * ndc->rx_table_sz + NETVSC_HASH_KEYLEN;
        struct ndis_recv_scale_param *rssp;
        u32 *itab;
        u8 *keyp;
@@ -953,7 +954,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
        rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
                         NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 |
                         NDIS_HASH_TCP_IPV6;
-       rssp->indirect_tabsize = 4*ITAB_NUM;
+       rssp->indirect_tabsize = 4 * ndc->rx_table_sz;
        rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
        rssp->hashkey_size = NETVSC_HASH_KEYLEN;
        rssp->hashkey_offset = rssp->indirect_taboffset +
@@ -961,7 +962,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
 
        /* Set indirection table entries */
        itab = (u32 *)(rssp + 1);
-       for (i = 0; i < ITAB_NUM; i++)
+       for (i = 0; i < ndc->rx_table_sz; i++)
                itab[i] = ndc->rx_table[i];
 
        /* Set hask key values */
@@ -1548,6 +1549,18 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
        if (ret || rsscap.num_recv_que < 2)
                goto out;
 
+       if (rsscap.num_indirect_tabent &&
+           rsscap.num_indirect_tabent <= ITAB_NUM_MAX)
+               ndc->rx_table_sz = rsscap.num_indirect_tabent;
+       else
+               ndc->rx_table_sz = ITAB_NUM;
+
+       ndc->rx_table = kcalloc(ndc->rx_table_sz, sizeof(u16), GFP_KERNEL);
+       if (!ndc->rx_table) {
+               ret = -ENOMEM;
+               goto err_dev_remv;
+       }
+
        /* This guarantees that num_possible_rss_qs <= num_online_cpus */
        num_possible_rss_qs = min_t(u32, num_online_cpus(),
                                    rsscap.num_recv_que);
@@ -1558,7 +1571,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
        net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
 
        if (!netif_is_rxfh_configured(net)) {
-               for (i = 0; i < ITAB_NUM; i++)
+               for (i = 0; i < ndc->rx_table_sz; i++)
                        ndc->rx_table[i] = ethtool_rxfh_indir_default(
                                                i, net_device->num_chn);
        }
@@ -1596,11 +1609,19 @@ void rndis_filter_device_remove(struct hv_device *dev,
                                struct netvsc_device *net_dev)
 {
        struct rndis_device *rndis_dev = net_dev->extension;
+       struct net_device *net = hv_get_drvdata(dev);
+       struct net_device_context *ndc;
+
+       ndc = netdev_priv(net);
 
        /* Halt and release the rndis device */
        rndis_filter_halt_device(net_dev, rndis_dev);
 
        netvsc_device_remove(dev);
+
+       ndc->rx_table_sz = 0;
+       kfree(ndc->rx_table);
+       ndc->rx_table = NULL;
 }
 
 int rndis_filter_open(struct netvsc_device *nvdev)
index ab5133e..a897796 100644 (file)
@@ -555,8 +555,7 @@ static void ipvlan_multicast_enqueue(struct ipvl_port *port,
 
        spin_lock(&port->backlog.lock);
        if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
-               if (skb->dev)
-                       dev_hold(skb->dev);
+               dev_hold(skb->dev);
                __skb_queue_tail(&port->backlog, skb);
                spin_unlock(&port->backlog.lock);
                schedule_work(&port->wq);
index 9ff2e6f..4a7a303 100644 (file)
@@ -185,6 +185,17 @@ config MDIO_IPQ8064
          This driver supports the MDIO interface found in the network
          interface units of the IPQ8064 SoC
 
+config MDIO_REGMAP
+       tristate
+       help
+         This driver allows using MDIO devices that are not sitting on a
+         regular MDIO bus, but still exposes the standard 802.3 register
+         layout. It's regmap-based so that it can be used on integrated,
+         memory-mapped PHYs, SPI PHYs and so on. A new virtual MDIO bus is
+         created, and its read/write operations are mapped to the underlying
+         regmap. Users willing to use this driver must explicitly select
+         REGMAP.
+
 config MDIO_THUNDER
        tristate "ThunderX SOCs MDIO buses"
        depends on 64BIT
index 7d4cb4c..1015f0d 100644 (file)
@@ -19,6 +19,7 @@ obj-$(CONFIG_MDIO_MOXART)             += mdio-moxart.o
 obj-$(CONFIG_MDIO_MSCC_MIIM)           += mdio-mscc-miim.o
 obj-$(CONFIG_MDIO_MVUSB)               += mdio-mvusb.o
 obj-$(CONFIG_MDIO_OCTEON)              += mdio-octeon.o
+obj-$(CONFIG_MDIO_REGMAP)              += mdio-regmap.o
 obj-$(CONFIG_MDIO_SUN4I)               += mdio-sun4i.o
 obj-$(CONFIG_MDIO_THUNDER)             += mdio-thunder.o
 obj-$(CONFIG_MDIO_XGENE)               += mdio-xgene.o
diff --git a/drivers/net/mdio/mdio-regmap.c b/drivers/net/mdio/mdio-regmap.c
new file mode 100644 (file)
index 0000000..8a742a8
--- /dev/null
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Driver for MMIO-Mapped MDIO devices. Some IPs expose internal PHYs or PCS
+ * within the MMIO-mapped area
+ *
+ * Copyright (C) 2023 Maxime Chevallier <maxime.chevallier@bootlin.com>
+ */
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/mdio.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mdio/mdio-regmap.h>
+
+#define DRV_NAME "mdio-regmap"
+
+struct mdio_regmap_priv {
+       struct regmap *regmap;
+       u8 valid_addr;
+};
+
+static int mdio_regmap_read_c22(struct mii_bus *bus, int addr, int regnum)
+{
+       struct mdio_regmap_priv *ctx = bus->priv;
+       unsigned int val;
+       int ret;
+
+       if (ctx->valid_addr != addr)
+               return -ENODEV;
+
+       ret = regmap_read(ctx->regmap, regnum, &val);
+       if (ret < 0)
+               return ret;
+
+       return val;
+}
+
+static int mdio_regmap_write_c22(struct mii_bus *bus, int addr, int regnum,
+                                u16 val)
+{
+       struct mdio_regmap_priv *ctx = bus->priv;
+
+       if (ctx->valid_addr != addr)
+               return -ENODEV;
+
+       return regmap_write(ctx->regmap, regnum, val);
+}
+
+struct mii_bus *devm_mdio_regmap_register(struct device *dev,
+                                         const struct mdio_regmap_config *config)
+{
+       struct mdio_regmap_priv *mr;
+       struct mii_bus *mii;
+       int rc;
+
+       if (!config->parent)
+               return ERR_PTR(-EINVAL);
+
+       mii = devm_mdiobus_alloc_size(config->parent, sizeof(*mr));
+       if (!mii)
+               return ERR_PTR(-ENOMEM);
+
+       mr = mii->priv;
+       mr->regmap = config->regmap;
+       mr->valid_addr = config->valid_addr;
+
+       mii->name = DRV_NAME;
+       strscpy(mii->id, config->name, MII_BUS_ID_SIZE);
+       mii->parent = config->parent;
+       mii->read = mdio_regmap_read_c22;
+       mii->write = mdio_regmap_write_c22;
+
+       if (config->autoscan)
+               mii->phy_mask = ~BIT(config->valid_addr);
+       else
+               mii->phy_mask = ~0;
+
+       rc = devm_mdiobus_register(dev, mii);
+       if (rc) {
+               dev_err(config->parent, "Cannot register MDIO bus![%s] (%d)\n", mii->id, rc);
+               return ERR_PTR(rc);
+       }
+
+       return mii;
+}
+EXPORT_SYMBOL_GPL(devm_mdio_regmap_register);
+
+MODULE_DESCRIPTION("MDIO API over regmap");
+MODULE_AUTHOR("Maxime Chevallier <maxime.chevallier@bootlin.com>");
+MODULE_LICENSE("GPL");
index 7c34fb7..87cf308 100644 (file)
@@ -33,10 +33,4 @@ config PCS_RZN1_MIIC
          on RZ/N1 SoCs. This PCS converts MII to RMII/RGMII or can be set in
          pass-through mode for MII.
 
-config PCS_ALTERA_TSE
-       tristate
-       help
-         This module provides helper functions for the Altera Triple Speed
-         Ethernet SGMII PCS, that can be found on the Intel Socfpga family.
-
 endmenu
index 9b9afd6..ea662a7 100644 (file)
@@ -7,4 +7,3 @@ obj-$(CONFIG_PCS_XPCS)          += pcs_xpcs.o
 obj-$(CONFIG_PCS_LYNX)         += pcs-lynx.o
 obj-$(CONFIG_PCS_MTK_LYNXI)    += pcs-mtk-lynxi.o
 obj-$(CONFIG_PCS_RZN1_MIIC)    += pcs-rzn1-miic.o
-obj-$(CONFIG_PCS_ALTERA_TSE)   += pcs-altera-tse.o
diff --git a/drivers/net/pcs/pcs-altera-tse.c b/drivers/net/pcs/pcs-altera-tse.c
deleted file mode 100644 (file)
index d616749..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2022 Bootlin
- *
- * Maxime Chevallier <maxime.chevallier@bootlin.com>
- */
-
-#include <linux/netdevice.h>
-#include <linux/phy.h>
-#include <linux/phylink.h>
-#include <linux/pcs-altera-tse.h>
-
-/* SGMII PCS register addresses
- */
-#define SGMII_PCS_LINK_TIMER_0 0x12
-#define SGMII_PCS_LINK_TIMER_1 0x13
-#define SGMII_PCS_IF_MODE      0x14
-#define   PCS_IF_MODE_SGMII_ENA                BIT(0)
-#define   PCS_IF_MODE_USE_SGMII_AN     BIT(1)
-#define   PCS_IF_MODE_SGMI_HALF_DUPLEX BIT(4)
-#define   PCS_IF_MODE_SGMI_PHY_AN      BIT(5)
-#define SGMII_PCS_SW_RESET_TIMEOUT 100 /* usecs */
-
-struct altera_tse_pcs {
-       struct phylink_pcs pcs;
-       void __iomem *base;
-       int reg_width;
-};
-
-static struct altera_tse_pcs *phylink_pcs_to_tse_pcs(struct phylink_pcs *pcs)
-{
-       return container_of(pcs, struct altera_tse_pcs, pcs);
-}
-
-static u16 tse_pcs_read(struct altera_tse_pcs *tse_pcs, int regnum)
-{
-       if (tse_pcs->reg_width == 4)
-               return readl(tse_pcs->base + regnum * 4);
-       else
-               return readw(tse_pcs->base + regnum * 2);
-}
-
-static void tse_pcs_write(struct altera_tse_pcs *tse_pcs, int regnum,
-                         u16 value)
-{
-       if (tse_pcs->reg_width == 4)
-               writel(value, tse_pcs->base + regnum * 4);
-       else
-               writew(value, tse_pcs->base + regnum * 2);
-}
-
-static int tse_pcs_reset(struct altera_tse_pcs *tse_pcs)
-{
-       u16 bmcr;
-
-       /* Reset PCS block */
-       bmcr = tse_pcs_read(tse_pcs, MII_BMCR);
-       bmcr |= BMCR_RESET;
-       tse_pcs_write(tse_pcs, MII_BMCR, bmcr);
-
-       return read_poll_timeout(tse_pcs_read, bmcr, (bmcr & BMCR_RESET),
-                                10, SGMII_PCS_SW_RESET_TIMEOUT, 1,
-                                tse_pcs, MII_BMCR);
-}
-
-static int alt_tse_pcs_validate(struct phylink_pcs *pcs,
-                               unsigned long *supported,
-                               const struct phylink_link_state *state)
-{
-       if (state->interface == PHY_INTERFACE_MODE_SGMII ||
-           state->interface == PHY_INTERFACE_MODE_1000BASEX)
-               return 1;
-
-       return -EINVAL;
-}
-
-static int alt_tse_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
-                             phy_interface_t interface,
-                             const unsigned long *advertising,
-                             bool permit_pause_to_mac)
-{
-       struct altera_tse_pcs *tse_pcs = phylink_pcs_to_tse_pcs(pcs);
-       u32 ctrl, if_mode;
-
-       ctrl = tse_pcs_read(tse_pcs, MII_BMCR);
-       if_mode = tse_pcs_read(tse_pcs, SGMII_PCS_IF_MODE);
-
-       /* Set link timer to 1.6ms, as per the MegaCore Function User Guide */
-       tse_pcs_write(tse_pcs, SGMII_PCS_LINK_TIMER_0, 0x0D40);
-       tse_pcs_write(tse_pcs, SGMII_PCS_LINK_TIMER_1, 0x03);
-
-       if (interface == PHY_INTERFACE_MODE_SGMII) {
-               if_mode |= PCS_IF_MODE_USE_SGMII_AN | PCS_IF_MODE_SGMII_ENA;
-       } else if (interface == PHY_INTERFACE_MODE_1000BASEX) {
-               if_mode &= ~(PCS_IF_MODE_USE_SGMII_AN | PCS_IF_MODE_SGMII_ENA);
-       }
-
-       ctrl |= (BMCR_SPEED1000 | BMCR_FULLDPLX | BMCR_ANENABLE);
-
-       tse_pcs_write(tse_pcs, MII_BMCR, ctrl);
-       tse_pcs_write(tse_pcs, SGMII_PCS_IF_MODE, if_mode);
-
-       return tse_pcs_reset(tse_pcs);
-}
-
-static void alt_tse_pcs_get_state(struct phylink_pcs *pcs,
-                                 struct phylink_link_state *state)
-{
-       struct altera_tse_pcs *tse_pcs = phylink_pcs_to_tse_pcs(pcs);
-       u16 bmsr, lpa;
-
-       bmsr = tse_pcs_read(tse_pcs, MII_BMSR);
-       lpa = tse_pcs_read(tse_pcs, MII_LPA);
-
-       phylink_mii_c22_pcs_decode_state(state, bmsr, lpa);
-}
-
-static void alt_tse_pcs_an_restart(struct phylink_pcs *pcs)
-{
-       struct altera_tse_pcs *tse_pcs = phylink_pcs_to_tse_pcs(pcs);
-       u16 bmcr;
-
-       bmcr = tse_pcs_read(tse_pcs, MII_BMCR);
-       bmcr |= BMCR_ANRESTART;
-       tse_pcs_write(tse_pcs, MII_BMCR, bmcr);
-
-       /* This PCS seems to require a soft reset to re-sync the AN logic */
-       tse_pcs_reset(tse_pcs);
-}
-
-static const struct phylink_pcs_ops alt_tse_pcs_ops = {
-       .pcs_validate = alt_tse_pcs_validate,
-       .pcs_get_state = alt_tse_pcs_get_state,
-       .pcs_config = alt_tse_pcs_config,
-       .pcs_an_restart = alt_tse_pcs_an_restart,
-};
-
-struct phylink_pcs *alt_tse_pcs_create(struct net_device *ndev,
-                                      void __iomem *pcs_base, int reg_width)
-{
-       struct altera_tse_pcs *tse_pcs;
-
-       if (reg_width != 4 && reg_width != 2)
-               return ERR_PTR(-EINVAL);
-
-       tse_pcs = devm_kzalloc(&ndev->dev, sizeof(*tse_pcs), GFP_KERNEL);
-       if (!tse_pcs)
-               return ERR_PTR(-ENOMEM);
-
-       tse_pcs->pcs.ops = &alt_tse_pcs_ops;
-       tse_pcs->base = pcs_base;
-       tse_pcs->reg_width = reg_width;
-
-       return &tse_pcs->pcs;
-}
-EXPORT_SYMBOL_GPL(alt_tse_pcs_create);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Altera TSE PCS driver");
-MODULE_AUTHOR("Maxime Chevallier <maxime.chevallier@bootlin.com>");
index 622c3de..f04dc58 100644 (file)
@@ -323,6 +323,7 @@ struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio)
        if (!lynx)
                return NULL;
 
+       mdio_device_get(mdio);
        lynx->mdio = mdio;
        lynx->pcs.ops = &lynx_pcs_phylink_ops;
        lynx->pcs.poll = true;
@@ -331,10 +332,40 @@ struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio)
 }
 EXPORT_SYMBOL(lynx_pcs_create);
 
+struct phylink_pcs *lynx_pcs_create_mdiodev(struct mii_bus *bus, int addr)
+{
+       struct mdio_device *mdio;
+       struct phylink_pcs *pcs;
+
+       mdio = mdio_device_create(bus, addr);
+       if (IS_ERR(mdio))
+               return ERR_CAST(mdio);
+
+       pcs = lynx_pcs_create(mdio);
+
+       /* Convert failure to create the PCS to an error pointer, so this
+        * function has a consistent return value strategy.
+        */
+       if (!pcs)
+               pcs = ERR_PTR(-ENOMEM);
+
+       /* lynx_create() has taken a refcount on the mdiodev if it was
+        * successful. If lynx_create() fails, this will free the mdio
+        * device here. In any case, we don't need to hold our reference
+        * anymore, and putting it here will allow mdio_device_put() in
+        * lynx_destroy() to automatically free the mdio device.
+        */
+       mdio_device_put(mdio);
+
+       return pcs;
+}
+EXPORT_SYMBOL(lynx_pcs_create_mdiodev);
+
 void lynx_pcs_destroy(struct phylink_pcs *pcs)
 {
        struct lynx_pcs *lynx = phylink_pcs_to_lynx(pcs);
 
+       mdio_device_put(lynx->mdio);
        kfree(lynx);
 }
 EXPORT_SYMBOL(lynx_pcs_destroy);
index 72f25e7..e4e59aa 100644 (file)
@@ -64,6 +64,16 @@ static const int xpcs_xlgmii_features[] = {
        __ETHTOOL_LINK_MODE_MASK_NBITS,
 };
 
+static const int xpcs_10gbaser_features[] = {
+       ETHTOOL_LINK_MODE_Pause_BIT,
+       ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+       ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+       ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+       ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
+       ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+       __ETHTOOL_LINK_MODE_MASK_NBITS,
+};
+
 static const int xpcs_sgmii_features[] = {
        ETHTOOL_LINK_MODE_Pause_BIT,
        ETHTOOL_LINK_MODE_Asym_Pause_BIT,
@@ -106,6 +116,10 @@ static const phy_interface_t xpcs_xlgmii_interfaces[] = {
        PHY_INTERFACE_MODE_XLGMII,
 };
 
+static const phy_interface_t xpcs_10gbaser_interfaces[] = {
+       PHY_INTERFACE_MODE_10GBASER,
+};
+
 static const phy_interface_t xpcs_sgmii_interfaces[] = {
        PHY_INTERFACE_MODE_SGMII,
 };
@@ -123,6 +137,7 @@ enum {
        DW_XPCS_USXGMII,
        DW_XPCS_10GKR,
        DW_XPCS_XLGMII,
+       DW_XPCS_10GBASER,
        DW_XPCS_SGMII,
        DW_XPCS_1000BASEX,
        DW_XPCS_2500BASEX,
@@ -246,6 +261,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs,
 
        switch (compat->an_mode) {
        case DW_AN_C73:
+       case DW_10GBASER:
                dev = MDIO_MMD_PCS;
                break;
        case DW_AN_C37_SGMII:
@@ -271,15 +287,12 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs,
 })
 
 static int xpcs_read_fault_c73(struct dw_xpcs *xpcs,
-                              struct phylink_link_state *state)
+                              struct phylink_link_state *state,
+                              u16 pcs_stat1)
 {
        int ret;
 
-       ret = xpcs_read(xpcs, MDIO_MMD_PCS, MDIO_STAT1);
-       if (ret < 0)
-               return ret;
-
-       if (ret & MDIO_STAT1_FAULT) {
+       if (pcs_stat1 & MDIO_STAT1_FAULT) {
                xpcs_warn(xpcs, state, "Link fault condition detected!\n");
                return -EFAULT;
        }
@@ -321,37 +334,6 @@ static int xpcs_read_fault_c73(struct dw_xpcs *xpcs,
        return 0;
 }
 
-static int xpcs_read_link_c73(struct dw_xpcs *xpcs)
-{
-       bool link = true;
-       int ret;
-
-       ret = xpcs_read(xpcs, MDIO_MMD_PCS, MDIO_STAT1);
-       if (ret < 0)
-               return ret;
-
-       if (!(ret & MDIO_STAT1_LSTATUS))
-               link = false;
-
-       return link;
-}
-
-static int xpcs_get_max_usxgmii_speed(const unsigned long *supported)
-{
-       int max = SPEED_UNKNOWN;
-
-       if (phylink_test(supported, 1000baseKX_Full))
-               max = SPEED_1000;
-       if (phylink_test(supported, 2500baseX_Full))
-               max = SPEED_2500;
-       if (phylink_test(supported, 10000baseKX4_Full))
-               max = SPEED_10000;
-       if (phylink_test(supported, 10000baseKR_Full))
-               max = SPEED_10000;
-
-       return max;
-}
-
 static void xpcs_config_usxgmii(struct dw_xpcs *xpcs, int speed)
 {
        int ret, speed_sel;
@@ -478,16 +460,12 @@ static int xpcs_config_aneg_c73(struct dw_xpcs *xpcs,
 
 static int xpcs_aneg_done_c73(struct dw_xpcs *xpcs,
                              struct phylink_link_state *state,
-                             const struct xpcs_compat *compat)
+                             const struct xpcs_compat *compat, u16 an_stat1)
 {
        int ret;
 
-       ret = xpcs_read(xpcs, MDIO_MMD_AN, MDIO_STAT1);
-       if (ret < 0)
-               return ret;
-
-       if (ret & MDIO_AN_STAT1_COMPLETE) {
-               ret = xpcs_read(xpcs, MDIO_MMD_AN, DW_SR_AN_LP_ABL1);
+       if (an_stat1 & MDIO_AN_STAT1_COMPLETE) {
+               ret = xpcs_read(xpcs, MDIO_MMD_AN, MDIO_AN_LPA);
                if (ret < 0)
                        return ret;
 
@@ -504,64 +482,32 @@ static int xpcs_aneg_done_c73(struct dw_xpcs *xpcs,
 }
 
 static int xpcs_read_lpa_c73(struct dw_xpcs *xpcs,
-                            struct phylink_link_state *state)
+                            struct phylink_link_state *state, u16 an_stat1)
 {
-       int ret;
-
-       ret = xpcs_read(xpcs, MDIO_MMD_AN, MDIO_STAT1);
-       if (ret < 0)
-               return ret;
+       u16 lpa[3];
+       int i, ret;
 
-       if (!(ret & MDIO_AN_STAT1_LPABLE)) {
+       if (!(an_stat1 & MDIO_AN_STAT1_LPABLE)) {
                phylink_clear(state->lp_advertising, Autoneg);
                return 0;
        }
 
        phylink_set(state->lp_advertising, Autoneg);
 
-       /* Clause 73 outcome */
-       ret = xpcs_read(xpcs, MDIO_MMD_AN, DW_SR_AN_LP_ABL3);
-       if (ret < 0)
-               return ret;
-
-       if (ret & DW_C73_2500KX)
-               phylink_set(state->lp_advertising, 2500baseX_Full);
-
-       ret = xpcs_read(xpcs, MDIO_MMD_AN, DW_SR_AN_LP_ABL2);
-       if (ret < 0)
-               return ret;
-
-       if (ret & DW_C73_1000KX)
-               phylink_set(state->lp_advertising, 1000baseKX_Full);
-       if (ret & DW_C73_10000KX4)
-               phylink_set(state->lp_advertising, 10000baseKX4_Full);
-       if (ret & DW_C73_10000KR)
-               phylink_set(state->lp_advertising, 10000baseKR_Full);
+       /* Read Clause 73 link partner advertisement */
+       for (i = ARRAY_SIZE(lpa); --i >= 0; ) {
+               ret = xpcs_read(xpcs, MDIO_MMD_AN, MDIO_AN_LPA + i);
+               if (ret < 0)
+                       return ret;
 
-       ret = xpcs_read(xpcs, MDIO_MMD_AN, DW_SR_AN_LP_ABL1);
-       if (ret < 0)
-               return ret;
+               lpa[i] = ret;
+       }
 
-       if (ret & DW_C73_PAUSE)
-               phylink_set(state->lp_advertising, Pause);
-       if (ret & DW_C73_ASYM_PAUSE)
-               phylink_set(state->lp_advertising, Asym_Pause);
+       mii_c73_mod_linkmode(state->lp_advertising, lpa);
 
-       linkmode_and(state->lp_advertising, state->lp_advertising,
-                    state->advertising);
        return 0;
 }
 
-static void xpcs_resolve_lpa_c73(struct dw_xpcs *xpcs,
-                                struct phylink_link_state *state)
-{
-       int max_speed = xpcs_get_max_usxgmii_speed(state->lp_advertising);
-
-       state->pause = MLO_PAUSE_TX | MLO_PAUSE_RX;
-       state->speed = max_speed;
-       state->duplex = DUPLEX_FULL;
-}
-
 static int xpcs_get_max_xlgmii_speed(struct dw_xpcs *xpcs,
                                     struct phylink_link_state *state)
 {
@@ -872,6 +818,8 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
                return -ENODEV;
 
        switch (compat->an_mode) {
+       case DW_10GBASER:
+               break;
        case DW_AN_C73:
                if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, advertising)) {
                        ret = xpcs_config_aneg_c73(xpcs, compat);
@@ -924,13 +872,25 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs,
                              const struct xpcs_compat *compat)
 {
        bool an_enabled;
+       int pcs_stat1;
+       int an_stat1;
        int ret;
 
+       /* The link status bit is latching-low, so it is important to
+        * avoid unnecessary re-reads of this register to avoid missing
+        * a link-down event.
+        */
+       pcs_stat1 = xpcs_read(xpcs, MDIO_MMD_PCS, MDIO_STAT1);
+       if (pcs_stat1 < 0) {
+               state->link = false;
+               return pcs_stat1;
+       }
+
        /* Link needs to be read first ... */
-       state->link = xpcs_read_link_c73(xpcs) > 0 ? 1 : 0;
+       state->link = !!(pcs_stat1 & MDIO_STAT1_LSTATUS);
 
        /* ... and then we check the faults. */
-       ret = xpcs_read_fault_c73(xpcs, state);
+       ret = xpcs_read_fault_c73(xpcs, state, pcs_stat1);
        if (ret) {
                ret = xpcs_soft_reset(xpcs, compat);
                if (ret)
@@ -941,15 +901,38 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs,
                return xpcs_do_config(xpcs, state->interface, MLO_AN_INBAND, NULL);
        }
 
+       /* There is no point doing anything else if the link is down. */
+       if (!state->link)
+               return 0;
+
        an_enabled = linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
                                       state->advertising);
-       if (an_enabled && xpcs_aneg_done_c73(xpcs, state, compat)) {
-               state->an_complete = true;
-               xpcs_read_lpa_c73(xpcs, state);
-               xpcs_resolve_lpa_c73(xpcs, state);
-       } else if (an_enabled) {
-               state->link = 0;
-       } else if (state->link) {
+       if (an_enabled) {
+               /* The link status bit is latching-low, so it is important to
+                * avoid unnecessary re-reads of this register to avoid missing
+                * a link-down event.
+                */
+               an_stat1 = xpcs_read(xpcs, MDIO_MMD_AN, MDIO_STAT1);
+               if (an_stat1 < 0) {
+                       state->link = false;
+                       return an_stat1;
+               }
+
+               state->an_complete = xpcs_aneg_done_c73(xpcs, state, compat,
+                                                       an_stat1);
+               if (!state->an_complete) {
+                       state->link = false;
+                       return 0;
+               }
+
+               ret = xpcs_read_lpa_c73(xpcs, state, an_stat1);
+               if (ret < 0) {
+                       state->link = false;
+                       return ret;
+               }
+
+               phylink_resolve_c73(state);
+       } else {
                xpcs_resolve_pma(xpcs, state);
        }
 
@@ -1033,6 +1016,9 @@ static void xpcs_get_state(struct phylink_pcs *pcs,
                return;
 
        switch (compat->an_mode) {
+       case DW_10GBASER:
+               phylink_mii_c45_pcs_get_state(xpcs->mdiodev, state);
+               break;
        case DW_AN_C73:
                ret = xpcs_get_state_c73(xpcs, state, compat);
                if (ret) {
@@ -1188,6 +1174,12 @@ static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
                .num_interfaces = ARRAY_SIZE(xpcs_xlgmii_interfaces),
                .an_mode = DW_AN_C73,
        },
+       [DW_XPCS_10GBASER] = {
+               .supported = xpcs_10gbaser_features,
+               .interface = xpcs_10gbaser_interfaces,
+               .num_interfaces = ARRAY_SIZE(xpcs_10gbaser_interfaces),
+               .an_mode = DW_10GBASER,
+       },
        [DW_XPCS_SGMII] = {
                .supported = xpcs_sgmii_features,
                .interface = xpcs_sgmii_interfaces,
@@ -1259,8 +1251,8 @@ static const struct phylink_pcs_ops xpcs_phylink_ops = {
        .pcs_link_up = xpcs_link_up,
 };
 
-struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
-                           phy_interface_t interface)
+static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
+                                  phy_interface_t interface)
 {
        struct dw_xpcs *xpcs;
        u32 xpcs_id;
@@ -1270,6 +1262,7 @@ struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
        if (!xpcs)
                return ERR_PTR(-ENOMEM);
 
+       mdio_device_get(mdiodev);
        xpcs->mdiodev = mdiodev;
 
        xpcs_id = xpcs_get_id(xpcs);
@@ -1290,6 +1283,9 @@ struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
                }
 
                xpcs->pcs.ops = &xpcs_phylink_ops;
+               if (compat->an_mode == DW_10GBASER)
+                       return xpcs;
+
                xpcs->pcs.poll = true;
 
                ret = xpcs_soft_reset(xpcs, compat);
@@ -1302,16 +1298,42 @@ struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
        ret = -ENODEV;
 
 out:
+       mdio_device_put(mdiodev);
        kfree(xpcs);
 
        return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(xpcs_create);
 
 void xpcs_destroy(struct dw_xpcs *xpcs)
 {
+       if (xpcs)
+               mdio_device_put(xpcs->mdiodev);
        kfree(xpcs);
 }
 EXPORT_SYMBOL_GPL(xpcs_destroy);
 
+struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr,
+                                   phy_interface_t interface)
+{
+       struct mdio_device *mdiodev;
+       struct dw_xpcs *xpcs;
+
+       mdiodev = mdio_device_create(bus, addr);
+       if (IS_ERR(mdiodev))
+               return ERR_CAST(mdiodev);
+
+       xpcs = xpcs_create(mdiodev, interface);
+
+       /* xpcs_create() has taken a refcount on the mdiodev if it was
+        * successful. If xpcs_create() fails, this will free the mdio
+        * device here. In any case, we don't need to hold our reference
+        * anymore, and putting it here will allow mdio_device_put() in
+        * xpcs_destroy() to automatically free the mdio device.
+        */
+       mdio_device_put(mdiodev);
+
+       return xpcs;
+}
+EXPORT_SYMBOL_GPL(xpcs_create_mdiodev);
+
 MODULE_LICENSE("GPL v2");
index 770df50..68c6b5a 100644 (file)
@@ -32,9 +32,6 @@
 #define DW_SR_AN_ADV1                  0x10
 #define DW_SR_AN_ADV2                  0x11
 #define DW_SR_AN_ADV3                  0x12
-#define DW_SR_AN_LP_ABL1               0x13
-#define DW_SR_AN_LP_ABL2               0x14
-#define DW_SR_AN_LP_ABL3               0x15
 
 /* Clause 73 Defines */
 /* AN_LP_ABL1 */
index 93b8efc..059bd06 100644 (file)
@@ -243,9 +243,10 @@ config MICREL_PHY
          Supports the KSZ9021, VSC8201, KS8001 PHYs.
 
 config MICROCHIP_T1S_PHY
-       tristate "Microchip 10BASE-T1S Ethernet PHY"
+       tristate "Microchip 10BASE-T1S Ethernet PHYs"
        help
-         Currently supports the LAN8670, LAN8671, LAN8672
+         Currently supports the LAN8670/1/2 Rev.B1 and LAN8650/1 Rev.B0 Internal
+         PHYs.
 
 config MICROCHIP_PHY
        tristate "Microchip PHYs"
index b2c0baa..c6e2e5f 100644 (file)
@@ -6,12 +6,14 @@
 #include "bcm-phy-lib.h"
 #include <linux/bitfield.h>
 #include <linux/brcmphy.h>
+#include <linux/etherdevice.h>
 #include <linux/export.h>
 #include <linux/mdio.h>
 #include <linux/module.h>
 #include <linux/phy.h>
 #include <linux/ethtool.h>
 #include <linux/ethtool_netlink.h>
+#include <linux/netdevice.h>
 
 #define MII_BCM_CHANNEL_WIDTH     0x2000
 #define BCM_CL45VEN_EEE_ADV       0x3c
@@ -494,18 +496,20 @@ EXPORT_SYMBOL_GPL(bcm_phy_downshift_set);
 
 struct bcm_phy_hw_stat {
        const char *string;
-       u8 reg;
+       int devad;
+       u16 reg;
        u8 shift;
        u8 bits;
 };
 
 /* Counters freeze at either 0xffff or 0xff, better than nothing */
 static const struct bcm_phy_hw_stat bcm_phy_hw_stats[] = {
-       { "phy_receive_errors", MII_BRCM_CORE_BASE12, 0, 16 },
-       { "phy_serdes_ber_errors", MII_BRCM_CORE_BASE13, 8, 8 },
-       { "phy_false_carrier_sense_errors", MII_BRCM_CORE_BASE13, 0, 8 },
-       { "phy_local_rcvr_nok", MII_BRCM_CORE_BASE14, 8, 8 },
-       { "phy_remote_rcv_nok", MII_BRCM_CORE_BASE14, 0, 8 },
+       { "phy_receive_errors", -1, MII_BRCM_CORE_BASE12, 0, 16 },
+       { "phy_serdes_ber_errors", -1, MII_BRCM_CORE_BASE13, 8, 8 },
+       { "phy_false_carrier_sense_errors", -1, MII_BRCM_CORE_BASE13, 0, 8 },
+       { "phy_local_rcvr_nok", -1, MII_BRCM_CORE_BASE14, 8, 8 },
+       { "phy_remote_rcv_nok", -1, MII_BRCM_CORE_BASE14, 0, 8 },
+       { "phy_lpi_count", MDIO_MMD_AN, BRCM_CL45VEN_EEE_LPI_CNT, 0, 16 },
 };
 
 int bcm_phy_get_sset_count(struct phy_device *phydev)
@@ -534,7 +538,10 @@ static u64 bcm_phy_get_stat(struct phy_device *phydev, u64 *shadow,
        int val;
        u64 ret;
 
-       val = phy_read(phydev, stat.reg);
+       if (stat.devad < 0)
+               val = phy_read(phydev, stat.reg);
+       else
+               val = phy_read_mmd(phydev, stat.devad, stat.reg);
        if (val < 0) {
                ret = U64_MAX;
        } else {
@@ -816,6 +823,222 @@ int bcm_phy_cable_test_get_status_rdb(struct phy_device *phydev,
 }
 EXPORT_SYMBOL_GPL(bcm_phy_cable_test_get_status_rdb);
 
+#define BCM54XX_WOL_SUPPORTED_MASK     (WAKE_UCAST | \
+                                        WAKE_MCAST | \
+                                        WAKE_BCAST | \
+                                        WAKE_MAGIC | \
+                                        WAKE_MAGICSECURE)
+
+int bcm_phy_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol)
+{
+       struct net_device *ndev = phydev->attached_dev;
+       u8 da[ETH_ALEN], mask[ETH_ALEN];
+       unsigned int i;
+       u16 ctl;
+       int ret;
+
+       /* Allow a MAC driver to play through its own Wake-on-LAN
+        * implementation
+        */
+       if (wol->wolopts & ~BCM54XX_WOL_SUPPORTED_MASK)
+               return -EOPNOTSUPP;
+
+       /* The PHY supports passwords of 4, 6 and 8 bytes in size, but Linux's
+        * ethtool only supports 6, for now.
+        */
+       BUILD_BUG_ON(sizeof(wol->sopass) != ETH_ALEN);
+
+       /* Clear previous interrupts */
+       ret = bcm_phy_read_exp(phydev, BCM54XX_WOL_INT_STATUS);
+       if (ret < 0)
+               return ret;
+
+       ret = bcm_phy_read_exp(phydev, BCM54XX_WOL_MAIN_CTL);
+       if (ret < 0)
+               return ret;
+
+       ctl = ret;
+
+       if (!wol->wolopts) {
+               if (phy_interrupt_is_valid(phydev))
+                       disable_irq_wake(phydev->irq);
+
+               /* Leave all interrupts disabled */
+               ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_INT_MASK,
+                                       BCM54XX_WOL_ALL_INTRS);
+               if (ret < 0)
+                       return ret;
+
+               /* Disable the global Wake-on-LAN enable bit */
+               ctl &= ~BCM54XX_WOL_EN;
+
+               return bcm_phy_write_exp(phydev, BCM54XX_WOL_MAIN_CTL, ctl);
+       }
+
+       /* Clear the previously configured mode and mask mode for Wake-on-LAN */
+       ctl &= ~(BCM54XX_WOL_MODE_MASK << BCM54XX_WOL_MODE_SHIFT);
+       ctl &= ~(BCM54XX_WOL_MASK_MODE_MASK << BCM54XX_WOL_MASK_MODE_SHIFT);
+       ctl &= ~BCM54XX_WOL_DIR_PKT_EN;
+       ctl &= ~(BCM54XX_WOL_SECKEY_OPT_MASK << BCM54XX_WOL_SECKEY_OPT_SHIFT);
+
+       /* When using WAKE_MAGIC, we program the magic pattern filter to match
+        * the device's MAC address and we accept any MAC DA in the Ethernet
+        * frame.
+        *
+        * When using WAKE_UCAST, WAKE_BCAST or WAKE_MCAST, we program the
+        * following:
+        * - WAKE_UCAST -> MAC DA is the device's MAC with a perfect match
+        * - WAKE_MCAST -> MAC DA is X1:XX:XX:XX:XX:XX where XX is don't care
+        * - WAKE_BCAST -> MAC DA is FF:FF:FF:FF:FF:FF with a perfect match
+        *
+        * Note that the Broadcast MAC DA is inherently going to match the
+        * multicast pattern being matched.
+        */
+       memset(mask, 0, sizeof(mask));
+
+       if (wol->wolopts & WAKE_MCAST) {
+               memset(da, 0, sizeof(da));
+               memset(mask, 0xff, sizeof(mask));
+               da[0] = 0x01;
+               mask[0] = ~da[0];
+       } else {
+               if (wol->wolopts & WAKE_UCAST) {
+                       ether_addr_copy(da, ndev->dev_addr);
+               } else if (wol->wolopts & WAKE_BCAST) {
+                       eth_broadcast_addr(da);
+               } else if (wol->wolopts & WAKE_MAGICSECURE) {
+                       ether_addr_copy(da, wol->sopass);
+               } else if (wol->wolopts & WAKE_MAGIC) {
+                       memset(da, 0, sizeof(da));
+                       memset(mask, 0xff, sizeof(mask));
+               }
+       }
+
+       for (i = 0; i < ETH_ALEN / 2; i++) {
+               if (wol->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) {
+                       ret = bcm_phy_write_exp(phydev,
+                                               BCM54XX_WOL_MPD_DATA1(2 - i),
+                                               ndev->dev_addr[i * 2] << 8 |
+                                               ndev->dev_addr[i * 2 + 1]);
+                       if (ret < 0)
+                               return ret;
+               }
+
+               ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_MPD_DATA2(2 - i),
+                                       da[i * 2] << 8 | da[i * 2 + 1]);
+               if (ret < 0)
+                       return ret;
+
+               ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_MASK(2 - i),
+                                       mask[i * 2] << 8 | mask[i * 2 + 1]);
+               if (ret)
+                       return ret;
+       }
+
+       if (wol->wolopts & WAKE_MAGICSECURE) {
+               ctl |= BCM54XX_WOL_SECKEY_OPT_6B <<
+                      BCM54XX_WOL_SECKEY_OPT_SHIFT;
+               ctl |= BCM54XX_WOL_MODE_SINGLE_MPDSEC << BCM54XX_WOL_MODE_SHIFT;
+               ctl |= BCM54XX_WOL_MASK_MODE_DA_FF <<
+                      BCM54XX_WOL_MASK_MODE_SHIFT;
+       } else {
+               if (wol->wolopts & WAKE_MAGIC)
+                       ctl |= BCM54XX_WOL_MODE_SINGLE_MPD;
+               else
+                       ctl |= BCM54XX_WOL_DIR_PKT_EN;
+               ctl |= BCM54XX_WOL_MASK_MODE_DA_ONLY <<
+                      BCM54XX_WOL_MASK_MODE_SHIFT;
+       }
+
+       /* Globally enable Wake-on-LAN */
+       ctl |= BCM54XX_WOL_EN | BCM54XX_WOL_CRC_CHK;
+
+       ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_MAIN_CTL, ctl);
+       if (ret < 0)
+               return ret;
+
+       /* Enable WOL interrupt on LED4 */
+       ret = bcm_phy_read_exp(phydev, BCM54XX_TOP_MISC_LED_CTL);
+       if (ret < 0)
+               return ret;
+
+       ret |= BCM54XX_LED4_SEL_INTR;
+       ret = bcm_phy_write_exp(phydev, BCM54XX_TOP_MISC_LED_CTL, ret);
+       if (ret < 0)
+               return ret;
+
+       /* Enable all Wake-on-LAN interrupt sources */
+       ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_INT_MASK, 0);
+       if (ret < 0)
+               return ret;
+
+       if (phy_interrupt_is_valid(phydev))
+               enable_irq_wake(phydev->irq);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(bcm_phy_set_wol);
+
+void bcm_phy_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol)
+{
+       struct net_device *ndev = phydev->attached_dev;
+       u8 da[ETH_ALEN];
+       unsigned int i;
+       int ret;
+       u16 ctl;
+
+       wol->supported = BCM54XX_WOL_SUPPORTED_MASK;
+       wol->wolopts = 0;
+
+       ret = bcm_phy_read_exp(phydev, BCM54XX_WOL_MAIN_CTL);
+       if (ret < 0)
+               return;
+
+       ctl = ret;
+
+       if (!(ctl & BCM54XX_WOL_EN))
+               return;
+
+       for (i = 0; i < sizeof(da) / 2; i++) {
+               ret = bcm_phy_read_exp(phydev,
+                                      BCM54XX_WOL_MPD_DATA2(2 - i));
+               if (ret < 0)
+                       return;
+
+               da[i * 2] = ret >> 8;
+               da[i * 2 + 1] = ret & 0xff;
+       }
+
+       if (ctl & BCM54XX_WOL_DIR_PKT_EN) {
+               if (is_broadcast_ether_addr(da))
+                       wol->wolopts |= WAKE_BCAST;
+               else if (is_multicast_ether_addr(da))
+                       wol->wolopts |= WAKE_MCAST;
+               else if (ether_addr_equal(da, ndev->dev_addr))
+                       wol->wolopts |= WAKE_UCAST;
+       } else {
+               ctl = (ctl >> BCM54XX_WOL_MODE_SHIFT) & BCM54XX_WOL_MODE_MASK;
+               switch (ctl) {
+               case BCM54XX_WOL_MODE_SINGLE_MPD:
+                       wol->wolopts |= WAKE_MAGIC;
+                       break;
+               case BCM54XX_WOL_MODE_SINGLE_MPDSEC:
+                       wol->wolopts |= WAKE_MAGICSECURE;
+                       memcpy(wol->sopass, da, sizeof(da));
+                       break;
+               default:
+                       break;
+               }
+       }
+}
+EXPORT_SYMBOL_GPL(bcm_phy_get_wol);
+
+irqreturn_t bcm_phy_wol_isr(int irq, void *dev_id)
+{
+       return IRQ_HANDLED;
+}
+EXPORT_SYMBOL_GPL(bcm_phy_wol_isr);
+
 MODULE_DESCRIPTION("Broadcom PHY Library");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Broadcom Corporation");
index 729db44..2f30ce0 100644 (file)
@@ -8,6 +8,9 @@
 
 #include <linux/brcmphy.h>
 #include <linux/phy.h>
+#include <linux/interrupt.h>
+
+struct ethtool_wolinfo;
 
 /* 28nm only register definitions */
 #define MISC_ADDR(base, channel)       base, channel
@@ -111,4 +114,8 @@ static inline void bcm_ptp_stop(struct bcm_ptp_private *priv)
 }
 #endif
 
+int bcm_phy_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol);
+void bcm_phy_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol);
+irqreturn_t bcm_phy_wol_isr(int irq, void *dev_id);
+
 #endif /* _LINUX_BCM_PHY_LIB_H */
index ad71c88..822c8b0 100644 (file)
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/phy.h>
+#include <linux/pm_wakeup.h>
 #include <linux/brcmphy.h>
 #include <linux/of.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/gpio/consumer.h>
 
 #define BRCM_PHY_MODEL(phydev) \
        ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask)
@@ -30,8 +34,17 @@ MODULE_LICENSE("GPL");
 struct bcm54xx_phy_priv {
        u64     *stats;
        struct bcm_ptp_private *ptp;
+       int     wake_irq;
+       bool    wake_irq_enabled;
 };
 
+static bool bcm54xx_phy_can_wakeup(struct phy_device *phydev)
+{
+       struct bcm54xx_phy_priv *priv = phydev->priv;
+
+       return phy_interrupt_is_valid(phydev) || priv->wake_irq >= 0;
+}
+
 static int bcm54xx_config_clock_delay(struct phy_device *phydev)
 {
        int rc, val;
@@ -413,6 +426,16 @@ static int bcm54xx_config_init(struct phy_device *phydev)
 
        bcm54xx_ptp_config_init(phydev);
 
+       /* Acknowledge any left over interrupt and charge the device for
+        * wake-up.
+        */
+       err = bcm_phy_read_exp(phydev, BCM54XX_WOL_INT_STATUS);
+       if (err < 0)
+               return err;
+
+       if (err)
+               pm_wakeup_event(&phydev->mdio.dev, 0);
+
        return 0;
 }
 
@@ -437,12 +460,39 @@ out:
        return ret;
 }
 
+static int bcm54xx_set_wakeup_irq(struct phy_device *phydev, bool state)
+{
+       struct bcm54xx_phy_priv *priv = phydev->priv;
+       int ret = 0;
+
+       if (!bcm54xx_phy_can_wakeup(phydev))
+               return ret;
+
+       if (priv->wake_irq_enabled != state) {
+               if (state)
+                       ret = enable_irq_wake(priv->wake_irq);
+               else
+                       ret = disable_irq_wake(priv->wake_irq);
+               priv->wake_irq_enabled = state;
+       }
+
+       return ret;
+}
+
 static int bcm54xx_suspend(struct phy_device *phydev)
 {
-       int ret;
+       int ret = 0;
 
        bcm54xx_ptp_stop(phydev);
 
+       /* Acknowledge any Wake-on-LAN interrupt prior to suspend */
+       ret = bcm_phy_read_exp(phydev, BCM54XX_WOL_INT_STATUS);
+       if (ret < 0)
+               return ret;
+
+       if (phydev->wol_enabled)
+               return bcm54xx_set_wakeup_irq(phydev, true);
+
        /* We cannot use a read/modify/write here otherwise the PHY gets into
         * a bad state where its LEDs keep flashing, thus defeating the purpose
         * of low power mode.
@@ -456,7 +506,13 @@ static int bcm54xx_suspend(struct phy_device *phydev)
 
 static int bcm54xx_resume(struct phy_device *phydev)
 {
-       int ret;
+       int ret = 0;
+
+       if (phydev->wol_enabled) {
+               ret = bcm54xx_set_wakeup_irq(phydev, false);
+               if (ret)
+                       return ret;
+       }
 
        ret = bcm54xx_iddq_set(phydev, false);
        if (ret < 0)
@@ -801,14 +857,54 @@ static int brcm_fet_suspend(struct phy_device *phydev)
        return err;
 }
 
+static void bcm54xx_phy_get_wol(struct phy_device *phydev,
+                               struct ethtool_wolinfo *wol)
+{
+       /* We cannot wake-up if we do not have a dedicated PHY interrupt line
+        * or an out of band GPIO descriptor for wake-up. Zeroing
+        * wol->supported allows the caller (MAC driver) to play through and
+        * offer its own Wake-on-LAN scheme if available.
+        */
+       if (!bcm54xx_phy_can_wakeup(phydev)) {
+               wol->supported = 0;
+               return;
+       }
+
+       bcm_phy_get_wol(phydev, wol);
+}
+
+static int bcm54xx_phy_set_wol(struct phy_device *phydev,
+                              struct ethtool_wolinfo *wol)
+{
+       int ret;
+
+       /* We cannot wake-up if we do not have a dedicated PHY interrupt line
+        * or an out of band GPIO descriptor for wake-up. Returning -EOPNOTSUPP
+        * allows the caller (MAC driver) to play through and offer its own
+        * Wake-on-LAN scheme if available.
+        */
+       if (!bcm54xx_phy_can_wakeup(phydev))
+               return -EOPNOTSUPP;
+
+       ret = bcm_phy_set_wol(phydev, wol);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
 static int bcm54xx_phy_probe(struct phy_device *phydev)
 {
        struct bcm54xx_phy_priv *priv;
+       struct gpio_desc *wakeup_gpio;
+       int ret = 0;
 
        priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
 
+       priv->wake_irq = -ENXIO;
+
        phydev->priv = priv;
 
        priv->stats = devm_kcalloc(&phydev->mdio.dev,
@@ -821,7 +917,35 @@ static int bcm54xx_phy_probe(struct phy_device *phydev)
        if (IS_ERR(priv->ptp))
                return PTR_ERR(priv->ptp);
 
-       return 0;
+       /* We cannot utilize the _optional variant here since we want to know
+        * whether the GPIO descriptor exists or not to advertise Wake-on-LAN
+        * support or not.
+        */
+       wakeup_gpio = devm_gpiod_get(&phydev->mdio.dev, "wakeup", GPIOD_IN);
+       if (PTR_ERR(wakeup_gpio) == -EPROBE_DEFER)
+               return PTR_ERR(wakeup_gpio);
+
+       if (!IS_ERR(wakeup_gpio)) {
+               priv->wake_irq = gpiod_to_irq(wakeup_gpio);
+
+               /* Dummy interrupt handler which is not enabled but is provided
+                * in order for the interrupt descriptor to be fully set-up.
+                */
+               ret = devm_request_irq(&phydev->mdio.dev, priv->wake_irq,
+                                      bcm_phy_wol_isr,
+                                      IRQF_TRIGGER_LOW | IRQF_NO_AUTOEN,
+                                      dev_name(&phydev->mdio.dev), phydev);
+               if (ret)
+                       return ret;
+       }
+
+       /* If we do not have a main interrupt or a side-band wake-up interrupt,
+        * then the device cannot be marked as wake-up capable.
+        */
+       if (!bcm54xx_phy_can_wakeup(phydev))
+               return 0;
+
+       return device_init_wakeup(&phydev->mdio.dev, true);
 }
 
 static void bcm54xx_get_stats(struct phy_device *phydev,
@@ -894,6 +1018,7 @@ static struct phy_driver broadcom_drivers[] = {
        .phy_id_mask    = 0xfffffff0,
        .name           = "Broadcom BCM54210E",
        /* PHY_GBIT_FEATURES */
+       .flags          = PHY_ALWAYS_CALL_SUSPEND,
        .get_sset_count = bcm_phy_get_sset_count,
        .get_strings    = bcm_phy_get_strings,
        .get_stats      = bcm54xx_get_stats,
@@ -904,6 +1029,8 @@ static struct phy_driver broadcom_drivers[] = {
        .link_change_notify     = bcm54xx_link_change_notify,
        .suspend        = bcm54xx_suspend,
        .resume         = bcm54xx_resume,
+       .get_wol        = bcm54xx_phy_get_wol,
+       .set_wol        = bcm54xx_phy_set_wol,
 }, {
        .phy_id         = PHY_ID_BCM5461,
        .phy_id_mask    = 0xfffffff0,
index 9ab5eff..fa8c6fd 100644 (file)
@@ -692,8 +692,19 @@ static int dp83869_configure_mode(struct phy_device *phydev,
        /* Below init sequence for each operational mode is defined in
         * section 9.4.8 of the datasheet.
         */
+       phy_ctrl_val = dp83869->mode;
+       if (phydev->interface == PHY_INTERFACE_MODE_MII) {
+               if (dp83869->mode == DP83869_100M_MEDIA_CONVERT ||
+                   dp83869->mode == DP83869_RGMII_100_BASE) {
+                       phy_ctrl_val |= DP83869_OP_MODE_MII;
+               } else {
+                       phydev_err(phydev, "selected op-mode is not valid with MII mode\n");
+                       return -EINVAL;
+               }
+       }
+
        ret = phy_write_mmd(phydev, DP83869_DEVADDR, DP83869_OP_MODE,
-                           dp83869->mode);
+                           phy_ctrl_val);
        if (ret)
                return ret;
 
index 3f81bb8..6d18ea1 100644 (file)
@@ -637,7 +637,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
 {
        int ret;
 
-       if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051)
+       if (!phy_id_compare(phydev->phy_id, PHY_ID_KSZ8051, MICREL_PHY_ID_MASK))
                return 0;
 
        ret = phy_read(phydev, MII_BMSR);
@@ -1566,7 +1566,7 @@ static int ksz9x31_cable_test_fault_length(struct phy_device *phydev, u16 stat)
         *
         * distance to fault = (VCT_DATA - 22) * 4 / cable propagation velocity
         */
-       if ((phydev->phy_id & MICREL_PHY_ID_MASK) == PHY_ID_KSZ9131)
+       if (phydev_id_compare(phydev, PHY_ID_KSZ9131))
                dt = clamp(dt - 22, 0, 255);
 
        return (dt * 400) / 10;
@@ -1774,6 +1774,79 @@ static int ksz886x_read_status(struct phy_device *phydev)
        return genphy_read_status(phydev);
 }
 
+struct ksz9477_errata_write {
+       u8 dev_addr;
+       u8 reg_addr;
+       u16 val;
+};
+
+static const struct ksz9477_errata_write ksz9477_errata_writes[] = {
+        /* Register settings are needed to improve PHY receive performance */
+       {0x01, 0x6f, 0xdd0b},
+       {0x01, 0x8f, 0x6032},
+       {0x01, 0x9d, 0x248c},
+       {0x01, 0x75, 0x0060},
+       {0x01, 0xd3, 0x7777},
+       {0x1c, 0x06, 0x3008},
+       {0x1c, 0x08, 0x2000},
+
+       /* Transmit waveform amplitude can be improved (1000BASE-T, 100BASE-TX, 10BASE-Te) */
+       {0x1c, 0x04, 0x00d0},
+
+       /* Energy Efficient Ethernet (EEE) feature select must be manually disabled */
+       {0x07, 0x3c, 0x0000},
+
+       /* Register settings are required to meet data sheet supply current specifications */
+       {0x1c, 0x13, 0x6eff},
+       {0x1c, 0x14, 0xe6ff},
+       {0x1c, 0x15, 0x6eff},
+       {0x1c, 0x16, 0xe6ff},
+       {0x1c, 0x17, 0x00ff},
+       {0x1c, 0x18, 0x43ff},
+       {0x1c, 0x19, 0xc3ff},
+       {0x1c, 0x1a, 0x6fff},
+       {0x1c, 0x1b, 0x07ff},
+       {0x1c, 0x1c, 0x0fff},
+       {0x1c, 0x1d, 0xe7ff},
+       {0x1c, 0x1e, 0xefff},
+       {0x1c, 0x20, 0xeeee},
+};
+
+static int ksz9477_config_init(struct phy_device *phydev)
+{
+       int err;
+       int i;
+
+       /* Apply PHY settings to address errata listed in
+        * KSZ9477, KSZ9897, KSZ9896, KSZ9567, KSZ8565
+        * Silicon Errata and Data Sheet Clarification documents.
+        *
+        * Document notes: Before configuring the PHY MMD registers, it is
+        * necessary to set the PHY to 100 Mbps speed with auto-negotiation
+        * disabled by writing to register 0xN100-0xN101. After writing the
+        * MMD registers, and after all errata workarounds that involve PHY
+        * register settings, write register 0xN100-0xN101 again to enable
+        * and restart auto-negotiation.
+        */
+       err = phy_write(phydev, MII_BMCR, BMCR_SPEED100 | BMCR_FULLDPLX);
+       if (err)
+               return err;
+
+       for (i = 0; i < ARRAY_SIZE(ksz9477_errata_writes); ++i) {
+               const struct ksz9477_errata_write *errata = &ksz9477_errata_writes[i];
+
+               err = phy_write_mmd(phydev, errata->dev_addr, errata->reg_addr, errata->val);
+               if (err)
+                       return err;
+       }
+
+       err = genphy_restart_aneg(phydev);
+       if (err)
+               return err;
+
+       return kszphy_config_init(phydev);
+}
+
 static int kszphy_get_sset_count(struct phy_device *phydev)
 {
        return ARRAY_SIZE(kszphy_hw_stats);
@@ -1998,7 +2071,7 @@ static __always_inline int ksz886x_cable_test_fault_length(struct phy_device *ph
         */
        dt = FIELD_GET(data_mask, status);
 
-       if ((phydev->phy_id & MICREL_PHY_ID_MASK) == PHY_ID_LAN8814)
+       if (phydev_id_compare(phydev, PHY_ID_LAN8814))
                return ((dt - 22) * 800) / 10;
        else
                return (dt * 400) / 10;
@@ -4735,7 +4808,7 @@ static struct phy_driver ksphy_driver[] = {
        .phy_id_mask    = MICREL_PHY_ID_MASK,
        .name           = "Microchip KSZ9477",
        /* PHY_GBIT_FEATURES */
-       .config_init    = kszphy_config_init,
+       .config_init    = ksz9477_config_init,
        .config_intr    = kszphy_config_intr,
        .handle_interrupt = kszphy_handle_interrupt,
        .suspend        = genphy_suspend,
index 094967b..534ca7d 100644 (file)
@@ -1,19 +1,29 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * Driver for Microchip 10BASE-T1S LAN867X PHY
+ * Driver for Microchip 10BASE-T1S PHYs
  *
  * Support: Microchip Phys:
- *  lan8670, lan8671, lan8672
+ *  lan8670/1/2 Rev.B1
+ *  lan8650/1 Rev.B0 Internal PHYs
  */
 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/phy.h>
 
-#define PHY_ID_LAN867X 0x0007C160
+#define PHY_ID_LAN867X_REVB1 0x0007C162
+#define PHY_ID_LAN865X_REVB0 0x0007C1B3
 
-#define LAN867X_REG_IRQ_1_CTL 0x001C
-#define LAN867X_REG_IRQ_2_CTL 0x001D
+#define LAN867X_REG_STS2 0x0019
+
+#define LAN867x_RESET_COMPLETE_STS BIT(11)
+
+#define LAN865X_REG_CFGPARAM_ADDR 0x00D8
+#define LAN865X_REG_CFGPARAM_DATA 0x00D9
+#define LAN865X_REG_CFGPARAM_CTRL 0x00DA
+#define LAN865X_REG_STS2 0x0019
+
+#define LAN865X_CFGPARAM_READ_ENABLE BIT(1)
 
 /* The arrays below are pulled from the following table from AN1699
  * Access MMD Address Value Mask
  * W   0x1F 0x0099 0x7F80 ------
  */
 
-static const int lan867x_fixup_registers[12] = {
+static const u32 lan867x_revb1_fixup_registers[12] = {
        0x00D0, 0x00D1, 0x0084, 0x0085,
        0x008A, 0x0087, 0x0088, 0x008B,
        0x0080, 0x00F1, 0x0096, 0x0099,
 };
 
-static const int lan867x_fixup_values[12] = {
+static const u16 lan867x_revb1_fixup_values[12] = {
        0x0002, 0x0000, 0x3380, 0x0006,
        0xC000, 0x801C, 0x033F, 0x0404,
        0x0600, 0x2400, 0x2000, 0x7F80,
 };
 
-static const int lan867x_fixup_masks[12] = {
+static const u16 lan867x_revb1_fixup_masks[12] = {
        0x0E03, 0x0300, 0xFFC0, 0x000F,
        0xF800, 0x801C, 0x1FFF, 0xFFFF,
        0x0600, 0x7F00, 0x2000, 0xFFFF,
 };
 
-static int lan867x_config_init(struct phy_device *phydev)
+/* LAN865x Rev.B0 configuration parameters from AN1760 */
+static const u32 lan865x_revb0_fixup_registers[28] = {
+       0x0091, 0x0081, 0x0043, 0x0044,
+       0x0045, 0x0053, 0x0054, 0x0055,
+       0x0040, 0x0050, 0x00D0, 0x00E9,
+       0x00F5, 0x00F4, 0x00F8, 0x00F9,
+       0x00B0, 0x00B1, 0x00B2, 0x00B3,
+       0x00B4, 0x00B5, 0x00B6, 0x00B7,
+       0x00B8, 0x00B9, 0x00BA, 0x00BB,
+};
+
+static const u16 lan865x_revb0_fixup_values[28] = {
+       0x9660, 0x00C0, 0x00FF, 0xFFFF,
+       0x0000, 0x00FF, 0xFFFF, 0x0000,
+       0x0002, 0x0002, 0x5F21, 0x9E50,
+       0x1CF8, 0xC020, 0x9B00, 0x4E53,
+       0x0103, 0x0910, 0x1D26, 0x002A,
+       0x0103, 0x070D, 0x1720, 0x0027,
+       0x0509, 0x0E13, 0x1C25, 0x002B,
+};
+
+static const u16 lan865x_revb0_fixup_cfg_regs[5] = {
+       0x0084, 0x008A, 0x00AD, 0x00AE, 0x00AF
+};
+
+/* Pulled from AN1760 describing 'indirect read'
+ *
+ * write_register(0x4, 0x00D8, addr)
+ * write_register(0x4, 0x00DA, 0x2)
+ * return (int8)(read_register(0x4, 0x00D9))
+ *
+ * 0x4 refers to memory map selector 4, which maps to MDIO_MMD_VEND2
+ */
+static int lan865x_revb0_indirect_read(struct phy_device *phydev, u16 addr)
+{
+       int ret;
+
+       ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, LAN865X_REG_CFGPARAM_ADDR,
+                           addr);
+       if (ret)
+               return ret;
+
+       ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, LAN865X_REG_CFGPARAM_CTRL,
+                           LAN865X_CFGPARAM_READ_ENABLE);
+       if (ret)
+               return ret;
+
+       return phy_read_mmd(phydev, MDIO_MMD_VEND2, LAN865X_REG_CFGPARAM_DATA);
+}
+
+/* This is pulled straight from AN1760 from 'calculation of offset 1' &
+ * 'calculation of offset 2'
+ */
+static int lan865x_generate_cfg_offsets(struct phy_device *phydev, s8 offsets[2])
+{
+       const u16 fixup_regs[2] = {0x0004, 0x0008};
+       int ret;
+
+       for (int i = 0; i < ARRAY_SIZE(fixup_regs); i++) {
+               ret = lan865x_revb0_indirect_read(phydev, fixup_regs[i]);
+               if (ret < 0)
+                       return ret;
+               if (ret & BIT(4))
+                       offsets[i] = ret | 0xE0;
+               else
+                       offsets[i] = ret;
+       }
+
+       return 0;
+}
+
+static int lan865x_read_cfg_params(struct phy_device *phydev, u16 cfg_params[])
+{
+       int ret;
+
+       for (int i = 0; i < ARRAY_SIZE(lan865x_revb0_fixup_cfg_regs); i++) {
+               ret = phy_read_mmd(phydev, MDIO_MMD_VEND2,
+                                  lan865x_revb0_fixup_cfg_regs[i]);
+               if (ret < 0)
+                       return ret;
+               cfg_params[i] = (u16)ret;
+       }
+
+       return 0;
+}
+
+static int lan865x_write_cfg_params(struct phy_device *phydev, u16 cfg_params[])
 {
-       /* HW quirk: Microchip states in the application note (AN1699) for the phy
-        * that a set of read-modify-write (rmw) operations has to be performed
-        * on a set of seemingly magic registers.
-        * The result of these operations is just described as 'optimal performance'
-        * Microchip gives no explanation as to what these mmd regs do,
-        * in fact they are marked as reserved in the datasheet.
-        * It is unclear if phy_modify_mmd would be safe to use or if a write
-        * really has to happen to each register.
-        * In order to exactly conform to what is stated in the AN phy_write_mmd is
-        * used, which might then write the same value back as read + modified.
+       int ret;
+
+       for (int i = 0; i < ARRAY_SIZE(lan865x_revb0_fixup_cfg_regs); i++) {
+               ret = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+                                   lan865x_revb0_fixup_cfg_regs[i],
+                                   cfg_params[i]);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int lan865x_setup_cfgparam(struct phy_device *phydev)
+{
+       u16 cfg_params[ARRAY_SIZE(lan865x_revb0_fixup_cfg_regs)];
+       u16 cfg_results[5];
+       s8 offsets[2];
+       int ret;
+
+       ret = lan865x_generate_cfg_offsets(phydev, offsets);
+       if (ret)
+               return ret;
+
+       ret = lan865x_read_cfg_params(phydev, cfg_params);
+       if (ret)
+               return ret;
+
+       cfg_results[0] = (cfg_params[0] & 0x000F) |
+                         FIELD_PREP(GENMASK(15, 10), 9 + offsets[0]) |
+                         FIELD_PREP(GENMASK(15, 4), 14 + offsets[0]);
+       cfg_results[1] = (cfg_params[1] & 0x03FF) |
+                         FIELD_PREP(GENMASK(15, 10), 40 + offsets[1]);
+       cfg_results[2] = (cfg_params[2] & 0xC0C0) |
+                         FIELD_PREP(GENMASK(15, 8), 5 + offsets[0]) |
+                         (9 + offsets[0]);
+       cfg_results[3] = (cfg_params[3] & 0xC0C0) |
+                         FIELD_PREP(GENMASK(15, 8), 9 + offsets[0]) |
+                         (14 + offsets[0]);
+       cfg_results[4] = (cfg_params[4] & 0xC0C0) |
+                         FIELD_PREP(GENMASK(15, 8), 17 + offsets[0]) |
+                         (22 + offsets[0]);
+
+       return lan865x_write_cfg_params(phydev, cfg_results);
+}
+
+static int lan865x_revb0_config_init(struct phy_device *phydev)
+{
+       int ret;
+
+       /* Reference to AN1760
+        * https://ww1.microchip.com/downloads/aemDocuments/documents/AIS/ProductDocuments/SupportingCollateral/AN-LAN8650-1-Configuration-60001760.pdf
+        */
+       for (int i = 0; i < ARRAY_SIZE(lan865x_revb0_fixup_registers); i++) {
+               ret = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+                                   lan865x_revb0_fixup_registers[i],
+                                   lan865x_revb0_fixup_values[i]);
+               if (ret)
+                       return ret;
+       }
+       /* Function to calculate and write the configuration parameters in the
+        * 0x0084, 0x008A, 0x00AD, 0x00AE and 0x00AF registers (from AN1760)
         */
+       return lan865x_setup_cfgparam(phydev);
+}
 
-       int reg_value;
+static int lan867x_revb1_config_init(struct phy_device *phydev)
+{
        int err;
-       int reg;
 
-       /* Read-Modified Write Pseudocode (from AN1699)
-        * current_val = read_register(mmd, addr) // Read current register value
-        * new_val = current_val AND (NOT mask) // Clear bit fields to be written
-        * new_val = new_val OR value // Set bits
-        * write_register(mmd, addr, new_val) // Write back updated register value
+       /* The chip completes a reset in 3us, we might get here earlier than
+        * that, as an added margin we'll conditionally sleep 5us.
         */
-       for (int i = 0; i < ARRAY_SIZE(lan867x_fixup_registers); i++) {
-               reg = lan867x_fixup_registers[i];
-               reg_value = phy_read_mmd(phydev, MDIO_MMD_VEND2, reg);
-               reg_value &= ~lan867x_fixup_masks[i];
-               reg_value |= lan867x_fixup_values[i];
-               err = phy_write_mmd(phydev, MDIO_MMD_VEND2, reg, reg_value);
-               if (err != 0)
+       err = phy_read_mmd(phydev, MDIO_MMD_VEND2, LAN867X_REG_STS2);
+       if (err < 0)
+               return err;
+
+       if (!(err & LAN867x_RESET_COMPLETE_STS)) {
+               udelay(5);
+               err = phy_read_mmd(phydev, MDIO_MMD_VEND2, LAN867X_REG_STS2);
+               if (err < 0)
                        return err;
+               if (!(err & LAN867x_RESET_COMPLETE_STS)) {
+                       phydev_err(phydev, "PHY reset failed\n");
+                       return -ENODEV;
+               }
        }
 
-       /* None of the interrupts in the lan867x phy seem relevant.
-        * Other phys inspect the link status and call phy_trigger_machine
-        * in the interrupt handler.
-        * This phy does not support link status, and thus has no interrupt
-        * for it either.
-        * So we'll just disable all interrupts on the chip.
+       /* Reference to AN1699
+        * https://ww1.microchip.com/downloads/aemDocuments/documents/AIS/ProductDocuments/SupportingCollateral/AN-LAN8670-1-2-config-60001699.pdf
+        * AN1699 says Read, Modify, Write, but the Write is not required if the
+        * register already has the required value. So it is safe to use
+        * phy_modify_mmd here.
         */
-       err = phy_write_mmd(phydev, MDIO_MMD_VEND2, LAN867X_REG_IRQ_1_CTL, 0xFFFF);
-       if (err != 0)
-               return err;
-       return phy_write_mmd(phydev, MDIO_MMD_VEND2, LAN867X_REG_IRQ_2_CTL, 0xFFFF);
+       for (int i = 0; i < ARRAY_SIZE(lan867x_revb1_fixup_registers); i++) {
+               err = phy_modify_mmd(phydev, MDIO_MMD_VEND2,
+                                    lan867x_revb1_fixup_registers[i],
+                                    lan867x_revb1_fixup_masks[i],
+                                    lan867x_revb1_fixup_values[i]);
+               if (err)
+                       return err;
+       }
+
+       return 0;
 }
 
-static int lan867x_read_status(struct phy_device *phydev)
+static int lan86xx_read_status(struct phy_device *phydev)
 {
        /* The phy has some limitations, namely:
         *  - always reports link up
@@ -111,28 +268,39 @@ static int lan867x_read_status(struct phy_device *phydev)
        return 0;
 }
 
-static struct phy_driver lan867x_driver[] = {
+static struct phy_driver microchip_t1s_driver[] = {
        {
-               PHY_ID_MATCH_MODEL(PHY_ID_LAN867X),
-               .name               = "LAN867X",
+               PHY_ID_MATCH_EXACT(PHY_ID_LAN867X_REVB1),
+               .name               = "LAN867X Rev.B1",
                .features           = PHY_BASIC_T1S_P2MP_FEATURES,
-               .config_init        = lan867x_config_init,
-               .read_status        = lan867x_read_status,
+               .config_init        = lan867x_revb1_config_init,
+               .read_status        = lan86xx_read_status,
                .get_plca_cfg       = genphy_c45_plca_get_cfg,
                .set_plca_cfg       = genphy_c45_plca_set_cfg,
                .get_plca_status    = genphy_c45_plca_get_status,
-       }
+       },
+       {
+               PHY_ID_MATCH_EXACT(PHY_ID_LAN865X_REVB0),
+               .name               = "LAN865X Rev.B0 Internal Phy",
+               .features           = PHY_BASIC_T1S_P2MP_FEATURES,
+               .config_init        = lan865x_revb0_config_init,
+               .read_status        = lan86xx_read_status,
+               .get_plca_cfg       = genphy_c45_plca_get_cfg,
+               .set_plca_cfg       = genphy_c45_plca_set_cfg,
+               .get_plca_status    = genphy_c45_plca_get_status,
+       },
 };
 
-module_phy_driver(lan867x_driver);
+module_phy_driver(microchip_t1s_driver);
 
 static struct mdio_device_id __maybe_unused tbl[] = {
-       { PHY_ID_MATCH_MODEL(PHY_ID_LAN867X) },
+       { PHY_ID_MATCH_EXACT(PHY_ID_LAN867X_REVB1) },
+       { PHY_ID_MATCH_EXACT(PHY_ID_LAN865X_REVB0) },
        { }
 };
 
 MODULE_DEVICE_TABLE(mdio, tbl);
 
-MODULE_DESCRIPTION("Microchip 10BASE-T1S lan867x Phy driver");
+MODULE_DESCRIPTION("Microchip 10BASE-T1S PHYs driver");
 MODULE_AUTHOR("Ramón Nordin Rodriguez");
 MODULE_LICENSE("GPL");
index defe5cc..7a96205 100644 (file)
@@ -292,6 +292,7 @@ enum rgmii_clock_delay {
 #define PHY_ID_VSC8575                   0x000707d0
 #define PHY_ID_VSC8582                   0x000707b0
 #define PHY_ID_VSC8584                   0x000707c0
+#define PHY_VENDOR_MSCC                        0x00070400
 
 #define MSCC_VDDMAC_1500                 1500
 #define MSCC_VDDMAC_1800                 1800
index 28df8a2..669a4a7 100644 (file)
@@ -107,6 +107,9 @@ static const struct vsc8531_edge_rate_table edge_table[] = {
 };
 #endif
 
+static const int vsc85xx_internal_delay[] = {200, 800, 1100, 1700, 2000, 2300,
+                                            2600, 3400};
+
 static int vsc85xx_phy_read_page(struct phy_device *phydev)
 {
        return __phy_read(phydev, MSCC_EXT_PAGE_ACCESS);
@@ -525,8 +528,12 @@ static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
 {
        u16 rgmii_rx_delay_pos = ffs(rgmii_rx_delay_mask) - 1;
        u16 rgmii_tx_delay_pos = ffs(rgmii_tx_delay_mask) - 1;
+       int delay_size = ARRAY_SIZE(vsc85xx_internal_delay);
+       struct device *dev = &phydev->mdio.dev;
        u16 reg_val = 0;
        u16 mask = 0;
+       s32 rx_delay;
+       s32 tx_delay;
        int rc = 0;
 
        /* For traffic to pass, the VSC8502 family needs the RX_CLK disable bit
@@ -541,12 +548,28 @@ static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
        if (phy_interface_is_rgmii(phydev))
                mask |= rgmii_rx_delay_mask | rgmii_tx_delay_mask;
 
-       if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID ||
-           phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
-               reg_val |= RGMII_CLK_DELAY_2_0_NS << rgmii_rx_delay_pos;
-       if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID ||
-           phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
-               reg_val |= RGMII_CLK_DELAY_2_0_NS << rgmii_tx_delay_pos;
+       rx_delay = phy_get_internal_delay(phydev, dev, vsc85xx_internal_delay,
+                                         delay_size, true);
+       if (rx_delay < 0) {
+               if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID ||
+                   phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
+                       rx_delay = RGMII_CLK_DELAY_2_0_NS;
+               else
+                       rx_delay = RGMII_CLK_DELAY_0_2_NS;
+       }
+
+       tx_delay = phy_get_internal_delay(phydev, dev, vsc85xx_internal_delay,
+                                         delay_size, false);
+       if (tx_delay < 0) {
+               if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID ||
+                   phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
+                       tx_delay = RGMII_CLK_DELAY_2_0_NS;
+               else
+                       tx_delay = RGMII_CLK_DELAY_0_2_NS;
+       }
+
+       reg_val |= rx_delay << rgmii_rx_delay_pos;
+       reg_val |= tx_delay << rgmii_tx_delay_pos;
 
        if (mask)
                rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
@@ -2678,21 +2701,7 @@ static struct phy_driver vsc85xx_driver[] = {
 module_phy_driver(vsc85xx_driver);
 
 static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
-       { PHY_ID_VSC8501, 0xfffffff0, },
-       { PHY_ID_VSC8502, 0xfffffff0, },
-       { PHY_ID_VSC8504, 0xfffffff0, },
-       { PHY_ID_VSC8514, 0xfffffff0, },
-       { PHY_ID_VSC8530, 0xfffffff0, },
-       { PHY_ID_VSC8531, 0xfffffff0, },
-       { PHY_ID_VSC8540, 0xfffffff0, },
-       { PHY_ID_VSC8541, 0xfffffff0, },
-       { PHY_ID_VSC8552, 0xfffffff0, },
-       { PHY_ID_VSC856X, 0xfffffff0, },
-       { PHY_ID_VSC8572, 0xfffffff0, },
-       { PHY_ID_VSC8574, 0xfffffff0, },
-       { PHY_ID_VSC8575, 0xfffffff0, },
-       { PHY_ID_VSC8582, 0xfffffff0, },
-       { PHY_ID_VSC8584, 0xfffffff0, },
+       { PHY_ID_MATCH_VENDOR(PHY_VENDOR_MSCC) },
        { }
 };
 
index 0c0df38..bdf00b2 100644 (file)
@@ -52,6 +52,7 @@ static const char *phy_state_to_str(enum phy_state st)
        PHY_STATE_STR(NOLINK)
        PHY_STATE_STR(CABLETEST)
        PHY_STATE_STR(HALTED)
+       PHY_STATE_STR(ERROR)
        }
 
        return NULL;
@@ -1184,7 +1185,7 @@ void phy_stop_machine(struct phy_device *phydev)
 static void phy_process_error(struct phy_device *phydev)
 {
        mutex_lock(&phydev->lock);
-       phydev->state = PHY_HALTED;
+       phydev->state = PHY_ERROR;
        mutex_unlock(&phydev->lock);
 
        phy_trigger_machine(phydev);
@@ -1198,10 +1199,10 @@ static void phy_error_precise(struct phy_device *phydev,
 }
 
 /**
- * phy_error - enter HALTED state for this PHY device
+ * phy_error - enter ERROR state for this PHY device
  * @phydev: target phy_device struct
  *
- * Moves the PHY to the HALTED state in response to a read
+ * Moves the PHY to the ERROR state in response to a read
  * or write error, and tells the controller the link is down.
  * Must not be called from interrupt context, or while the
  * phydev->lock is held.
@@ -1326,7 +1327,8 @@ void phy_stop(struct phy_device *phydev)
        struct net_device *dev = phydev->attached_dev;
        enum phy_state old_state;
 
-       if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) {
+       if (!phy_is_started(phydev) && phydev->state != PHY_DOWN &&
+           phydev->state != PHY_ERROR) {
                WARN(1, "called from state %s\n",
                     phy_state_to_str(phydev->state));
                return;
@@ -1443,6 +1445,7 @@ void phy_state_machine(struct work_struct *work)
                }
                break;
        case PHY_HALTED:
+       case PHY_ERROR:
                if (phydev->link) {
                        phydev->link = 0;
                        phy_link_down(phydev);
index 17d0d05..2cad9cc 100644 (file)
@@ -454,8 +454,7 @@ int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask)
                fixup = list_entry(pos, struct phy_fixup, list);
 
                if ((!strcmp(fixup->bus_id, bus_id)) &&
-                   ((fixup->phy_uid & phy_uid_mask) ==
-                    (phy_uid & phy_uid_mask))) {
+                   phy_id_compare(fixup->phy_uid, phy_uid, phy_uid_mask)) {
                        list_del(&fixup->list);
                        kfree(fixup);
                        ret = 0;
@@ -491,8 +490,8 @@ static int phy_needs_fixup(struct phy_device *phydev, struct phy_fixup *fixup)
                if (strcmp(fixup->bus_id, PHY_ANY_ID) != 0)
                        return 0;
 
-       if ((fixup->phy_uid & fixup->phy_uid_mask) !=
-           (phydev->phy_id & fixup->phy_uid_mask))
+       if (!phy_id_compare(phydev->phy_id, fixup->phy_uid,
+                           fixup->phy_uid_mask))
                if (fixup->phy_uid != PHY_ANY_UID)
                        return 0;
 
@@ -539,15 +538,14 @@ static int phy_bus_match(struct device *dev, struct device_driver *drv)
                        if (phydev->c45_ids.device_ids[i] == 0xffffffff)
                                continue;
 
-                       if ((phydrv->phy_id & phydrv->phy_id_mask) ==
-                           (phydev->c45_ids.device_ids[i] &
-                            phydrv->phy_id_mask))
+                       if (phy_id_compare(phydev->c45_ids.device_ids[i],
+                                          phydrv->phy_id, phydrv->phy_id_mask))
                                return 1;
                }
                return 0;
        } else {
-               return (phydrv->phy_id & phydrv->phy_id_mask) ==
-                       (phydev->phy_id & phydrv->phy_id_mask);
+               return phy_id_compare(phydev->phy_id, phydrv->phy_id,
+                                     phydrv->phy_id_mask);
        }
 }
 
@@ -1860,9 +1858,10 @@ int phy_suspend(struct phy_device *phydev)
        if (phydev->suspended)
                return 0;
 
-       /* If the device has WOL enabled, we cannot suspend the PHY */
        phy_ethtool_get_wol(phydev, &wol);
-       if (wol.wolopts || (netdev && netdev->wol_enabled))
+       phydev->wol_enabled = wol.wolopts || (netdev && netdev->wol_enabled);
+       /* If the device has WOL enabled, we cannot suspend the PHY */
+       if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND))
                return -EBUSY;
 
        if (!phydrv || !phydrv->suspend)
index b483111..1ae7868 100644 (file)
@@ -156,6 +156,23 @@ static const char *phylink_an_mode_str(unsigned int mode)
        return mode < ARRAY_SIZE(modestr) ? modestr[mode] : "unknown";
 }
 
+static unsigned int phylink_interface_signal_rate(phy_interface_t interface)
+{
+       switch (interface) {
+       case PHY_INTERFACE_MODE_SGMII:
+       case PHY_INTERFACE_MODE_1000BASEX: /* 1.25Mbd */
+               return 1250;
+       case PHY_INTERFACE_MODE_2500BASEX: /* 3.125Mbd */
+               return 3125;
+       case PHY_INTERFACE_MODE_5GBASER: /* 5.15625Mbd */
+               return 5156;
+       case PHY_INTERFACE_MODE_10GBASER: /* 10.3125Mbd */
+               return 10313;
+       default:
+               return 0;
+       }
+}
+
 /**
  * phylink_interface_max_speed() - get the maximum speed of a phy interface
  * @interface: phy interface mode defined by &typedef phy_interface_t
@@ -695,20 +712,17 @@ static int phylink_validate(struct phylink *pl, unsigned long *supported,
 {
        const unsigned long *interfaces = pl->config->supported_interfaces;
 
-       if (!phy_interface_empty(interfaces)) {
-               if (state->interface == PHY_INTERFACE_MODE_NA)
-                       return phylink_validate_mask(pl, supported, state,
-                                                    interfaces);
+       if (state->interface == PHY_INTERFACE_MODE_NA)
+               return phylink_validate_mask(pl, supported, state, interfaces);
 
-               if (!test_bit(state->interface, interfaces))
-                       return -EINVAL;
-       }
+       if (!test_bit(state->interface, interfaces))
+               return -EINVAL;
 
        return phylink_validate_mac_and_pcs(pl, supported, state);
 }
 
 static int phylink_parse_fixedlink(struct phylink *pl,
-                                  struct fwnode_handle *fwnode)
+                                  const struct fwnode_handle *fwnode)
 {
        struct fwnode_handle *fixed_node;
        bool pause, asym_pause, autoneg;
@@ -819,7 +833,8 @@ static int phylink_parse_fixedlink(struct phylink *pl,
        return 0;
 }
 
-static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
+static int phylink_parse_mode(struct phylink *pl,
+                             const struct fwnode_handle *fwnode)
 {
        struct fwnode_handle *dn;
        const char *managed;
@@ -962,11 +977,10 @@ static void phylink_apply_manual_flow(struct phylink *pl,
                state->pause = pl->link_config.pause;
 }
 
-static void phylink_resolve_flow(struct phylink_link_state *state)
+static void phylink_resolve_an_pause(struct phylink_link_state *state)
 {
        bool tx_pause, rx_pause;
 
-       state->pause = MLO_PAUSE_NONE;
        if (state->duplex == DUPLEX_FULL) {
                linkmode_resolve_pause(state->advertising,
                                       state->lp_advertising,
@@ -978,6 +992,25 @@ static void phylink_resolve_flow(struct phylink_link_state *state)
        }
 }
 
+static int phylink_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+                             const struct phylink_link_state *state,
+                             bool permit_pause_to_mac)
+{
+       if (!pcs)
+               return 0;
+
+       return pcs->ops->pcs_config(pcs, mode, state->interface,
+                                   state->advertising, permit_pause_to_mac);
+}
+
+static void phylink_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
+                               phy_interface_t interface, int speed,
+                               int duplex)
+{
+       if (pcs && pcs->ops->pcs_link_up)
+               pcs->ops->pcs_link_up(pcs, mode, interface, speed, duplex);
+}
+
 static void phylink_pcs_poll_stop(struct phylink *pl)
 {
        if (pl->cfg_link_an_mode == MLO_AN_INBAND)
@@ -1024,6 +1057,7 @@ static void phylink_major_config(struct phylink *pl, bool restart,
 {
        struct phylink_pcs *pcs = NULL;
        bool pcs_changed = false;
+       unsigned int rate_kbd;
        int err;
 
        phylink_dbg(pl, "major config %s\n", phy_modes(state->interface));
@@ -1060,18 +1094,15 @@ static void phylink_major_config(struct phylink *pl, bool restart,
 
        phylink_mac_config(pl, state);
 
-       if (pl->pcs) {
-               err = pl->pcs->ops->pcs_config(pl->pcs, pl->cur_link_an_mode,
-                                              state->interface,
-                                              state->advertising,
-                                              !!(pl->link_config.pause &
-                                                 MLO_PAUSE_AN));
-               if (err < 0)
-                       phylink_err(pl, "pcs_config failed: %pe\n",
-                                   ERR_PTR(err));
-               if (err > 0)
-                       restart = true;
-       }
+       err = phylink_pcs_config(pl->pcs, pl->cur_link_an_mode, state,
+                                !!(pl->link_config.pause &
+                                   MLO_PAUSE_AN));
+       if (err < 0)
+               phylink_err(pl, "pcs_config failed: %pe\n",
+                           ERR_PTR(err));
+       else if (err > 0)
+               restart = true;
+
        if (restart)
                phylink_mac_pcs_an_restart(pl);
 
@@ -1083,6 +1114,12 @@ static void phylink_major_config(struct phylink *pl, bool restart,
                                    ERR_PTR(err));
        }
 
+       if (pl->sfp_bus) {
+               rate_kbd = phylink_interface_signal_rate(state->interface);
+               if (rate_kbd)
+                       sfp_upstream_set_signal_rate(pl->sfp_bus, rate_kbd);
+       }
+
        phylink_pcs_poll_start(pl);
 }
 
@@ -1116,11 +1153,9 @@ static int phylink_change_inband_advert(struct phylink *pl)
         * restart negotiation if the pcs_config() helper indicates that
         * the programmed advertisement has changed.
         */
-       ret = pl->pcs->ops->pcs_config(pl->pcs, pl->cur_link_an_mode,
-                                      pl->link_config.interface,
-                                      pl->link_config.advertising,
-                                      !!(pl->link_config.pause &
-                                         MLO_PAUSE_AN));
+       ret = phylink_pcs_config(pl->pcs, pl->cur_link_an_mode,
+                                &pl->link_config,
+                                !!(pl->link_config.pause & MLO_PAUSE_AN));
        if (ret < 0)
                return ret;
 
@@ -1171,7 +1206,8 @@ static void phylink_get_fixed_state(struct phylink *pl,
        else if (pl->link_gpio)
                state->link = !!gpiod_get_value_cansleep(pl->link_gpio);
 
-       phylink_resolve_flow(state);
+       state->pause = MLO_PAUSE_NONE;
+       phylink_resolve_an_pause(state);
 }
 
 static void phylink_mac_initial_config(struct phylink *pl, bool force_restart)
@@ -1251,9 +1287,8 @@ static void phylink_link_up(struct phylink *pl,
 
        pl->cur_interface = link_state.interface;
 
-       if (pl->pcs && pl->pcs->ops->pcs_link_up)
-               pl->pcs->ops->pcs_link_up(pl->pcs, pl->cur_link_an_mode,
-                                         pl->cur_interface, speed, duplex);
+       phylink_pcs_link_up(pl->pcs, pl->cur_link_an_mode, pl->cur_interface,
+                           speed, duplex);
 
        pl->mac_ops->mac_link_up(pl->config, pl->phydev, pl->cur_link_an_mode,
                                 pl->cur_interface, speed, duplex,
@@ -1441,7 +1476,7 @@ static void phylink_fixed_poll(struct timer_list *t)
 static const struct sfp_upstream_ops sfp_phylink_ops;
 
 static int phylink_register_sfp(struct phylink *pl,
-                               struct fwnode_handle *fwnode)
+                               const struct fwnode_handle *fwnode)
 {
        struct sfp_bus *bus;
        int ret;
@@ -1480,7 +1515,7 @@ static int phylink_register_sfp(struct phylink *pl,
  * must use IS_ERR() to check for errors from this function.
  */
 struct phylink *phylink_create(struct phylink_config *config,
-                              struct fwnode_handle *fwnode,
+                              const struct fwnode_handle *fwnode,
                               phy_interface_t iface,
                               const struct phylink_mac_ops *mac_ops)
 {
@@ -1488,19 +1523,18 @@ struct phylink *phylink_create(struct phylink_config *config,
        struct phylink *pl;
        int ret;
 
-       if (mac_ops->mac_select_pcs &&
-           mac_ops->mac_select_pcs(config, PHY_INTERFACE_MODE_NA) !=
-             ERR_PTR(-EOPNOTSUPP))
-               using_mac_select_pcs = true;
-
        /* Validate the supplied configuration */
-       if (using_mac_select_pcs &&
-           phy_interface_empty(config->supported_interfaces)) {
+       if (phy_interface_empty(config->supported_interfaces)) {
                dev_err(config->dev,
-                       "phylink: error: empty supported_interfaces but mac_select_pcs() method present\n");
+                       "phylink: error: empty supported_interfaces\n");
                return ERR_PTR(-EINVAL);
        }
 
+       if (mac_ops->mac_select_pcs &&
+           mac_ops->mac_select_pcs(config, PHY_INTERFACE_MODE_NA) !=
+             ERR_PTR(-EOPNOTSUPP))
+               using_mac_select_pcs = true;
+
        pl = kzalloc(sizeof(*pl), GFP_KERNEL);
        if (!pl)
                return ERR_PTR(-ENOMEM);
@@ -1809,7 +1843,7 @@ EXPORT_SYMBOL_GPL(phylink_of_phy_connect);
  * Returns 0 on success or a negative errno.
  */
 int phylink_fwnode_phy_connect(struct phylink *pl,
-                              struct fwnode_handle *fwnode,
+                              const struct fwnode_handle *fwnode,
                               u32 flags)
 {
        struct fwnode_handle *phy_fwnode;
@@ -3131,8 +3165,8 @@ static void phylink_sfp_link_up(void *upstream)
  */
 static bool phylink_phy_no_inband(struct phy_device *phy)
 {
-       return phy->is_c45 &&
-               (phy->c45_ids.device_ids[1] & 0xfffffff0) == 0xae025150;
+       return phy->is_c45 && phy_id_compare(phy->c45_ids.device_ids[1],
+                                            0xae025150, 0xfffffff0);
 }
 
 static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
@@ -3196,10 +3230,48 @@ static const struct sfp_upstream_ops sfp_phylink_ops = {
 
 /* Helpers for MAC drivers */
 
+static struct {
+       int bit;
+       int speed;
+} phylink_c73_priority_resolution[] = {
+       { ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, SPEED_100000 },
+       { ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, SPEED_100000 },
+       /* 100GBASE-KP4 and 100GBASE-CR10 not supported */
+       { ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, SPEED_40000 },
+       { ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, SPEED_40000 },
+       { ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, SPEED_10000 },
+       { ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, SPEED_10000 },
+       /* 5GBASE-KR not supported */
+       { ETHTOOL_LINK_MODE_2500baseX_Full_BIT, SPEED_2500 },
+       { ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, SPEED_1000 },
+};
+
+void phylink_resolve_c73(struct phylink_link_state *state)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(phylink_c73_priority_resolution); i++) {
+               int bit = phylink_c73_priority_resolution[i].bit;
+               if (linkmode_test_bit(bit, state->advertising) &&
+                   linkmode_test_bit(bit, state->lp_advertising))
+                       break;
+       }
+
+       if (i < ARRAY_SIZE(phylink_c73_priority_resolution)) {
+               state->speed = phylink_c73_priority_resolution[i].speed;
+               state->duplex = DUPLEX_FULL;
+       } else {
+               /* negotiation failure */
+               state->link = false;
+       }
+
+       phylink_resolve_an_pause(state);
+}
+EXPORT_SYMBOL_GPL(phylink_resolve_c73);
+
 static void phylink_decode_c37_word(struct phylink_link_state *state,
                                    uint16_t config_reg, int speed)
 {
-       bool tx_pause, rx_pause;
        int fd_bit;
 
        if (speed == SPEED_2500)
@@ -3218,13 +3290,7 @@ static void phylink_decode_c37_word(struct phylink_link_state *state,
                state->link = false;
        }
 
-       linkmode_resolve_pause(state->advertising, state->lp_advertising,
-                              &tx_pause, &rx_pause);
-
-       if (tx_pause)
-               state->pause |= MLO_PAUSE_TX;
-       if (rx_pause)
-               state->pause |= MLO_PAUSE_RX;
+       phylink_resolve_an_pause(state);
 }
 
 static void phylink_decode_sgmii_word(struct phylink_link_state *state,
index 3d99fd6..894172a 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/phy.h>
 #include <linux/module.h>
 #include <linux/delay.h>
+#include <linux/clk.h>
 
 #define RTL821x_PHYSR                          0x11
 #define RTL821x_PHYSR_DUPLEX                   BIT(13)
@@ -80,6 +81,7 @@ struct rtl821x_priv {
        u16 phycr1;
        u16 phycr2;
        bool has_phycr2;
+       struct clk *clk;
 };
 
 static int rtl821x_read_page(struct phy_device *phydev)
@@ -103,6 +105,11 @@ static int rtl821x_probe(struct phy_device *phydev)
        if (!priv)
                return -ENOMEM;
 
+       priv->clk = devm_clk_get_optional_enabled(dev, NULL);
+       if (IS_ERR(priv->clk))
+               return dev_err_probe(dev, PTR_ERR(priv->clk),
+                                    "failed to get phy clock\n");
+
        ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR1);
        if (ret < 0)
                return ret;
@@ -419,10 +426,31 @@ static int rtl8211f_config_init(struct phy_device *phydev)
        return genphy_soft_reset(phydev);
 }
 
+static int rtl821x_suspend(struct phy_device *phydev)
+{
+       struct rtl821x_priv *priv = phydev->priv;
+       int ret = 0;
+
+       if (!phydev->wol_enabled) {
+               ret = genphy_suspend(phydev);
+
+               if (ret)
+                       return ret;
+
+               clk_disable_unprepare(priv->clk);
+       }
+
+       return ret;
+}
+
 static int rtl821x_resume(struct phy_device *phydev)
 {
+       struct rtl821x_priv *priv = phydev->priv;
        int ret;
 
+       if (!phydev->wol_enabled)
+               clk_prepare_enable(priv->clk);
+
        ret = genphy_resume(phydev);
        if (ret < 0)
                return ret;
@@ -927,10 +955,11 @@ static struct phy_driver realtek_drvs[] = {
                .read_status    = rtlgen_read_status,
                .config_intr    = &rtl8211f_config_intr,
                .handle_interrupt = rtl8211f_handle_interrupt,
-               .suspend        = genphy_suspend,
+               .suspend        = rtl821x_suspend,
                .resume         = rtl821x_resume,
                .read_page      = rtl821x_read_page,
                .write_page     = rtl821x_write_page,
+               .flags          = PHY_ALWAYS_CALL_SUSPEND,
        }, {
                PHY_ID_MATCH_EXACT(RTL_8211FVD_PHYID),
                .name           = "RTL8211F-VD Gigabit Ethernet",
@@ -939,10 +968,11 @@ static struct phy_driver realtek_drvs[] = {
                .read_status    = rtlgen_read_status,
                .config_intr    = &rtl8211f_config_intr,
                .handle_interrupt = rtl8211f_handle_interrupt,
-               .suspend        = genphy_suspend,
+               .suspend        = rtl821x_suspend,
                .resume         = rtl821x_resume,
                .read_page      = rtl821x_read_page,
                .write_page     = rtl821x_write_page,
+               .flags          = PHY_ALWAYS_CALL_SUSPEND,
        }, {
                .name           = "Generic FE-GE Realtek PHY",
                .match_phy_device = rtlgen_match_phy_device,
index 9372e5a..e8dd47b 100644 (file)
@@ -576,6 +576,26 @@ static void sfp_upstream_clear(struct sfp_bus *bus)
 }
 
 /**
+ * sfp_upstream_set_signal_rate() - set data signalling rate
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @rate_kbd: signalling rate in units of 1000 baud
+ *
+ * Configure the rate select settings on the SFP module for the signalling
+ * rate (not the same as the data rate).
+ *
+ * Locks that may be held:
+ *  Phylink's state_mutex
+ *  rtnl lock
+ *  SFP's sm_mutex
+ */
+void sfp_upstream_set_signal_rate(struct sfp_bus *bus, unsigned int rate_kbd)
+{
+       if (bus->registered)
+               bus->socket_ops->set_signal_rate(bus->sfp, rate_kbd);
+}
+EXPORT_SYMBOL_GPL(sfp_upstream_set_signal_rate);
+
+/**
  * sfp_bus_find_fwnode() - parse and locate the SFP bus from fwnode
  * @fwnode: firmware node for the parent device (MAC or PHY)
  *
index 89636dc..d855a18 100644 (file)
@@ -24,14 +24,18 @@ enum {
        GPIO_LOS,
        GPIO_TX_FAULT,
        GPIO_TX_DISABLE,
-       GPIO_RATE_SELECT,
+       GPIO_RS0,
+       GPIO_RS1,
        GPIO_MAX,
 
        SFP_F_PRESENT = BIT(GPIO_MODDEF0),
        SFP_F_LOS = BIT(GPIO_LOS),
        SFP_F_TX_FAULT = BIT(GPIO_TX_FAULT),
        SFP_F_TX_DISABLE = BIT(GPIO_TX_DISABLE),
-       SFP_F_RATE_SELECT = BIT(GPIO_RATE_SELECT),
+       SFP_F_RS0 = BIT(GPIO_RS0),
+       SFP_F_RS1 = BIT(GPIO_RS1),
+
+       SFP_F_OUTPUTS = SFP_F_TX_DISABLE | SFP_F_RS0 | SFP_F_RS1,
 
        SFP_E_INSERT = 0,
        SFP_E_REMOVE,
@@ -148,6 +152,7 @@ static const char *gpio_names[] = {
        "tx-fault",
        "tx-disable",
        "rate-select0",
+       "rate-select1",
 };
 
 static const enum gpiod_flags gpio_flags[] = {
@@ -156,6 +161,7 @@ static const enum gpiod_flags gpio_flags[] = {
        GPIOD_IN,
        GPIOD_ASIS,
        GPIOD_ASIS,
+       GPIOD_ASIS,
 };
 
 /* t_start_up (SFF-8431) or t_init (SFF-8472) is the time required for a
@@ -164,7 +170,6 @@ static const enum gpiod_flags gpio_flags[] = {
  * on board (for a copper SFP) time to initialise.
  */
 #define T_WAIT                 msecs_to_jiffies(50)
-#define T_WAIT_ROLLBALL                msecs_to_jiffies(25000)
 #define T_START_UP             msecs_to_jiffies(300)
 #define T_START_UP_BAD_GPON    msecs_to_jiffies(60000)
 
@@ -242,10 +247,18 @@ struct sfp {
 
        bool need_poll;
 
+       /* Access rules:
+        * state_hw_drive: st_mutex held
+        * state_hw_mask: st_mutex held
+        * state_soft_mask: st_mutex held
+        * state: st_mutex held unless reading input bits
+        */
        struct mutex st_mutex;                  /* Protects state */
+       unsigned int state_hw_drive;
        unsigned int state_hw_mask;
        unsigned int state_soft_mask;
        unsigned int state;
+
        struct delayed_work poll;
        struct delayed_work timeout;
        struct mutex sm_mutex;                  /* Protects state machine */
@@ -262,6 +275,10 @@ struct sfp {
        unsigned int module_t_start_up;
        unsigned int module_t_wait;
 
+       unsigned int rate_kbd;
+       unsigned int rs_threshold_kbd;
+       unsigned int rs_state_mask;
+
        bool have_a2;
        bool tx_fault_ignore;
 
@@ -312,7 +329,7 @@ static bool sfp_module_supported(const struct sfp_eeprom_id *id)
 
 static const struct sff_data sfp_data = {
        .gpios = SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT |
-                SFP_F_TX_DISABLE | SFP_F_RATE_SELECT,
+                SFP_F_TX_DISABLE | SFP_F_RS0 | SFP_F_RS1,
        .module_supported = sfp_module_supported,
 };
 
@@ -333,6 +350,27 @@ static void sfp_fixup_ignore_tx_fault(struct sfp *sfp)
        sfp->tx_fault_ignore = true;
 }
 
+// For 10GBASE-T short-reach modules
+static void sfp_fixup_10gbaset_30m(struct sfp *sfp)
+{
+       sfp->id.base.connector = SFF8024_CONNECTOR_RJ45;
+       sfp->id.base.extended_cc = SFF8024_ECC_10GBASE_T_SR;
+}
+
+static void sfp_fixup_rollball_proto(struct sfp *sfp, unsigned int secs)
+{
+       sfp->mdio_protocol = MDIO_I2C_ROLLBALL;
+       sfp->module_t_wait = msecs_to_jiffies(secs * 1000);
+}
+
+static void sfp_fixup_fs_10gt(struct sfp *sfp)
+{
+       sfp_fixup_10gbaset_30m(sfp);
+
+       // These SFPs need 4 seconds before the PHY can be accessed
+       sfp_fixup_rollball_proto(sfp, 4);
+}
+
 static void sfp_fixup_halny_gsfp(struct sfp *sfp)
 {
        /* Ignore the TX_FAULT and LOS signals on this module.
@@ -344,8 +382,8 @@ static void sfp_fixup_halny_gsfp(struct sfp *sfp)
 
 static void sfp_fixup_rollball(struct sfp *sfp)
 {
-       sfp->mdio_protocol = MDIO_I2C_ROLLBALL;
-       sfp->module_t_wait = T_WAIT_ROLLBALL;
+       // Rollball SFPs need 25 seconds before the PHY can be accessed
+       sfp_fixup_rollball_proto(sfp, 25);
 }
 
 static void sfp_fixup_rollball_cc(struct sfp *sfp)
@@ -410,6 +448,10 @@ static const struct sfp_quirk sfp_quirks[] = {
        SFP_QUIRK("ALCATELLUCENT", "3FE46541AA", sfp_quirk_2500basex,
                  sfp_fixup_long_startup),
 
+       // Fiberstore SFP-10G-T doesn't identify as copper, and uses the
+       // Rollball protocol to talk to the PHY.
+       SFP_QUIRK_F("FS", "SFP-10G-T", sfp_fixup_fs_10gt),
+
        SFP_QUIRK_F("HALNy", "HL-GSFP", sfp_fixup_halny_gsfp),
 
        // HG MXPD-483II-F 2.5G supports 2500Base-X, but incorrectly reports
@@ -427,6 +469,11 @@ static const struct sfp_quirk sfp_quirks[] = {
 
        SFP_QUIRK_M("UBNT", "UF-INSTANT", sfp_quirk_ubnt_uf_instant),
 
+       // Walsun HXSX-ATR[CI]-1 don't identify as copper, and use the
+       // Rollball protocol to talk to the PHY.
+       SFP_QUIRK_F("Walsun", "HXSX-ATRC-1", sfp_fixup_fs_10gt),
+       SFP_QUIRK_F("Walsun", "HXSX-ATRI-1", sfp_fixup_fs_10gt),
+
        SFP_QUIRK_F("OEM", "SFP-10G-T", sfp_fixup_rollball_cc),
        SFP_QUIRK_M("OEM", "SFP-2.5G-T", sfp_quirk_oem_2_5g),
        SFP_QUIRK_F("OEM", "RTSFP-10", sfp_fixup_rollball_cc),
@@ -500,20 +547,37 @@ static unsigned int sff_gpio_get_state(struct sfp *sfp)
 
 static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
 {
-       if (state & SFP_F_PRESENT) {
-               /* If the module is present, drive the signals */
-               if (sfp->gpio[GPIO_TX_DISABLE])
+       unsigned int drive;
+
+       if (state & SFP_F_PRESENT)
+               /* If the module is present, drive the requested signals */
+               drive = sfp->state_hw_drive;
+       else
+               /* Otherwise, let them float to the pull-ups */
+               drive = 0;
+
+       if (sfp->gpio[GPIO_TX_DISABLE]) {
+               if (drive & SFP_F_TX_DISABLE)
                        gpiod_direction_output(sfp->gpio[GPIO_TX_DISABLE],
                                               state & SFP_F_TX_DISABLE);
-               if (state & SFP_F_RATE_SELECT)
-                       gpiod_direction_output(sfp->gpio[GPIO_RATE_SELECT],
-                                              state & SFP_F_RATE_SELECT);
-       } else {
-               /* Otherwise, let them float to the pull-ups */
-               if (sfp->gpio[GPIO_TX_DISABLE])
+               else
                        gpiod_direction_input(sfp->gpio[GPIO_TX_DISABLE]);
-               if (state & SFP_F_RATE_SELECT)
-                       gpiod_direction_input(sfp->gpio[GPIO_RATE_SELECT]);
+       }
+
+       if (sfp->gpio[GPIO_RS0]) {
+               if (drive & SFP_F_RS0)
+                       gpiod_direction_output(sfp->gpio[GPIO_RS0],
+                                              state & SFP_F_RS0);
+               else
+                       gpiod_direction_input(sfp->gpio[GPIO_RS0]);
+       }
+
+       if (sfp->gpio[GPIO_RS1]) {
+               if (drive & SFP_F_RS1)
+                       gpiod_direction_output(sfp->gpio[GPIO_RS1],
+                                              state & SFP_F_RS1);
+               else
+                       gpiod_direction_input(sfp->gpio[GPIO_RS1]);
        }
 }
 
@@ -675,16 +739,33 @@ static unsigned int sfp_soft_get_state(struct sfp *sfp)
        return state & sfp->state_soft_mask;
 }
 
-static void sfp_soft_set_state(struct sfp *sfp, unsigned int state)
+static void sfp_soft_set_state(struct sfp *sfp, unsigned int state,
+                              unsigned int soft)
 {
-       u8 mask = SFP_STATUS_TX_DISABLE_FORCE;
+       u8 mask = 0;
        u8 val = 0;
 
+       if (soft & SFP_F_TX_DISABLE)
+               mask |= SFP_STATUS_TX_DISABLE_FORCE;
        if (state & SFP_F_TX_DISABLE)
                val |= SFP_STATUS_TX_DISABLE_FORCE;
 
+       if (soft & SFP_F_RS0)
+               mask |= SFP_STATUS_RS0_SELECT;
+       if (state & SFP_F_RS0)
+               val |= SFP_STATUS_RS0_SELECT;
+
+       if (mask)
+               sfp_modify_u8(sfp, true, SFP_STATUS, mask, val);
 
-       sfp_modify_u8(sfp, true, SFP_STATUS, mask, val);
+       val = mask = 0;
+       if (soft & SFP_F_RS1)
+               mask |= SFP_EXT_STATUS_RS1_SELECT;
+       if (state & SFP_F_RS1)
+               val |= SFP_EXT_STATUS_RS1_SELECT;
+
+       if (mask)
+               sfp_modify_u8(sfp, true, SFP_EXT_STATUS, mask, val);
 }
 
 static void sfp_soft_start_poll(struct sfp *sfp)
@@ -692,27 +773,35 @@ static void sfp_soft_start_poll(struct sfp *sfp)
        const struct sfp_eeprom_id *id = &sfp->id;
        unsigned int mask = 0;
 
-       sfp->state_soft_mask = 0;
        if (id->ext.enhopts & SFP_ENHOPTS_SOFT_TX_DISABLE)
                mask |= SFP_F_TX_DISABLE;
        if (id->ext.enhopts & SFP_ENHOPTS_SOFT_TX_FAULT)
                mask |= SFP_F_TX_FAULT;
        if (id->ext.enhopts & SFP_ENHOPTS_SOFT_RX_LOS)
                mask |= SFP_F_LOS;
+       if (id->ext.enhopts & SFP_ENHOPTS_SOFT_RATE_SELECT)
+               mask |= sfp->rs_state_mask;
 
+       mutex_lock(&sfp->st_mutex);
        // Poll the soft state for hardware pins we want to ignore
        sfp->state_soft_mask = ~sfp->state_hw_mask & mask;
 
        if (sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT) &&
            !sfp->need_poll)
                mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
+       mutex_unlock(&sfp->st_mutex);
 }
 
 static void sfp_soft_stop_poll(struct sfp *sfp)
 {
+       mutex_lock(&sfp->st_mutex);
        sfp->state_soft_mask = 0;
+       mutex_unlock(&sfp->st_mutex);
 }
 
+/* sfp_get_state() - must be called with st_mutex held, or in the
+ * initialisation path.
+ */
 static unsigned int sfp_get_state(struct sfp *sfp)
 {
        unsigned int soft = sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT);
@@ -725,13 +814,26 @@ static unsigned int sfp_get_state(struct sfp *sfp)
        return state;
 }
 
+/* sfp_set_state() - must be called with st_mutex held, or in the
+ * initialisation path.
+ */
 static void sfp_set_state(struct sfp *sfp, unsigned int state)
 {
+       unsigned int soft;
+
        sfp->set_state(sfp, state);
 
-       if (state & SFP_F_PRESENT &&
-           sfp->state_soft_mask & SFP_F_TX_DISABLE)
-               sfp_soft_set_state(sfp, state);
+       soft = sfp->state_soft_mask & SFP_F_OUTPUTS;
+       if (state & SFP_F_PRESENT && soft)
+               sfp_soft_set_state(sfp, state, soft);
+}
+
+static void sfp_mod_state(struct sfp *sfp, unsigned int mask, unsigned int set)
+{
+       mutex_lock(&sfp->st_mutex);
+       sfp->state = (sfp->state & ~mask) | set;
+       sfp_set_state(sfp, sfp->state);
+       mutex_unlock(&sfp->st_mutex);
 }
 
 static unsigned int sfp_check(void *buf, size_t len)
@@ -1537,16 +1639,14 @@ static void sfp_module_tx_disable(struct sfp *sfp)
 {
        dev_dbg(sfp->dev, "tx disable %u -> %u\n",
                sfp->state & SFP_F_TX_DISABLE ? 1 : 0, 1);
-       sfp->state |= SFP_F_TX_DISABLE;
-       sfp_set_state(sfp, sfp->state);
+       sfp_mod_state(sfp, SFP_F_TX_DISABLE, SFP_F_TX_DISABLE);
 }
 
 static void sfp_module_tx_enable(struct sfp *sfp)
 {
        dev_dbg(sfp->dev, "tx disable %u -> %u\n",
                sfp->state & SFP_F_TX_DISABLE ? 1 : 0, 0);
-       sfp->state &= ~SFP_F_TX_DISABLE;
-       sfp_set_state(sfp, sfp->state);
+       sfp_mod_state(sfp, SFP_F_TX_DISABLE, 0);
 }
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -1567,10 +1667,15 @@ static int sfp_debug_state_show(struct seq_file *s, void *data)
                   sfp->sm_fault_retries);
        seq_printf(s, "PHY probe remaining retries: %d\n",
                   sfp->sm_phy_retries);
+       seq_printf(s, "Signalling rate: %u kBd\n", sfp->rate_kbd);
+       seq_printf(s, "Rate select threshold: %u kBd\n",
+                  sfp->rs_threshold_kbd);
        seq_printf(s, "moddef0: %d\n", !!(sfp->state & SFP_F_PRESENT));
        seq_printf(s, "rx_los: %d\n", !!(sfp->state & SFP_F_LOS));
        seq_printf(s, "tx_fault: %d\n", !!(sfp->state & SFP_F_TX_FAULT));
        seq_printf(s, "tx_disable: %d\n", !!(sfp->state & SFP_F_TX_DISABLE));
+       seq_printf(s, "rs0: %d\n", !!(sfp->state & SFP_F_RS0));
+       seq_printf(s, "rs1: %d\n", !!(sfp->state & SFP_F_RS1));
        return 0;
 }
 DEFINE_SHOW_ATTRIBUTE(sfp_debug_state);
@@ -1599,16 +1704,18 @@ static void sfp_debugfs_exit(struct sfp *sfp)
 
 static void sfp_module_tx_fault_reset(struct sfp *sfp)
 {
-       unsigned int state = sfp->state;
-
-       if (state & SFP_F_TX_DISABLE)
-               return;
+       unsigned int state;
 
-       sfp_set_state(sfp, state | SFP_F_TX_DISABLE);
+       mutex_lock(&sfp->st_mutex);
+       state = sfp->state;
+       if (!(state & SFP_F_TX_DISABLE)) {
+               sfp_set_state(sfp, state | SFP_F_TX_DISABLE);
 
-       udelay(T_RESET_US);
+               udelay(T_RESET_US);
 
-       sfp_set_state(sfp, state);
+               sfp_set_state(sfp, state);
+       }
+       mutex_unlock(&sfp->st_mutex);
 }
 
 /* SFP state machine */
@@ -1874,6 +1981,95 @@ static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable)
        return 0;
 }
 
+static void sfp_module_parse_rate_select(struct sfp *sfp)
+{
+       u8 rate_id;
+
+       sfp->rs_threshold_kbd = 0;
+       sfp->rs_state_mask = 0;
+
+       if (!(sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_RATE_SELECT)))
+               /* No support for RateSelect */
+               return;
+
+       /* Default to INF-8074 RateSelect operation. The signalling threshold
+        * rate is not well specified, so always select "Full Bandwidth", but
+        * SFF-8079 reveals that it is understood that RS0 will be low for
+        * 1.0625Gb/s and high for 2.125Gb/s. Choose a value half-way between.
+        * This method exists prior to SFF-8472.
+        */
+       sfp->rs_state_mask = SFP_F_RS0;
+       sfp->rs_threshold_kbd = 1594;
+
+       /* Parse the rate identifier, which is complicated due to history:
+        * SFF-8472 rev 9.5 marks this field as reserved.
+        * SFF-8079 references SFF-8472 rev 9.5 and defines bit 0. SFF-8472
+        *  compliance is not required.
+        * SFF-8472 rev 10.2 defines this field using values 0..4
+        * SFF-8472 rev 11.0 redefines this field with bit 0 for SFF-8079
+        * and even values.
+        */
+       rate_id = sfp->id.base.rate_id;
+       if (rate_id == 0)
+               /* Unspecified */
+               return;
+
+       /* SFF-8472 rev 10.0..10.4 did not account for SFF-8079 using bit 0,
+        * and allocated value 3 to SFF-8431 independent tx/rx rate select.
+        * Convert this to a SFF-8472 rev 11.0 rate identifier.
+        */
+       if (sfp->id.ext.sff8472_compliance >= SFP_SFF8472_COMPLIANCE_REV10_2 &&
+           sfp->id.ext.sff8472_compliance < SFP_SFF8472_COMPLIANCE_REV11_0 &&
+           rate_id == 3)
+               rate_id = SFF_RID_8431;
+
+       if (rate_id & SFF_RID_8079) {
+               /* SFF-8079 RateSelect / Application Select in conjunction with
+                * SFF-8472 rev 9.5. SFF-8079 defines rate_id as a bitfield
+                * with only bit 0 used, which takes precedence over SFF-8472.
+                */
+               if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_APP_SELECT_SFF8079)) {
+                       /* SFF-8079 Part 1 - rate selection between Fibre
+                        * Channel 1.0625/2.125/4.25 Gbd modes. Note that RS0
+                        * is high for 2125, so we have to subtract 1 to
+                        * include it.
+                        */
+                       sfp->rs_threshold_kbd = 2125 - 1;
+                       sfp->rs_state_mask = SFP_F_RS0;
+               }
+               return;
+       }
+
+       /* SFF-8472 rev 9.5 does not define the rate identifier */
+       if (sfp->id.ext.sff8472_compliance <= SFP_SFF8472_COMPLIANCE_REV9_5)
+               return;
+
+       /* SFF-8472 rev 11.0 defines rate_id as a numerical value which will
+        * always have bit 0 clear due to SFF-8079's bitfield usage of rate_id.
+        */
+       switch (rate_id) {
+       case SFF_RID_8431_RX_ONLY:
+               sfp->rs_threshold_kbd = 4250;
+               sfp->rs_state_mask = SFP_F_RS0;
+               break;
+
+       case SFF_RID_8431_TX_ONLY:
+               sfp->rs_threshold_kbd = 4250;
+               sfp->rs_state_mask = SFP_F_RS1;
+               break;
+
+       case SFF_RID_8431:
+               sfp->rs_threshold_kbd = 4250;
+               sfp->rs_state_mask = SFP_F_RS0 | SFP_F_RS1;
+               break;
+
+       case SFF_RID_10G8G:
+               sfp->rs_threshold_kbd = 9000;
+               sfp->rs_state_mask = SFP_F_RS0 | SFP_F_RS1;
+               break;
+       }
+}
+
 /* GPON modules based on Realtek RTL8672 and RTL9601C chips (e.g. V-SOL
  * V2801F, CarlitoxxPro CPGOS03-0490, Ubiquiti U-Fiber Instant, ...) do
  * not support multibyte reads from the EEPROM. Each multi-byte read
@@ -1953,6 +2149,7 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
        /* SFP module inserted - read I2C data */
        struct sfp_eeprom_id id;
        bool cotsworks_sfbg;
+       unsigned int mask;
        bool cotsworks;
        u8 check;
        int ret;
@@ -2092,14 +2289,19 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
        if (ret < 0)
                return ret;
 
-       /* Initialise state bits to use from hardware */
-       sfp->state_hw_mask = SFP_F_PRESENT;
+       sfp_module_parse_rate_select(sfp);
+
+       mask = SFP_F_PRESENT;
        if (sfp->gpio[GPIO_TX_DISABLE])
-               sfp->state_hw_mask |= SFP_F_TX_DISABLE;
+               mask |= SFP_F_TX_DISABLE;
        if (sfp->gpio[GPIO_TX_FAULT])
-               sfp->state_hw_mask |= SFP_F_TX_FAULT;
+               mask |= SFP_F_TX_FAULT;
        if (sfp->gpio[GPIO_LOS])
-               sfp->state_hw_mask |= SFP_F_LOS;
+               mask |= SFP_F_LOS;
+       if (sfp->gpio[GPIO_RS0])
+               mask |= SFP_F_RS0;
+       if (sfp->gpio[GPIO_RS1])
+               mask |= SFP_F_RS1;
 
        sfp->module_t_start_up = T_START_UP;
        sfp->module_t_wait = T_WAIT;
@@ -2117,8 +2319,17 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
                sfp->mdio_protocol = MDIO_I2C_NONE;
 
        sfp->quirk = sfp_lookup_quirk(&id);
+
+       mutex_lock(&sfp->st_mutex);
+       /* Initialise state bits to use from hardware */
+       sfp->state_hw_mask = mask;
+
+       /* We want to drive the rate select pins that the module is using */
+       sfp->state_hw_drive |= sfp->rs_state_mask;
+
        if (sfp->quirk && sfp->quirk->fixup)
                sfp->quirk->fixup(sfp);
+       mutex_unlock(&sfp->st_mutex);
 
        return 0;
 }
@@ -2132,6 +2343,7 @@ static void sfp_sm_mod_remove(struct sfp *sfp)
 
        memset(&sfp->id, 0, sizeof(sfp->id));
        sfp->module_power_mW = 0;
+       sfp->state_hw_drive = SFP_F_TX_DISABLE;
        sfp->have_a2 = false;
 
        dev_info(sfp->dev, "module removed\n");
@@ -2452,10 +2664,8 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
        }
 }
 
-static void sfp_sm_event(struct sfp *sfp, unsigned int event)
+static void __sfp_sm_event(struct sfp *sfp, unsigned int event)
 {
-       mutex_lock(&sfp->sm_mutex);
-
        dev_dbg(sfp->dev, "SM: enter %s:%s:%s event %s\n",
                mod_state_to_str(sfp->sm_mod_state),
                dev_state_to_str(sfp->sm_dev_state),
@@ -2470,7 +2680,12 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
                mod_state_to_str(sfp->sm_mod_state),
                dev_state_to_str(sfp->sm_dev_state),
                sm_state_to_str(sfp->sm_state));
+}
 
+static void sfp_sm_event(struct sfp *sfp, unsigned int event)
+{
+       mutex_lock(&sfp->sm_mutex);
+       __sfp_sm_event(sfp, event);
        mutex_unlock(&sfp->sm_mutex);
 }
 
@@ -2494,6 +2709,20 @@ static void sfp_stop(struct sfp *sfp)
        sfp_sm_event(sfp, SFP_E_DEV_DOWN);
 }
 
+static void sfp_set_signal_rate(struct sfp *sfp, unsigned int rate_kbd)
+{
+       unsigned int set;
+
+       sfp->rate_kbd = rate_kbd;
+
+       if (rate_kbd > sfp->rs_threshold_kbd)
+               set = sfp->rs_state_mask;
+       else
+               set = 0;
+
+       sfp_mod_state(sfp, SFP_F_RS0 | SFP_F_RS1, set);
+}
+
 static int sfp_module_info(struct sfp *sfp, struct ethtool_modinfo *modinfo)
 {
        /* locking... and check module is present */
@@ -2578,6 +2807,7 @@ static const struct sfp_socket_ops sfp_module_ops = {
        .detach = sfp_detach,
        .start = sfp_start,
        .stop = sfp_stop,
+       .set_signal_rate = sfp_set_signal_rate,
        .module_info = sfp_module_info,
        .module_eeprom = sfp_module_eeprom,
        .module_eeprom_by_page = sfp_module_eeprom_by_page,
@@ -2596,6 +2826,7 @@ static void sfp_check_state(struct sfp *sfp)
 {
        unsigned int state, i, changed;
 
+       rtnl_lock();
        mutex_lock(&sfp->st_mutex);
        state = sfp_get_state(sfp);
        changed = state ^ sfp->state;
@@ -2609,23 +2840,24 @@ static void sfp_check_state(struct sfp *sfp)
                        dev_dbg(sfp->dev, "%s %u -> %u\n", gpio_names[i],
                                !!(sfp->state & BIT(i)), !!(state & BIT(i)));
 
-       state |= sfp->state & (SFP_F_TX_DISABLE | SFP_F_RATE_SELECT);
+       state |= sfp->state & SFP_F_OUTPUTS;
        sfp->state = state;
+       mutex_unlock(&sfp->st_mutex);
 
-       rtnl_lock();
+       mutex_lock(&sfp->sm_mutex);
        if (changed & SFP_F_PRESENT)
-               sfp_sm_event(sfp, state & SFP_F_PRESENT ?
-                               SFP_E_INSERT : SFP_E_REMOVE);
+               __sfp_sm_event(sfp, state & SFP_F_PRESENT ?
+                                   SFP_E_INSERT : SFP_E_REMOVE);
 
        if (changed & SFP_F_TX_FAULT)
-               sfp_sm_event(sfp, state & SFP_F_TX_FAULT ?
-                               SFP_E_TX_FAULT : SFP_E_TX_CLEAR);
+               __sfp_sm_event(sfp, state & SFP_F_TX_FAULT ?
+                                   SFP_E_TX_FAULT : SFP_E_TX_CLEAR);
 
        if (changed & SFP_F_LOS)
-               sfp_sm_event(sfp, state & SFP_F_LOS ?
-                               SFP_E_LOS_HIGH : SFP_E_LOS_LOW);
+               __sfp_sm_event(sfp, state & SFP_F_LOS ?
+                                   SFP_E_LOS_HIGH : SFP_E_LOS_LOW);
+       mutex_unlock(&sfp->sm_mutex);
        rtnl_unlock();
-       mutex_unlock(&sfp->st_mutex);
 }
 
 static irqreturn_t sfp_irq(int irq, void *data)
@@ -2643,6 +2875,8 @@ static void sfp_poll(struct work_struct *work)
 
        sfp_check_state(sfp);
 
+       // st_mutex doesn't need to be held here for state_soft_mask,
+       // it's unimportant if we race while reading this.
        if (sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT) ||
            sfp->need_poll)
                mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
@@ -2748,6 +2982,7 @@ static int sfp_probe(struct platform_device *pdev)
                }
 
        sfp->state_hw_mask = SFP_F_PRESENT;
+       sfp->state_hw_drive = SFP_F_TX_DISABLE;
 
        sfp->get_state = sfp_gpio_get_state;
        sfp->set_state = sfp_gpio_set_state;
@@ -2773,9 +3008,9 @@ static int sfp_probe(struct platform_device *pdev)
         */
        sfp->state = sfp_get_state(sfp) | SFP_F_TX_DISABLE;
 
-       if (sfp->gpio[GPIO_RATE_SELECT] &&
-           gpiod_get_value_cansleep(sfp->gpio[GPIO_RATE_SELECT]))
-               sfp->state |= SFP_F_RATE_SELECT;
+       if (sfp->gpio[GPIO_RS0] &&
+           gpiod_get_value_cansleep(sfp->gpio[GPIO_RS0]))
+               sfp->state |= SFP_F_RS0;
        sfp_set_state(sfp, sfp->state);
        sfp_module_tx_disable(sfp);
        if (sfp->state & SFP_F_PRESENT) {
index 6cf1643..c7cb50d 100644 (file)
@@ -19,6 +19,7 @@ struct sfp_socket_ops {
        void (*detach)(struct sfp *sfp);
        void (*start)(struct sfp *sfp);
        void (*stop)(struct sfp *sfp);
+       void (*set_signal_rate)(struct sfp *sfp, unsigned int rate_kbd);
        int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo);
        int (*module_eeprom)(struct sfp *sfp, struct ethtool_eeprom *ee,
                             u8 *data);
index ac4d162..8c9ed18 100644 (file)
@@ -129,6 +129,40 @@ config PPPOE
          which contains instruction on how to use this driver (under
          the heading "Kernel mode PPPoE").
 
+choice
+       prompt "Number of PPPoE hash bits"
+       default PPPOE_HASH_BITS_4
+       depends on PPPOE
+       help
+               Select the number of bits used for hashing PPPoE interfaces.
+
+               Larger sizes reduces the risk of hash collisions at the cost
+               of slightly increased memory usage.
+
+               This hash table is on a per outer ethernet interface.
+
+config PPPOE_HASH_BITS_1
+       bool "1 bit (2 buckets)"
+
+config PPPOE_HASH_BITS_2
+       bool "2 bits (4 buckets)"
+
+config PPPOE_HASH_BITS_4
+       bool "4 bits (16 buckets)"
+
+config PPPOE_HASH_BITS_8
+       bool "8 bits (256 buckets)"
+
+endchoice
+
+config PPPOE_HASH_BITS
+       int
+       default 1 if PPPOE_HASH_BITS_1
+       default 2 if PPPOE_HASH_BITS_2
+       default 4 if PPPOE_HASH_BITS_4
+       default 8 if PPPOE_HASH_BITS_8
+       default 4
+
 config PPTP
        tristate "PPP over IPv4 (PPTP)"
        depends on PPP && NET_IPGRE_DEMUX
index ce2cbb5..3b79c60 100644 (file)
@@ -80,7 +80,7 @@
 
 #include <linux/uaccess.h>
 
-#define PPPOE_HASH_BITS 4
+#define PPPOE_HASH_BITS CONFIG_PPPOE_HASH_BITS
 #define PPPOE_HASH_SIZE (1 << PPPOE_HASH_BITS)
 #define PPPOE_HASH_MASK        (PPPOE_HASH_SIZE - 1)
 
index dce9f9d..614f3e3 100644 (file)
@@ -176,12 +176,27 @@ static int veth_get_sset_count(struct net_device *dev, int sset)
        }
 }
 
+static void veth_get_page_pool_stats(struct net_device *dev, u64 *data)
+{
+#ifdef CONFIG_PAGE_POOL_STATS
+       struct veth_priv *priv = netdev_priv(dev);
+       struct page_pool_stats pp_stats = {};
+       int i;
+
+       for (i = 0; i < dev->real_num_rx_queues; i++) {
+               if (!priv->rq[i].page_pool)
+                       continue;
+               page_pool_get_stats(priv->rq[i].page_pool, &pp_stats);
+       }
+       page_pool_ethtool_stats_get(data, &pp_stats);
+#endif /* CONFIG_PAGE_POOL_STATS */
+}
+
 static void veth_get_ethtool_stats(struct net_device *dev,
                struct ethtool_stats *stats, u64 *data)
 {
        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
        struct net_device *peer = rtnl_dereference(priv->peer);
-       struct page_pool_stats pp_stats = {};
        int i, j, idx, pp_idx;
 
        data[0] = peer ? peer->ifindex : 0;
@@ -225,12 +240,7 @@ static void veth_get_ethtool_stats(struct net_device *dev,
        }
 
 page_pool_stats:
-       for (i = 0; i < dev->real_num_rx_queues; i++) {
-               if (!priv->rq[i].page_pool)
-                       continue;
-               page_pool_get_stats(priv->rq[i].page_pool, &pp_stats);
-       }
-       page_pool_ethtool_stats_get(&data[pp_idx], &pp_stats);
+       veth_get_page_pool_stats(dev, &data[pp_idx]);
 }
 
 static void veth_get_channels(struct net_device *dev,
@@ -747,7 +757,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
                if (!page)
                        goto drop;
 
-               nskb = build_skb(page_address(page), PAGE_SIZE);
+               nskb = napi_build_skb(page_address(page), PAGE_SIZE);
                if (!nskb) {
                        page_pool_put_full_page(rq->page_pool, page, true);
                        goto drop;
index 486b584..0db14f6 100644 (file)
@@ -445,6 +445,22 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
        return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
 }
 
+static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
+                                        unsigned int headroom,
+                                        unsigned int len)
+{
+       struct sk_buff *skb;
+
+       skb = build_skb(buf, buflen);
+       if (unlikely(!skb))
+               return NULL;
+
+       skb_reserve(skb, headroom);
+       skb_put(skb, len);
+
+       return skb;
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct receive_queue *rq,
@@ -478,13 +494,10 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 
        /* copy small packet so we can reuse these pages */
        if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
-               skb = build_skb(buf, truesize);
+               skb = virtnet_build_skb(buf, truesize, p - buf, len);
                if (unlikely(!skb))
                        return NULL;
 
-               skb_reserve(skb, p - buf);
-               skb_put(skb, len);
-
                page = (struct page *)page->private;
                if (page)
                        give_pages(rq, page);
@@ -791,6 +804,75 @@ out:
        return ret;
 }
 
+static void put_xdp_frags(struct xdp_buff *xdp)
+{
+       struct skb_shared_info *shinfo;
+       struct page *xdp_page;
+       int i;
+
+       if (xdp_buff_has_frags(xdp)) {
+               shinfo = xdp_get_shared_info_from_buff(xdp);
+               for (i = 0; i < shinfo->nr_frags; i++) {
+                       xdp_page = skb_frag_page(&shinfo->frags[i]);
+                       put_page(xdp_page);
+               }
+       }
+}
+
+static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
+                              struct net_device *dev,
+                              unsigned int *xdp_xmit,
+                              struct virtnet_rq_stats *stats)
+{
+       struct xdp_frame *xdpf;
+       int err;
+       u32 act;
+
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
+       stats->xdp_packets++;
+
+       switch (act) {
+       case XDP_PASS:
+               return act;
+
+       case XDP_TX:
+               stats->xdp_tx++;
+               xdpf = xdp_convert_buff_to_frame(xdp);
+               if (unlikely(!xdpf)) {
+                       netdev_dbg(dev, "convert buff to frame failed for xdp\n");
+                       return XDP_DROP;
+               }
+
+               err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
+               if (unlikely(!err)) {
+                       xdp_return_frame_rx_napi(xdpf);
+               } else if (unlikely(err < 0)) {
+                       trace_xdp_exception(dev, xdp_prog, act);
+                       return XDP_DROP;
+               }
+               *xdp_xmit |= VIRTIO_XDP_TX;
+               return act;
+
+       case XDP_REDIRECT:
+               stats->xdp_redirects++;
+               err = xdp_do_redirect(dev, xdp, xdp_prog);
+               if (err)
+                       return XDP_DROP;
+
+               *xdp_xmit |= VIRTIO_XDP_REDIR;
+               return act;
+
+       default:
+               bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
+               fallthrough;
+       case XDP_ABORTED:
+               trace_xdp_exception(dev, xdp_prog, act);
+               fallthrough;
+       case XDP_DROP:
+               return XDP_DROP;
+       }
+}
+
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
 {
        return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
@@ -864,134 +946,103 @@ err_buf:
        return NULL;
 }
 
-static struct sk_buff *receive_small(struct net_device *dev,
-                                    struct virtnet_info *vi,
-                                    struct receive_queue *rq,
-                                    void *buf, void *ctx,
-                                    unsigned int len,
-                                    unsigned int *xdp_xmit,
-                                    struct virtnet_rq_stats *stats)
+static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
+                                              unsigned int xdp_headroom,
+                                              void *buf,
+                                              unsigned int len)
 {
+       unsigned int header_offset;
+       unsigned int headroom;
+       unsigned int buflen;
        struct sk_buff *skb;
-       struct bpf_prog *xdp_prog;
-       unsigned int xdp_headroom = (unsigned long)ctx;
+
+       header_offset = VIRTNET_RX_PAD + xdp_headroom;
+       headroom = vi->hdr_len + header_offset;
+       buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+       skb = virtnet_build_skb(buf, buflen, headroom, len);
+       if (unlikely(!skb))
+               return NULL;
+
+       buf += header_offset;
+       memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
+
+       return skb;
+}
+
+static struct sk_buff *receive_small_xdp(struct net_device *dev,
+                                        struct virtnet_info *vi,
+                                        struct receive_queue *rq,
+                                        struct bpf_prog *xdp_prog,
+                                        void *buf,
+                                        unsigned int xdp_headroom,
+                                        unsigned int len,
+                                        unsigned int *xdp_xmit,
+                                        struct virtnet_rq_stats *stats)
+{
        unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
        unsigned int headroom = vi->hdr_len + header_offset;
-       unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
-                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+       struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
        struct page *page = virt_to_head_page(buf);
-       unsigned int delta = 0;
        struct page *xdp_page;
-       int err;
+       unsigned int buflen;
+       struct xdp_buff xdp;
+       struct sk_buff *skb;
        unsigned int metasize = 0;
+       u32 act;
 
-       len -= vi->hdr_len;
-       stats->bytes += len;
+       if (unlikely(hdr->hdr.gso_type))
+               goto err_xdp;
 
-       if (unlikely(len > GOOD_PACKET_LEN)) {
-               pr_debug("%s: rx error: len %u exceeds max size %d\n",
-                        dev->name, len, GOOD_PACKET_LEN);
-               dev->stats.rx_length_errors++;
-               goto err;
-       }
+       buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+       if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
+               int offset = buf - page_address(page) + header_offset;
+               unsigned int tlen = len + vi->hdr_len;
+               int num_buf = 1;
+
+               xdp_headroom = virtnet_get_headroom(vi);
+               header_offset = VIRTNET_RX_PAD + xdp_headroom;
+               headroom = vi->hdr_len + header_offset;
+               buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+               xdp_page = xdp_linearize_page(rq, &num_buf, page,
+                                             offset, header_offset,
+                                             &tlen);
+               if (!xdp_page)
+                       goto err_xdp;
 
-       if (likely(!vi->xdp_enabled)) {
-               xdp_prog = NULL;
-               goto skip_xdp;
+               buf = page_address(xdp_page);
+               put_page(page);
+               page = xdp_page;
        }
 
-       rcu_read_lock();
-       xdp_prog = rcu_dereference(rq->xdp_prog);
-       if (xdp_prog) {
-               struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
-               struct xdp_frame *xdpf;
-               struct xdp_buff xdp;
-               void *orig_data;
-               u32 act;
+       xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
+       xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
+                        xdp_headroom, len, true);
 
-               if (unlikely(hdr->hdr.gso_type))
-                       goto err_xdp;
+       act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
 
-               if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
-                       int offset = buf - page_address(page) + header_offset;
-                       unsigned int tlen = len + vi->hdr_len;
-                       int num_buf = 1;
-
-                       xdp_headroom = virtnet_get_headroom(vi);
-                       header_offset = VIRTNET_RX_PAD + xdp_headroom;
-                       headroom = vi->hdr_len + header_offset;
-                       buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
-                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-                       xdp_page = xdp_linearize_page(rq, &num_buf, page,
-                                                     offset, header_offset,
-                                                     &tlen);
-                       if (!xdp_page)
-                               goto err_xdp;
-
-                       buf = page_address(xdp_page);
-                       put_page(page);
-                       page = xdp_page;
-               }
+       switch (act) {
+       case XDP_PASS:
+               /* Recalculate length in case bpf program changed it */
+               len = xdp.data_end - xdp.data;
+               metasize = xdp.data - xdp.data_meta;
+               break;
 
-               xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
-               xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
-                                xdp_headroom, len, true);
-               orig_data = xdp.data;
-               act = bpf_prog_run_xdp(xdp_prog, &xdp);
-               stats->xdp_packets++;
-
-               switch (act) {
-               case XDP_PASS:
-                       /* Recalculate length in case bpf program changed it */
-                       delta = orig_data - xdp.data;
-                       len = xdp.data_end - xdp.data;
-                       metasize = xdp.data - xdp.data_meta;
-                       break;
-               case XDP_TX:
-                       stats->xdp_tx++;
-                       xdpf = xdp_convert_buff_to_frame(&xdp);
-                       if (unlikely(!xdpf))
-                               goto err_xdp;
-                       err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
-                       if (unlikely(!err)) {
-                               xdp_return_frame_rx_napi(xdpf);
-                       } else if (unlikely(err < 0)) {
-                               trace_xdp_exception(vi->dev, xdp_prog, act);
-                               goto err_xdp;
-                       }
-                       *xdp_xmit |= VIRTIO_XDP_TX;
-                       rcu_read_unlock();
-                       goto xdp_xmit;
-               case XDP_REDIRECT:
-                       stats->xdp_redirects++;
-                       err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                       if (err)
-                               goto err_xdp;
-                       *xdp_xmit |= VIRTIO_XDP_REDIR;
-                       rcu_read_unlock();
-                       goto xdp_xmit;
-               default:
-                       bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act);
-                       fallthrough;
-               case XDP_ABORTED:
-                       trace_xdp_exception(vi->dev, xdp_prog, act);
-                       goto err_xdp;
-               case XDP_DROP:
-                       goto err_xdp;
-               }
+       case XDP_TX:
+       case XDP_REDIRECT:
+               goto xdp_xmit;
+
+       default:
+               goto err_xdp;
        }
-       rcu_read_unlock();
 
-skip_xdp:
-       skb = build_skb(buf, buflen);
-       if (!skb)
+       skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
+       if (unlikely(!skb))
                goto err;
-       skb_reserve(skb, headroom - delta);
-       skb_put(skb, len);
-       if (!xdp_prog) {
-               buf += header_offset;
-               memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
-       } /* keep zeroed vnet hdr since XDP is loaded */
 
        if (metasize)
                skb_metadata_set(skb, metasize);
@@ -999,7 +1050,6 @@ skip_xdp:
        return skb;
 
 err_xdp:
-       rcu_read_unlock();
        stats->xdp_drops++;
 err:
        stats->drops++;
@@ -1008,6 +1058,53 @@ xdp_xmit:
        return NULL;
 }
 
+static struct sk_buff *receive_small(struct net_device *dev,
+                                    struct virtnet_info *vi,
+                                    struct receive_queue *rq,
+                                    void *buf, void *ctx,
+                                    unsigned int len,
+                                    unsigned int *xdp_xmit,
+                                    struct virtnet_rq_stats *stats)
+{
+       unsigned int xdp_headroom = (unsigned long)ctx;
+       struct page *page = virt_to_head_page(buf);
+       struct sk_buff *skb;
+
+       len -= vi->hdr_len;
+       stats->bytes += len;
+
+       if (unlikely(len > GOOD_PACKET_LEN)) {
+               pr_debug("%s: rx error: len %u exceeds max size %d\n",
+                        dev->name, len, GOOD_PACKET_LEN);
+               dev->stats.rx_length_errors++;
+               goto err;
+       }
+
+       if (unlikely(vi->xdp_enabled)) {
+               struct bpf_prog *xdp_prog;
+
+               rcu_read_lock();
+               xdp_prog = rcu_dereference(rq->xdp_prog);
+               if (xdp_prog) {
+                       skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
+                                               xdp_headroom, len, xdp_xmit,
+                                               stats);
+                       rcu_read_unlock();
+                       return skb;
+               }
+               rcu_read_unlock();
+       }
+
+       skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
+       if (likely(skb))
+               return skb;
+
+err:
+       stats->drops++;
+       put_page(page);
+       return NULL;
+}
+
 static struct sk_buff *receive_big(struct net_device *dev,
                                   struct virtnet_info *vi,
                                   struct receive_queue *rq,
@@ -1031,6 +1128,28 @@ err:
        return NULL;
 }
 
+static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
+                              struct net_device *dev,
+                              struct virtnet_rq_stats *stats)
+{
+       struct page *page;
+       void *buf;
+       int len;
+
+       while (num_buf-- > 1) {
+               buf = virtqueue_get_buf(rq->vq, &len);
+               if (unlikely(!buf)) {
+                       pr_debug("%s: rx error: %d buffers missing\n",
+                                dev->name, num_buf);
+                       dev->stats.rx_length_errors++;
+                       break;
+               }
+               stats->bytes += len;
+               page = virt_to_head_page(buf);
+               put_page(page);
+       }
+}
+
 /* Why not use xdp_build_skb_from_frame() ?
  * XDP core assumes that xdp frags are PAGE_SIZE in length, while in
  * virtio-net there are 2 points that do not match its requirements:
@@ -1132,7 +1251,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
                                 dev->name, *num_buf,
                                 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
                        dev->stats.rx_length_errors++;
-                       return -EINVAL;
+                       goto err;
                }
 
                stats->bytes += len;
@@ -1151,13 +1270,11 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
                        pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
                                 dev->name, len, (unsigned long)(truesize - room));
                        dev->stats.rx_length_errors++;
-                       return -EINVAL;
+                       goto err;
                }
 
                frag = &shinfo->frags[shinfo->nr_frags++];
-               __skb_frag_set_page(frag, page);
-               skb_frag_off_set(frag, offset);
-               skb_frag_size_set(frag, len);
+               skb_frag_fill_page_desc(frag, page, offset, len);
                if (page_is_pfmemalloc(page))
                        xdp_buff_set_frag_pfmemalloc(xdp);
 
@@ -1166,6 +1283,144 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
 
        *xdp_frags_truesize = xdp_frags_truesz;
        return 0;
+
+err:
+       put_xdp_frags(xdp);
+       return -EINVAL;
+}
+
+static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
+                                  struct receive_queue *rq,
+                                  struct bpf_prog *xdp_prog,
+                                  void *ctx,
+                                  unsigned int *frame_sz,
+                                  int *num_buf,
+                                  struct page **page,
+                                  int offset,
+                                  unsigned int *len,
+                                  struct virtio_net_hdr_mrg_rxbuf *hdr)
+{
+       unsigned int truesize = mergeable_ctx_to_truesize(ctx);
+       unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+       struct page *xdp_page;
+       unsigned int xdp_room;
+
+       /* Transient failure which in theory could occur if
+        * in-flight packets from before XDP was enabled reach
+        * the receive path after XDP is loaded.
+        */
+       if (unlikely(hdr->hdr.gso_type))
+               return NULL;
+
+       /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
+        * with headroom may add hole in truesize, which
+        * make their length exceed PAGE_SIZE. So we disabled the
+        * hole mechanism for xdp. See add_recvbuf_mergeable().
+        */
+       *frame_sz = truesize;
+
+       if (likely(headroom >= virtnet_get_headroom(vi) &&
+                  (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
+               return page_address(*page) + offset;
+       }
+
+       /* This happens when headroom is not enough because
+        * of the buffer was prefilled before XDP is set.
+        * This should only happen for the first several packets.
+        * In fact, vq reset can be used here to help us clean up
+        * the prefilled buffers, but many existing devices do not
+        * support it, and we don't want to bother users who are
+        * using xdp normally.
+        */
+       if (!xdp_prog->aux->xdp_has_frags) {
+               /* linearize data for XDP */
+               xdp_page = xdp_linearize_page(rq, num_buf,
+                                             *page, offset,
+                                             VIRTIO_XDP_HEADROOM,
+                                             len);
+               if (!xdp_page)
+                       return NULL;
+       } else {
+               xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
+                                         sizeof(struct skb_shared_info));
+               if (*len + xdp_room > PAGE_SIZE)
+                       return NULL;
+
+               xdp_page = alloc_page(GFP_ATOMIC);
+               if (!xdp_page)
+                       return NULL;
+
+               memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
+                      page_address(*page) + offset, *len);
+       }
+
+       *frame_sz = PAGE_SIZE;
+
+       put_page(*page);
+
+       *page = xdp_page;
+
+       return page_address(*page) + VIRTIO_XDP_HEADROOM;
+}
+
+static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
+                                            struct virtnet_info *vi,
+                                            struct receive_queue *rq,
+                                            struct bpf_prog *xdp_prog,
+                                            void *buf,
+                                            void *ctx,
+                                            unsigned int len,
+                                            unsigned int *xdp_xmit,
+                                            struct virtnet_rq_stats *stats)
+{
+       struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+       int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
+       struct page *page = virt_to_head_page(buf);
+       int offset = buf - page_address(page);
+       unsigned int xdp_frags_truesz = 0;
+       struct sk_buff *head_skb;
+       unsigned int frame_sz;
+       struct xdp_buff xdp;
+       void *data;
+       u32 act;
+       int err;
+
+       data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
+                                    offset, &len, hdr);
+       if (unlikely(!data))
+               goto err_xdp;
+
+       err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
+                                        &num_buf, &xdp_frags_truesz, stats);
+       if (unlikely(err))
+               goto err_xdp;
+
+       act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
+
+       switch (act) {
+       case XDP_PASS:
+               head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
+               if (unlikely(!head_skb))
+                       break;
+               return head_skb;
+
+       case XDP_TX:
+       case XDP_REDIRECT:
+               return NULL;
+
+       default:
+               break;
+       }
+
+       put_xdp_frags(&xdp);
+
+err_xdp:
+       put_page(page);
+       mergeable_buf_free(rq, num_buf, dev, stats);
+
+       stats->xdp_drops++;
+       stats->drops++;
+       return NULL;
 }
 
 static struct sk_buff *receive_mergeable(struct net_device *dev,
@@ -1182,13 +1437,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        struct page *page = virt_to_head_page(buf);
        int offset = buf - page_address(page);
        struct sk_buff *head_skb, *curr_skb;
-       struct bpf_prog *xdp_prog;
        unsigned int truesize = mergeable_ctx_to_truesize(ctx);
        unsigned int headroom = mergeable_ctx_to_headroom(ctx);
        unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
        unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
-       unsigned int frame_sz, xdp_room;
-       int err;
 
        head_skb = NULL;
        stats->bytes += len - vi->hdr_len;
@@ -1200,149 +1452,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                goto err_skb;
        }
 
-       if (likely(!vi->xdp_enabled)) {
-               xdp_prog = NULL;
-               goto skip_xdp;
-       }
-
-       rcu_read_lock();
-       xdp_prog = rcu_dereference(rq->xdp_prog);
-       if (xdp_prog) {
-               unsigned int xdp_frags_truesz = 0;
-               struct skb_shared_info *shinfo;
-               struct xdp_frame *xdpf;
-               struct page *xdp_page;
-               struct xdp_buff xdp;
-               void *data;
-               u32 act;
-               int i;
-
-               /* Transient failure which in theory could occur if
-                * in-flight packets from before XDP was enabled reach
-                * the receive path after XDP is loaded.
-                */
-               if (unlikely(hdr->hdr.gso_type))
-                       goto err_xdp;
-
-               /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
-                * with headroom may add hole in truesize, which
-                * make their length exceed PAGE_SIZE. So we disabled the
-                * hole mechanism for xdp. See add_recvbuf_mergeable().
-                */
-               frame_sz = truesize;
-
-               /* This happens when headroom is not enough because
-                * of the buffer was prefilled before XDP is set.
-                * This should only happen for the first several packets.
-                * In fact, vq reset can be used here to help us clean up
-                * the prefilled buffers, but many existing devices do not
-                * support it, and we don't want to bother users who are
-                * using xdp normally.
-                */
-               if (!xdp_prog->aux->xdp_has_frags &&
-                   (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
-                       /* linearize data for XDP */
-                       xdp_page = xdp_linearize_page(rq, &num_buf,
-                                                     page, offset,
-                                                     VIRTIO_XDP_HEADROOM,
-                                                     &len);
-                       frame_sz = PAGE_SIZE;
-
-                       if (!xdp_page)
-                               goto err_xdp;
-                       offset = VIRTIO_XDP_HEADROOM;
-               } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
-                       xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
-                                                 sizeof(struct skb_shared_info));
-                       if (len + xdp_room > PAGE_SIZE)
-                               goto err_xdp;
-
-                       xdp_page = alloc_page(GFP_ATOMIC);
-                       if (!xdp_page)
-                               goto err_xdp;
-
-                       memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
-                              page_address(page) + offset, len);
-                       frame_sz = PAGE_SIZE;
-                       offset = VIRTIO_XDP_HEADROOM;
-               } else {
-                       xdp_page = page;
-               }
-
-               data = page_address(xdp_page) + offset;
-               err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
-                                                &num_buf, &xdp_frags_truesz, stats);
-               if (unlikely(err))
-                       goto err_xdp_frags;
+       if (unlikely(vi->xdp_enabled)) {
+               struct bpf_prog *xdp_prog;
 
-               act = bpf_prog_run_xdp(xdp_prog, &xdp);
-               stats->xdp_packets++;
-
-               switch (act) {
-               case XDP_PASS:
-                       head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
-                       if (unlikely(!head_skb))
-                               goto err_xdp_frags;
-
-                       if (unlikely(xdp_page != page))
-                               put_page(page);
+               rcu_read_lock();
+               xdp_prog = rcu_dereference(rq->xdp_prog);
+               if (xdp_prog) {
+                       head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
+                                                        len, xdp_xmit, stats);
                        rcu_read_unlock();
                        return head_skb;
-               case XDP_TX:
-                       stats->xdp_tx++;
-                       xdpf = xdp_convert_buff_to_frame(&xdp);
-                       if (unlikely(!xdpf)) {
-                               netdev_dbg(dev, "convert buff to frame failed for xdp\n");
-                               goto err_xdp_frags;
-                       }
-                       err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
-                       if (unlikely(!err)) {
-                               xdp_return_frame_rx_napi(xdpf);
-                       } else if (unlikely(err < 0)) {
-                               trace_xdp_exception(vi->dev, xdp_prog, act);
-                               goto err_xdp_frags;
-                       }
-                       *xdp_xmit |= VIRTIO_XDP_TX;
-                       if (unlikely(xdp_page != page))
-                               put_page(page);
-                       rcu_read_unlock();
-                       goto xdp_xmit;
-               case XDP_REDIRECT:
-                       stats->xdp_redirects++;
-                       err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                       if (err)
-                               goto err_xdp_frags;
-                       *xdp_xmit |= VIRTIO_XDP_REDIR;
-                       if (unlikely(xdp_page != page))
-                               put_page(page);
-                       rcu_read_unlock();
-                       goto xdp_xmit;
-               default:
-                       bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act);
-                       fallthrough;
-               case XDP_ABORTED:
-                       trace_xdp_exception(vi->dev, xdp_prog, act);
-                       fallthrough;
-               case XDP_DROP:
-                       goto err_xdp_frags;
-               }
-err_xdp_frags:
-               if (unlikely(xdp_page != page))
-                       __free_pages(xdp_page, 0);
-
-               if (xdp_buff_has_frags(&xdp)) {
-                       shinfo = xdp_get_shared_info_from_buff(&xdp);
-                       for (i = 0; i < shinfo->nr_frags; i++) {
-                               xdp_page = skb_frag_page(&shinfo->frags[i]);
-                               put_page(xdp_page);
-                       }
                }
-
-               goto err_xdp;
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
 
-skip_xdp:
        head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
        curr_skb = head_skb;
 
@@ -1408,27 +1531,13 @@ skip_xdp:
        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
        return head_skb;
 
-err_xdp:
-       rcu_read_unlock();
-       stats->xdp_drops++;
 err_skb:
        put_page(page);
-       while (num_buf-- > 1) {
-               buf = virtqueue_get_buf(rq->vq, &len);
-               if (unlikely(!buf)) {
-                       pr_debug("%s: rx error: %d buffers missing\n",
-                                dev->name, num_buf);
-                       dev->stats.rx_length_errors++;
-                       break;
-               }
-               stats->bytes += len;
-               page = virt_to_head_page(buf);
-               put_page(page);
-       }
+       mergeable_buf_free(rq, num_buf, dev, stats);
+
 err_buf:
        stats->drops++;
        dev_kfree_skb(head_skb);
-xdp_xmit:
        return NULL;
 }
 
index f2b76ee..7fa74b8 100644 (file)
@@ -686,9 +686,7 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
 
        BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
 
-       __skb_frag_set_page(frag, rbi->page);
-       skb_frag_off_set(frag, 0);
-       skb_frag_size_set(frag, rcd->len);
+       skb_frag_fill_page_desc(frag, rbi->page, 0, rcd->len);
        skb->data_len += rcd->len;
        skb->truesize += PAGE_SIZE;
        skb_shinfo(skb)->nr_frags++;
index 561fe1b..7874454 100644 (file)
@@ -2352,7 +2352,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
 #endif
        /* Bypass encapsulation if the destination is local */
        if (rt_flags & RTCF_LOCAL &&
-           !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
+           !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
+           vxlan->cfg.flags & VXLAN_F_LOCALBYPASS) {
                struct vxlan_dev *dst_vxlan;
 
                dst_release(dst);
@@ -3172,6 +3173,7 @@ static void vxlan_raw_setup(struct net_device *dev)
 }
 
 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
+       [IFLA_VXLAN_UNSPEC]     = { .strict_start_type = IFLA_VXLAN_LOCALBYPASS },
        [IFLA_VXLAN_ID]         = { .type = NLA_U32 },
        [IFLA_VXLAN_GROUP]      = { .len = sizeof_field(struct iphdr, daddr) },
        [IFLA_VXLAN_GROUP6]     = { .len = sizeof(struct in6_addr) },
@@ -3202,6 +3204,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
        [IFLA_VXLAN_TTL_INHERIT]        = { .type = NLA_FLAG },
        [IFLA_VXLAN_DF]         = { .type = NLA_U8 },
        [IFLA_VXLAN_VNIFILTER]  = { .type = NLA_U8 },
+       [IFLA_VXLAN_LOCALBYPASS]        = NLA_POLICY_MAX(NLA_U8, 1),
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4011,6 +4014,17 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
                        conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
        }
 
+       if (data[IFLA_VXLAN_LOCALBYPASS]) {
+               err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LOCALBYPASS,
+                                   VXLAN_F_LOCALBYPASS, changelink,
+                                   true, extack);
+               if (err)
+                       return err;
+       } else if (!changelink) {
+               /* default to local bypass on a new device */
+               conf->flags |= VXLAN_F_LOCALBYPASS;
+       }
+
        if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
                err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
                                    VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
@@ -4232,6 +4246,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
                nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
                nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
                nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
+               nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */
                0;
 }
 
@@ -4308,7 +4323,9 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
            nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
                       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
            nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
-                      !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
+                      !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)) ||
+           nla_put_u8(skb, IFLA_VXLAN_LOCALBYPASS,
+                      !!(vxlan->cfg.flags & VXLAN_F_LOCALBYPASS)))
                goto nla_put_failure;
 
        if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
index 94b5e3e..7738ebe 100644 (file)
@@ -102,14 +102,14 @@ static inline u8 mwifiex_space_avail_for_new_ba_stream(
 {
        struct mwifiex_private *priv;
        u8 i;
-       u32 ba_stream_num = 0, ba_stream_max;
+       size_t ba_stream_num = 0, ba_stream_max;
 
        ba_stream_max = MWIFIEX_MAX_TX_BASTREAM_SUPPORTED;
 
        for (i = 0; i < adapter->priv_num; i++) {
                priv = adapter->priv[i];
                if (priv)
-                       ba_stream_num += mwifiex_wmm_list_len(
+                       ba_stream_num += list_count_nodes(
                                &priv->tx_ba_stream_tbl_ptr);
        }
 
index ac8001c..644b1e1 100644 (file)
@@ -2187,9 +2187,9 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv,
 
        if (nd_config) {
                adapter->nd_info =
-                       kzalloc(sizeof(struct cfg80211_wowlan_nd_match) +
-                               sizeof(struct cfg80211_wowlan_nd_match *) *
-                               scan_rsp->number_of_sets, GFP_ATOMIC);
+                       kzalloc(struct_size(adapter->nd_info, matches,
+                                           scan_rsp->number_of_sets),
+                               GFP_ATOMIC);
 
                if (adapter->nd_info)
                        adapter->nd_info->n_matches = scan_rsp->number_of_sets;
index 4f53a27..d7659e6 100644 (file)
@@ -39,21 +39,6 @@ mwifiex_get_tid(struct mwifiex_ra_list_tbl *ptr)
 }
 
 /*
- * This function gets the length of a list.
- */
-static inline int
-mwifiex_wmm_list_len(struct list_head *head)
-{
-       struct list_head *pos;
-       int count = 0;
-
-       list_for_each(pos, head)
-               ++count;
-
-       return count;
-}
-
-/*
  * This function checks if a RA list is empty or not.
  */
 static inline u8
index 230b0e1..dbddf25 100644 (file)
@@ -127,8 +127,6 @@ void mt7601u_init_debugfs(struct mt7601u_dev *dev)
        struct dentry *dir;
 
        dir = debugfs_create_dir("mt7601u", dev->hw->wiphy->debugfsdir);
-       if (!dir)
-               return;
 
        debugfs_create_u8("temperature", 0400, dir, &dev->raw_temp);
        debugfs_create_u32("temp_mode", 0400, dir, &dev->temp_mode);
index 5adc69d..a28da59 100644 (file)
@@ -485,6 +485,9 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss,
                int rsn_ie_len = sizeof(struct element) + rsn_ie[1];
                int offset = 8;
 
+               param->mode_802_11i = 2;
+               param->rsn_found = true;
+
                /* extract RSN capabilities */
                if (offset < rsn_ie_len) {
                        /* skip over pairwise suites */
@@ -494,11 +497,8 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss,
                                /* skip over authentication suites */
                                offset += (rsn_ie[offset] * 4) + 2;
 
-                               if (offset + 1 < rsn_ie_len) {
-                                       param->mode_802_11i = 2;
-                                       param->rsn_found = true;
+                               if (offset + 1 < rsn_ie_len)
                                        memcpy(param->rsn_cap, &rsn_ie[offset], 2);
-                               }
                        }
                }
        }
index baa2881..8e386db 100644 (file)
@@ -30,8 +30,6 @@ enum {
        WILC_GET_CFG
 };
 
-#define WILC_MAX_ASSOC_RESP_FRAME_SIZE   256
-
 struct rf_info {
        u8 link_speed;
        s8 rssi;
index 614c567..7038b74 100644 (file)
@@ -30,7 +30,7 @@ struct wilc_cfg_str {
 struct wilc_cfg_str_vals {
        u8 mac_address[7];
        u8 firmware_version[129];
-       u8 assoc_rsp[256];
+       u8 assoc_rsp[WILC_MAX_ASSOC_RESP_FRAME_SIZE];
 };
 
 struct wilc_cfg {
index df2f5a6..254a046 100644 (file)
@@ -10,6 +10,8 @@
 #include <linux/netdevice.h>
 #include "fw.h"
 
+#define WILC_MAX_ASSOC_RESP_FRAME_SIZE 512
+
 /********************************************
  *
  *      Wlan Configuration ID
index 808c1c8..376b4b7 100644 (file)
@@ -1280,6 +1280,9 @@ struct rtl8xxxu_rfregs {
 #define  H2C_JOIN_BSS_DISCONNECT       0
 #define  H2C_JOIN_BSS_CONNECT          1
 
+#define H2C_MACID_ROLE_STA             1
+#define H2C_MACID_ROLE_AP              2
+
 /*
  * H2C (firmware) commands differ between the older generation chips
  * 8188[cr]u, 819[12]cu, and 8723au, and the more recent chips 8723bu,
@@ -1727,6 +1730,8 @@ struct rtl8xxxu_cfo_tracking {
 };
 
 #define RTL8XXXU_HW_LED_CONTROL        2
+#define RTL8XXXU_MAX_MAC_ID_NUM        128
+#define RTL8XXXU_BC_MC_MACID   0
 
 struct rtl8xxxu_priv {
        struct ieee80211_hw *hw;
@@ -1851,6 +1856,7 @@ struct rtl8xxxu_priv {
        struct delayed_work ra_watchdog;
        struct work_struct c2hcmd_work;
        struct sk_buff_head c2hcmd_queue;
+       struct work_struct update_beacon_work;
        struct rtl8xxxu_btcoex bt_coex;
        struct rtl8xxxu_ra_report ra_report;
        struct rtl8xxxu_cfo_tracking cfo_tracking;
@@ -1859,6 +1865,14 @@ struct rtl8xxxu_priv {
        bool led_registered;
        char led_name[32];
        struct led_classdev led_cdev;
+       DECLARE_BITMAP(mac_id_map, RTL8XXXU_MAX_MAC_ID_NUM);
+};
+
+struct rtl8xxxu_sta_info {
+       struct ieee80211_sta *sta;
+       struct ieee80211_vif *vif;
+
+       u8 macid;
 };
 
 struct rtl8xxxu_rx_urb {
@@ -1903,15 +1917,16 @@ struct rtl8xxxu_fileops {
        void (*set_tx_power) (struct rtl8xxxu_priv *priv, int channel,
                              bool ht40);
        void (*update_rate_mask) (struct rtl8xxxu_priv *priv,
-                                 u32 ramask, u8 rateid, int sgi, int txbw_40mhz);
+                                 u32 ramask, u8 rateid, int sgi, int txbw_40mhz,
+                                 u8 macid);
        void (*report_connect) (struct rtl8xxxu_priv *priv,
-                               u8 macid, bool connect);
+                               u8 macid, u8 role, bool connect);
        void (*report_rssi) (struct rtl8xxxu_priv *priv, u8 macid, u8 rssi);
        void (*fill_txdesc) (struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
                             struct ieee80211_tx_info *tx_info,
                             struct rtl8xxxu_txdesc32 *tx_desc, bool sgi,
                             bool short_preamble, bool ampdu_enable,
-                            u32 rts_rate);
+                            u32 rts_rate, u8 macid);
        void (*set_crystal_cap) (struct rtl8xxxu_priv *priv, u8 crystal_cap);
        s8 (*cck_rssi) (struct rtl8xxxu_priv *priv, struct rtl8723au_phy_stats *phy_stats);
        int (*led_classdev_brightness_set) (struct led_classdev *led_cdev,
@@ -1929,6 +1944,8 @@ struct rtl8xxxu_fileops {
        u8 init_reg_hmtfr:1;
        u8 ampdu_max_time;
        u8 ustime_tsf_edca;
+       u8 supports_ap:1;
+       u16 max_macid_num;
        u32 adda_1t_init;
        u32 adda_1t_path_on;
        u32 adda_2t_path_on_a;
@@ -2022,13 +2039,13 @@ void rtl8xxxu_gen2_config_channel(struct ieee80211_hw *hw);
 void rtl8xxxu_gen1_usb_quirks(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_gen2_usb_quirks(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_update_rate_mask(struct rtl8xxxu_priv *priv,
-                              u32 ramask, u8 rateid, int sgi, int txbw_40mhz);
+                              u32 ramask, u8 rateid, int sgi, int txbw_40mhz, u8 macid);
 void rtl8xxxu_gen2_update_rate_mask(struct rtl8xxxu_priv *priv,
-                                   u32 ramask, u8 rateid, int sgi, int txbw_40mhz);
+                                   u32 ramask, u8 rateid, int sgi, int txbw_40mhz, u8 macid);
 void rtl8xxxu_gen1_report_connect(struct rtl8xxxu_priv *priv,
-                                 u8 macid, bool connect);
+                                 u8 macid, u8 role, bool connect);
 void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv,
-                                 u8 macid, bool connect);
+                                 u8 macid, u8 role, bool connect);
 void rtl8xxxu_gen1_report_rssi(struct rtl8xxxu_priv *priv, u8 macid, u8 rssi);
 void rtl8xxxu_gen2_report_rssi(struct rtl8xxxu_priv *priv, u8 macid, u8 rssi);
 void rtl8xxxu_gen1_init_aggregation(struct rtl8xxxu_priv *priv);
@@ -2057,17 +2074,17 @@ void rtl8xxxu_fill_txdesc_v1(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
                             struct ieee80211_tx_info *tx_info,
                             struct rtl8xxxu_txdesc32 *tx_desc, bool sgi,
                             bool short_preamble, bool ampdu_enable,
-                            u32 rts_rate);
+                            u32 rts_rate, u8 macid);
 void rtl8xxxu_fill_txdesc_v2(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
                             struct ieee80211_tx_info *tx_info,
                             struct rtl8xxxu_txdesc32 *tx_desc32, bool sgi,
                             bool short_preamble, bool ampdu_enable,
-                            u32 rts_rate);
+                            u32 rts_rate, u8 macid);
 void rtl8xxxu_fill_txdesc_v3(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
                             struct ieee80211_tx_info *tx_info,
                             struct rtl8xxxu_txdesc32 *tx_desc32, bool sgi,
                             bool short_preamble, bool ampdu_enable,
-                            u32 rts_rate);
+                            u32 rts_rate, u8 macid);
 void rtl8723bu_set_ps_tdma(struct rtl8xxxu_priv *priv,
                           u8 arg1, u8 arg2, u8 arg3, u8 arg4, u8 arg5);
 void rtl8723bu_phy_init_antenna_selection(struct rtl8xxxu_priv *priv);
index 8986783..6d0f975 100644 (file)
@@ -1794,7 +1794,8 @@ static void rtl8188e_arfb_refresh(struct rtl8xxxu_ra_info *ra)
 
 static void
 rtl8188e_update_rate_mask(struct rtl8xxxu_priv *priv,
-                         u32 ramask, u8 rateid, int sgi, int txbw_40mhz)
+                         u32 ramask, u8 rateid, int sgi, int txbw_40mhz,
+                         u8 macid)
 {
        struct rtl8xxxu_ra_info *ra = &priv->ra_info;
 
index dbdfd77..71b7f0d 100644 (file)
@@ -1748,6 +1748,8 @@ struct rtl8xxxu_fileops rtl8188fu_fops = {
        .init_reg_hmtfr = 1,
        .ampdu_max_time = 0x70,
        .ustime_tsf_edca = 0x28,
+       .supports_ap = 1,
+       .max_macid_num = 16,
        .adda_1t_init = 0x03c00014,
        .adda_1t_path_on = 0x03c00014,
        .trxff_boundary = 0x3f7f,
index 831639d..1eb0d56 100644 (file)
@@ -1185,6 +1185,20 @@ static void rtl8xxxu_stop_tx_beacon(struct rtl8xxxu_priv *priv)
        rtl8xxxu_write8(priv, REG_TBTT_PROHIBIT + 2, val8);
 }
 
+static void rtl8xxxu_start_tx_beacon(struct rtl8xxxu_priv *priv)
+{
+       u8 val8;
+
+       val8 = rtl8xxxu_read8(priv, REG_FWHW_TXQ_CTRL + 2);
+       val8 |= EN_BCNQ_DL >> 16;
+       rtl8xxxu_write8(priv, REG_FWHW_TXQ_CTRL + 2, val8);
+
+       rtl8xxxu_write8(priv, REG_TBTT_PROHIBIT + 1, 0x80);
+       val8 = rtl8xxxu_read8(priv, REG_TBTT_PROHIBIT + 2);
+       val8 &= 0xF0;
+       rtl8xxxu_write8(priv, REG_TBTT_PROHIBIT + 2, val8);
+}
+
 
 /*
  * The rtl8723a has 3 channel groups for it's efuse settings. It only
@@ -3963,6 +3977,34 @@ void rtl8xxxu_init_burst(struct rtl8xxxu_priv *priv)
        rtl8xxxu_write8(priv, REG_RSV_CTRL, val8);
 }
 
+static u8 rtl8xxxu_acquire_macid(struct rtl8xxxu_priv *priv)
+{
+       u8 macid;
+
+       macid = find_first_zero_bit(priv->mac_id_map, RTL8XXXU_MAX_MAC_ID_NUM);
+       if (macid < RTL8XXXU_MAX_MAC_ID_NUM)
+               set_bit(macid, priv->mac_id_map);
+
+       return macid;
+}
+
+static void rtl8xxxu_release_macid(struct rtl8xxxu_priv *priv, u8 macid)
+{
+       clear_bit(macid, priv->mac_id_map);
+}
+
+static inline u8 rtl8xxxu_get_macid(struct rtl8xxxu_priv *priv,
+                                   struct ieee80211_sta *sta)
+{
+       struct rtl8xxxu_sta_info *sta_info;
+
+       if (!priv->vif || priv->vif->type == NL80211_IFTYPE_STATION || !sta)
+               return 0;
+
+       sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
+       return sta_info->macid;
+}
+
 static int rtl8xxxu_init_device(struct ieee80211_hw *hw)
 {
        struct rtl8xxxu_priv *priv = hw->priv;
@@ -4433,6 +4475,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw)
        if (priv->rtl_chip == RTL8188E)
                rtl8188e_ra_info_init_all(&priv->ra_info);
 
+       set_bit(RTL8XXXU_BC_MC_MACID, priv->mac_id_map);
+
 exit:
        return ret;
 }
@@ -4490,6 +4534,16 @@ int rtl8xxxu_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant)
        return 0;
 }
 
+static int rtl8xxxu_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
+                           bool set)
+{
+       struct rtl8xxxu_priv *priv = hw->priv;
+
+       schedule_work(&priv->update_beacon_work);
+
+       return 0;
+}
+
 static void rtl8xxxu_sw_scan_start(struct ieee80211_hw *hw,
                                   struct ieee80211_vif *vif, const u8 *mac)
 {
@@ -4513,7 +4567,8 @@ static void rtl8xxxu_sw_scan_complete(struct ieee80211_hw *hw,
 }
 
 void rtl8xxxu_update_rate_mask(struct rtl8xxxu_priv *priv,
-                              u32 ramask, u8 rateid, int sgi, int txbw_40mhz)
+                              u32 ramask, u8 rateid, int sgi, int txbw_40mhz,
+                              u8 macid)
 {
        struct h2c_cmd h2c;
 
@@ -4533,7 +4588,8 @@ void rtl8xxxu_update_rate_mask(struct rtl8xxxu_priv *priv,
 }
 
 void rtl8xxxu_gen2_update_rate_mask(struct rtl8xxxu_priv *priv,
-                                   u32 ramask, u8 rateid, int sgi, int txbw_40mhz)
+                                   u32 ramask, u8 rateid, int sgi, int txbw_40mhz,
+                                   u8 macid)
 {
        struct h2c_cmd h2c;
        u8 bw;
@@ -4550,6 +4606,7 @@ void rtl8xxxu_gen2_update_rate_mask(struct rtl8xxxu_priv *priv,
        h2c.b_macid_cfg.ramask1 = (ramask >> 8) & 0xff;
        h2c.b_macid_cfg.ramask2 = (ramask >> 16) & 0xff;
        h2c.b_macid_cfg.ramask3 = (ramask >> 24) & 0xff;
+       h2c.b_macid_cfg.macid = macid;
 
        h2c.b_macid_cfg.data1 = rateid;
        if (sgi)
@@ -4563,7 +4620,7 @@ void rtl8xxxu_gen2_update_rate_mask(struct rtl8xxxu_priv *priv,
 }
 
 void rtl8xxxu_gen1_report_connect(struct rtl8xxxu_priv *priv,
-                                 u8 macid, bool connect)
+                                 u8 macid, u8 role, bool connect)
 {
        struct h2c_cmd h2c;
 
@@ -4580,7 +4637,7 @@ void rtl8xxxu_gen1_report_connect(struct rtl8xxxu_priv *priv,
 }
 
 void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv,
-                                 u8 macid, bool connect)
+                                 u8 macid, u8 role, bool connect)
 {
        /*
         * The firmware turns on the rate control when it knows it's
@@ -4596,6 +4653,9 @@ void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv,
        else
                h2c.media_status_rpt.parm &= ~BIT(0);
 
+       h2c.media_status_rpt.parm |= ((role << 4) & 0xf0);
+       h2c.media_status_rpt.macid = macid;
+
        rtl8xxxu_gen2_h2c_cmd(priv, &h2c, sizeof(h2c.media_status_rpt));
 }
 
@@ -4912,7 +4972,8 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                        priv->vif = vif;
                        priv->rssi_level = RTL8XXXU_RATR_STA_INIT;
 
-                       priv->fops->update_rate_mask(priv, ramask, 0, sgi, bw == RATE_INFO_BW_40);
+                       priv->fops->update_rate_mask(priv, ramask, 0, sgi,
+                                                    bw == RATE_INFO_BW_40, 0);
 
                        rtl8xxxu_write8(priv, REG_BCN_MAX_ERR, 0xff);
 
@@ -4922,13 +4983,13 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                        rtl8xxxu_write16(priv, REG_BCN_PSR_RPT,
                                         0xc000 | vif->cfg.aid);
 
-                       priv->fops->report_connect(priv, 0, true);
+                       priv->fops->report_connect(priv, 0, H2C_MACID_ROLE_AP, true);
                } else {
                        val8 = rtl8xxxu_read8(priv, REG_BEACON_CTRL);
                        val8 |= BEACON_DISABLE_TSF_UPDATE;
                        rtl8xxxu_write8(priv, REG_BEACON_CTRL, val8);
 
-                       priv->fops->report_connect(priv, 0, false);
+                       priv->fops->report_connect(priv, 0, H2C_MACID_ROLE_AP, false);
                }
        }
 
@@ -4965,10 +5026,35 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                dev_dbg(dev, "Changed BASIC_RATES!\n");
                rtl8xxxu_set_basic_rates(priv, bss_conf->basic_rates);
        }
+
+       if (changed & BSS_CHANGED_BEACON_ENABLED) {
+               if (bss_conf->enable_beacon)
+                       rtl8xxxu_start_tx_beacon(priv);
+               else
+                       rtl8xxxu_stop_tx_beacon(priv);
+       }
+
+       if (changed & BSS_CHANGED_BEACON)
+               schedule_work(&priv->update_beacon_work);
+
 error:
        return;
 }
 
+static int rtl8xxxu_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+                            struct ieee80211_bss_conf *link_conf)
+{
+       struct rtl8xxxu_priv *priv = hw->priv;
+       struct device *dev = &priv->udev->dev;
+
+       dev_dbg(dev, "Start AP mode\n");
+       rtl8xxxu_set_bssid(priv, vif->bss_conf.bssid);
+       rtl8xxxu_write16(priv, REG_BCN_INTERVAL, vif->bss_conf.beacon_int);
+       priv->fops->report_connect(priv, RTL8XXXU_BC_MC_MACID, 0, true);
+
+       return 0;
+}
+
 static u32 rtl8xxxu_80211_to_rtl_queue(u32 queue)
 {
        u32 rtlqueue;
@@ -4997,7 +5083,9 @@ static u32 rtl8xxxu_queue_select(struct ieee80211_hdr *hdr, struct sk_buff *skb)
 {
        u32 queue;
 
-       if (ieee80211_is_mgmt(hdr->frame_control))
+       if (unlikely(ieee80211_is_beacon(hdr->frame_control)))
+               queue = TXDESC_QUEUE_BEACON;
+       else if (ieee80211_is_mgmt(hdr->frame_control))
                queue = TXDESC_QUEUE_MGNT;
        else
                queue = rtl8xxxu_80211_to_rtl_queue(skb_get_queue_mapping(skb));
@@ -5160,23 +5248,16 @@ void
 rtl8xxxu_fill_txdesc_v1(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
                        struct ieee80211_tx_info *tx_info,
                        struct rtl8xxxu_txdesc32 *tx_desc, bool sgi,
-                       bool short_preamble, bool ampdu_enable, u32 rts_rate)
+                       bool short_preamble, bool ampdu_enable, u32 rts_rate,
+                       u8 macid)
 {
-       struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info);
        struct rtl8xxxu_priv *priv = hw->priv;
        struct device *dev = &priv->udev->dev;
        u8 *qc = ieee80211_get_qos_ctl(hdr);
        u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
-       u32 rate;
-       u16 rate_flags = tx_info->control.rates[0].flags;
+       u32 rate = 0;
        u16 seq_number;
 
-       if (rate_flags & IEEE80211_TX_RC_MCS &&
-           !ieee80211_is_mgmt(hdr->frame_control))
-               rate = tx_info->control.rates[0].idx + DESC_RATE_MCS0;
-       else
-               rate = tx_rate->hw_value;
-
        if (rtl8xxxu_debug & RTL8XXXU_DEBUG_TX)
                dev_info(dev, "%s: TX rate: %d, pkt size %u\n",
                         __func__, rate, le16_to_cpu(tx_desc->pkt_size));
@@ -5215,10 +5296,10 @@ rtl8xxxu_fill_txdesc_v1(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
         * rts_rate is zero if RTS/CTS or CTS to SELF are not enabled
         */
        tx_desc->txdw4 |= cpu_to_le32(rts_rate << TXDESC32_RTS_RATE_SHIFT);
-       if (ampdu_enable || (rate_flags & IEEE80211_TX_RC_USE_RTS_CTS)) {
+       if (ampdu_enable || tx_info->control.use_rts) {
                tx_desc->txdw4 |= cpu_to_le32(TXDESC32_RTS_CTS_ENABLE);
                tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE);
-       } else if (rate_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
+       } else if (tx_info->control.use_cts_prot) {
                tx_desc->txdw4 |= cpu_to_le32(TXDESC32_CTS_SELF_ENABLE);
                tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE);
        }
@@ -5232,30 +5313,25 @@ void
 rtl8xxxu_fill_txdesc_v2(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
                        struct ieee80211_tx_info *tx_info,
                        struct rtl8xxxu_txdesc32 *tx_desc32, bool sgi,
-                       bool short_preamble, bool ampdu_enable, u32 rts_rate)
+                       bool short_preamble, bool ampdu_enable, u32 rts_rate,
+                       u8 macid)
 {
-       struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info);
        struct rtl8xxxu_priv *priv = hw->priv;
        struct device *dev = &priv->udev->dev;
        struct rtl8xxxu_txdesc40 *tx_desc40;
        u8 *qc = ieee80211_get_qos_ctl(hdr);
        u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
-       u32 rate;
-       u16 rate_flags = tx_info->control.rates[0].flags;
+       u32 rate = 0;
        u16 seq_number;
 
        tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc32;
 
-       if (rate_flags & IEEE80211_TX_RC_MCS &&
-           !ieee80211_is_mgmt(hdr->frame_control))
-               rate = tx_info->control.rates[0].idx + DESC_RATE_MCS0;
-       else
-               rate = tx_rate->hw_value;
-
        if (rtl8xxxu_debug & RTL8XXXU_DEBUG_TX)
                dev_info(dev, "%s: TX rate: %d, pkt size %u\n",
                         __func__, rate, le16_to_cpu(tx_desc40->pkt_size));
 
+       tx_desc40->txdw1 |= cpu_to_le32(macid << TXDESC40_MACID_SHIFT);
+
        seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
 
        tx_desc40->txdw4 = cpu_to_le32(rate);
@@ -5279,17 +5355,21 @@ rtl8xxxu_fill_txdesc_v2(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
                tx_desc40->txdw4 |= cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE);
        }
 
+       if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ)
+               tx_desc40->txdw8 |= cpu_to_le32(TXDESC40_HW_SEQ_ENABLE);
+
        if (short_preamble)
                tx_desc40->txdw5 |= cpu_to_le32(TXDESC40_SHORT_PREAMBLE);
 
        tx_desc40->txdw4 |= cpu_to_le32(rts_rate << TXDESC40_RTS_RATE_SHIFT);
+
        /*
         * rts_rate is zero if RTS/CTS or CTS to SELF are not enabled
         */
-       if (ampdu_enable || (rate_flags & IEEE80211_TX_RC_USE_RTS_CTS)) {
+       if (ampdu_enable || tx_info->control.use_rts) {
                tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE);
                tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE);
-       } else if (rate_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
+       } else if (tx_info->control.use_cts_prot) {
                /*
                 * For some reason the vendor driver doesn't set
                 * TXDESC40_HW_RTS_ENABLE for CTS to SELF
@@ -5307,24 +5387,17 @@ void
 rtl8xxxu_fill_txdesc_v3(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
                        struct ieee80211_tx_info *tx_info,
                        struct rtl8xxxu_txdesc32 *tx_desc, bool sgi,
-                       bool short_preamble, bool ampdu_enable, u32 rts_rate)
+                       bool short_preamble, bool ampdu_enable, u32 rts_rate,
+                       u8 macid)
 {
-       struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info);
        struct rtl8xxxu_priv *priv = hw->priv;
        struct device *dev = &priv->udev->dev;
        struct rtl8xxxu_ra_info *ra = &priv->ra_info;
        u8 *qc = ieee80211_get_qos_ctl(hdr);
        u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
-       u32 rate;
-       u16 rate_flags = tx_info->control.rates[0].flags;
+       u32 rate = 0;
        u16 seq_number;
 
-       if (rate_flags & IEEE80211_TX_RC_MCS &&
-           !ieee80211_is_mgmt(hdr->frame_control))
-               rate = tx_info->control.rates[0].idx + DESC_RATE_MCS0;
-       else
-               rate = tx_rate->hw_value;
-
        seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
 
        if (ieee80211_is_data(hdr->frame_control)) {
@@ -5377,10 +5450,10 @@ rtl8xxxu_fill_txdesc_v3(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
         * rts_rate is zero if RTS/CTS or CTS to SELF are not enabled
         */
        tx_desc->txdw4 |= cpu_to_le32(rts_rate << TXDESC32_RTS_RATE_SHIFT);
-       if (ampdu_enable || (rate_flags & IEEE80211_TX_RC_USE_RTS_CTS)) {
+       if (ampdu_enable || tx_info->control.use_rts) {
                tx_desc->txdw4 |= cpu_to_le32(TXDESC32_RTS_CTS_ENABLE);
                tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE);
-       } else if (rate_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
+       } else if (tx_info->control.use_cts_prot) {
                tx_desc->txdw4 |= cpu_to_le32(TXDESC32_CTS_SELF_ENABLE);
                tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE);
        }
@@ -5404,8 +5477,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
        struct device *dev = &priv->udev->dev;
        u32 queue, rts_rate;
        u16 pktlen = skb->len;
-       u16 rate_flag = tx_info->control.rates[0].flags;
        int tx_desc_size = priv->fops->tx_desc_size;
+       u8 macid;
        int ret;
        bool ampdu_enable, sgi = false, short_preamble = false;
 
@@ -5488,26 +5561,29 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
                }
        }
 
-       if (rate_flag & IEEE80211_TX_RC_SHORT_GI ||
-           (ieee80211_is_data_qos(hdr->frame_control) &&
-            sta && sta->deflink.ht_cap.cap &
-            (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20)))
+       if (ieee80211_is_data_qos(hdr->frame_control) &&
+           sta && sta->deflink.ht_cap.cap &
+           (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))
                sgi = true;
 
-       if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE ||
-           (sta && vif && vif->bss_conf.use_short_preamble))
+       if (sta && vif && vif->bss_conf.use_short_preamble)
                short_preamble = true;
 
-       if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS)
-               rts_rate = ieee80211_get_rts_cts_rate(hw, tx_info)->hw_value;
-       else if (rate_flag & IEEE80211_TX_RC_USE_CTS_PROTECT)
-               rts_rate = ieee80211_get_rts_cts_rate(hw, tx_info)->hw_value;
+       if (skb->len > hw->wiphy->rts_threshold)
+               tx_info->control.use_rts = true;
+
+       if (sta && vif && vif->bss_conf.use_cts_prot)
+               tx_info->control.use_cts_prot = true;
+
+       if (ampdu_enable || tx_info->control.use_rts ||
+           tx_info->control.use_cts_prot)
+               rts_rate = DESC_RATE_24M;
        else
                rts_rate = 0;
 
-
+       macid = rtl8xxxu_get_macid(priv, sta);
        priv->fops->fill_txdesc(hw, hdr, tx_info, tx_desc, sgi, short_preamble,
-                               ampdu_enable, rts_rate);
+                               ampdu_enable, rts_rate, macid);
 
        rtl8xxxu_calc_tx_desc_csum(tx_desc);
 
@@ -5530,6 +5606,55 @@ error:
        dev_kfree_skb(skb);
 }
 
+static void rtl8xxxu_send_beacon_frame(struct ieee80211_hw *hw,
+                                      struct ieee80211_vif *vif)
+{
+       struct rtl8xxxu_priv *priv = hw->priv;
+       struct sk_buff *skb = ieee80211_beacon_get(hw, vif, 0);
+       struct device *dev = &priv->udev->dev;
+       int retry;
+       u8 val8;
+
+       /* BCN_VALID, write 1 to clear, cleared by SW */
+       val8 = rtl8xxxu_read8(priv, REG_TDECTRL + 2);
+       val8 |= BIT_BCN_VALID >> 16;
+       rtl8xxxu_write8(priv, REG_TDECTRL + 2, val8);
+
+       /* SW_BCN_SEL - Port0 */
+       val8 = rtl8xxxu_read8(priv, REG_DWBCN1_CTRL_8723B + 2);
+       val8 &= ~(BIT_SW_BCN_SEL >> 16);
+       rtl8xxxu_write8(priv, REG_DWBCN1_CTRL_8723B + 2, val8);
+
+       if (skb)
+               rtl8xxxu_tx(hw, NULL, skb);
+
+       retry = 100;
+       do {
+               val8 = rtl8xxxu_read8(priv, REG_TDECTRL + 2);
+               if (val8 & (BIT_BCN_VALID >> 16))
+                       break;
+               usleep_range(10, 20);
+       } while (--retry);
+
+       if (!retry)
+               dev_err(dev, "%s: Failed to read beacon valid bit\n", __func__);
+}
+
+static void rtl8xxxu_update_beacon_work_callback(struct work_struct *work)
+{
+       struct rtl8xxxu_priv *priv =
+               container_of(work, struct rtl8xxxu_priv, update_beacon_work);
+       struct ieee80211_hw *hw = priv->hw;
+       struct ieee80211_vif *vif = priv->vif;
+
+       if (!vif) {
+               WARN_ONCE(true, "no vif to update beacon\n");
+               return;
+       }
+
+       rtl8xxxu_send_beacon_frame(hw, vif);
+}
+
 void rtl8723au_rx_parse_phystats(struct rtl8xxxu_priv *priv,
                                 struct ieee80211_rx_status *rx_status,
                                 struct rtl8723au_phy_stats *phy_stats,
@@ -6198,61 +6323,98 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb)
 int rtl8xxxu_parse_rxdesc24(struct rtl8xxxu_priv *priv, struct sk_buff *skb)
 {
        struct ieee80211_hw *hw = priv->hw;
-       struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
-       struct rtl8xxxu_rxdesc24 *rx_desc =
-               (struct rtl8xxxu_rxdesc24 *)skb->data;
+       struct ieee80211_rx_status *rx_status;
+       struct rtl8xxxu_rxdesc24 *rx_desc;
        struct rtl8723au_phy_stats *phy_stats;
-       __le32 *_rx_desc_le = (__le32 *)skb->data;
-       u32 *_rx_desc = (u32 *)skb->data;
+       struct sk_buff *next_skb = NULL;
+       __le32 *_rx_desc_le;
+       u32 *_rx_desc;
        int drvinfo_sz, desc_shift;
-       int i;
+       int i, pkt_len, urb_len, pkt_offset;
+
+       urb_len = skb->len;
+
+       if (urb_len < sizeof(struct rtl8xxxu_rxdesc24)) {
+               kfree_skb(skb);
+               return RX_TYPE_ERROR;
+       }
 
-       for (i = 0; i < (sizeof(struct rtl8xxxu_rxdesc24) / sizeof(u32)); i++)
-               _rx_desc[i] = le32_to_cpu(_rx_desc_le[i]);
+       do {
+               rx_desc = (struct rtl8xxxu_rxdesc24 *)skb->data;
+               _rx_desc_le = (__le32 *)skb->data;
+               _rx_desc = (u32 *)skb->data;
 
-       memset(rx_status, 0, sizeof(struct ieee80211_rx_status));
+               for (i = 0; i < (sizeof(struct rtl8xxxu_rxdesc24) / sizeof(u32)); i++)
+                       _rx_desc[i] = le32_to_cpu(_rx_desc_le[i]);
 
-       skb_pull(skb, sizeof(struct rtl8xxxu_rxdesc24));
+               pkt_len = rx_desc->pktlen;
 
-       phy_stats = (struct rtl8723au_phy_stats *)skb->data;
+               drvinfo_sz = rx_desc->drvinfo_sz * 8;
+               desc_shift = rx_desc->shift;
+               pkt_offset = roundup(pkt_len + drvinfo_sz + desc_shift +
+                                    sizeof(struct rtl8xxxu_rxdesc24), 8);
 
-       drvinfo_sz = rx_desc->drvinfo_sz * 8;
-       desc_shift = rx_desc->shift;
-       skb_pull(skb, drvinfo_sz + desc_shift);
+               /*
+                * Only clone the skb if there's enough data at the end to
+                * at least cover the rx descriptor
+                */
+               if (urb_len >= (pkt_offset + sizeof(struct rtl8xxxu_rxdesc24)))
+                       next_skb = skb_clone(skb, GFP_ATOMIC);
 
-       if (rx_desc->rpt_sel) {
-               struct device *dev = &priv->udev->dev;
-               dev_dbg(dev, "%s: C2H packet\n", __func__);
-               rtl8723bu_handle_c2h(priv, skb);
-               return RX_TYPE_C2H;
-       }
+               rx_status = IEEE80211_SKB_RXCB(skb);
+               memset(rx_status, 0, sizeof(struct ieee80211_rx_status));
 
-       if (rx_desc->phy_stats)
-               priv->fops->parse_phystats(priv, rx_status, phy_stats,
-                                          rx_desc->rxmcs, (struct ieee80211_hdr *)skb->data,
-                                          rx_desc->crc32 || rx_desc->icverr);
+               skb_pull(skb, sizeof(struct rtl8xxxu_rxdesc24));
 
-       rx_status->mactime = rx_desc->tsfl;
-       rx_status->flag |= RX_FLAG_MACTIME_START;
+               phy_stats = (struct rtl8723au_phy_stats *)skb->data;
 
-       if (!rx_desc->swdec)
-               rx_status->flag |= RX_FLAG_DECRYPTED;
-       if (rx_desc->crc32)
-               rx_status->flag |= RX_FLAG_FAILED_FCS_CRC;
-       if (rx_desc->bw)
-               rx_status->bw = RATE_INFO_BW_40;
+               skb_pull(skb, drvinfo_sz + desc_shift);
 
-       if (rx_desc->rxmcs >= DESC_RATE_MCS0) {
-               rx_status->encoding = RX_ENC_HT;
-               rx_status->rate_idx = rx_desc->rxmcs - DESC_RATE_MCS0;
-       } else {
-               rx_status->rate_idx = rx_desc->rxmcs;
-       }
+               skb_trim(skb, pkt_len);
 
-       rx_status->freq = hw->conf.chandef.chan->center_freq;
-       rx_status->band = hw->conf.chandef.chan->band;
+               if (rx_desc->rpt_sel) {
+                       struct device *dev = &priv->udev->dev;
+                       dev_dbg(dev, "%s: C2H packet\n", __func__);
+                       rtl8723bu_handle_c2h(priv, skb);
+               } else {
+                       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+
+                       if (rx_desc->phy_stats)
+                               priv->fops->parse_phystats(priv, rx_status, phy_stats,
+                                                          rx_desc->rxmcs, hdr,
+                                                          rx_desc->crc32 || rx_desc->icverr);
+
+                       rx_status->mactime = rx_desc->tsfl;
+                       rx_status->flag |= RX_FLAG_MACTIME_START;
+
+                       if (!rx_desc->swdec)
+                               rx_status->flag |= RX_FLAG_DECRYPTED;
+                       if (rx_desc->crc32)
+                               rx_status->flag |= RX_FLAG_FAILED_FCS_CRC;
+                       if (rx_desc->bw)
+                               rx_status->bw = RATE_INFO_BW_40;
+
+                       if (rx_desc->rxmcs >= DESC_RATE_MCS0) {
+                               rx_status->encoding = RX_ENC_HT;
+                               rx_status->rate_idx = rx_desc->rxmcs - DESC_RATE_MCS0;
+                       } else {
+                               rx_status->rate_idx = rx_desc->rxmcs;
+                       }
+
+                       rx_status->freq = hw->conf.chandef.chan->center_freq;
+                       rx_status->band = hw->conf.chandef.chan->band;
+
+                       ieee80211_rx_irqsafe(hw, skb);
+               }
+
+               skb = next_skb;
+               if (skb)
+                       skb_pull(next_skb, pkt_offset);
+
+               urb_len -= pkt_offset;
+               next_skb = NULL;
+       } while (skb && urb_len >= sizeof(struct rtl8xxxu_rxdesc24));
 
-       ieee80211_rx_irqsafe(hw, skb);
        return RX_TYPE_DATA_PKT;
 }
 
@@ -6282,7 +6444,6 @@ static void rtl8xxxu_rx_complete(struct urb *urb)
 cleanup:
        usb_free_urb(urb);
        dev_kfree_skb(skb);
-       return;
 }
 
 static int rtl8xxxu_submit_rx_urb(struct rtl8xxxu_priv *priv,
@@ -6372,12 +6533,13 @@ static int rtl8xxxu_add_interface(struct ieee80211_hw *hw,
        int ret;
        u8 val8;
 
+       if (!priv->vif)
+               priv->vif = vif;
+       else
+               return -EOPNOTSUPP;
+
        switch (vif->type) {
        case NL80211_IFTYPE_STATION:
-               if (!priv->vif)
-                       priv->vif = vif;
-               else
-                       return -EOPNOTSUPP;
                rtl8xxxu_stop_tx_beacon(priv);
 
                val8 = rtl8xxxu_read8(priv, REG_BEACON_CTRL);
@@ -6386,11 +6548,33 @@ static int rtl8xxxu_add_interface(struct ieee80211_hw *hw,
                rtl8xxxu_write8(priv, REG_BEACON_CTRL, val8);
                ret = 0;
                break;
+       case NL80211_IFTYPE_AP:
+               rtl8xxxu_write8(priv, REG_BEACON_CTRL,
+                               BEACON_DISABLE_TSF_UPDATE | BEACON_CTRL_MBSSID);
+               rtl8xxxu_write8(priv, REG_ATIMWND, 0x0c); /* 12ms */
+               rtl8xxxu_write16(priv, REG_TSFTR_SYN_OFFSET, 0x7fff); /* ~32ms */
+               rtl8xxxu_write8(priv, REG_DUAL_TSF_RST, DUAL_TSF_RESET_TSF0);
+
+               /* enable BCN0 function */
+               rtl8xxxu_write8(priv, REG_BEACON_CTRL,
+                               BEACON_DISABLE_TSF_UPDATE |
+                               BEACON_FUNCTION_ENABLE | BEACON_CTRL_MBSSID |
+                               BEACON_CTRL_TX_BEACON_RPT);
+
+               /* select BCN on port 0 */
+               val8 = rtl8xxxu_read8(priv, REG_CCK_CHECK);
+               val8 &= ~BIT_BCN_PORT_SEL;
+               rtl8xxxu_write8(priv, REG_CCK_CHECK, val8);
+
+               ret = 0;
+               break;
        default:
                ret = -EOPNOTSUPP;
        }
 
        rtl8xxxu_set_linktype(priv, vif->type);
+       ether_addr_copy(priv->mac_addr, vif->addr);
+       rtl8xxxu_set_mac(priv);
 
        return ret;
 }
@@ -6521,22 +6705,22 @@ static void rtl8xxxu_configure_filter(struct ieee80211_hw *hw,
         */
 
        if (*total_flags & FIF_BCN_PRBRESP_PROMISC)
-               rcr &= ~RCR_CHECK_BSSID_BEACON;
+               rcr &= ~(RCR_CHECK_BSSID_BEACON | RCR_CHECK_BSSID_MATCH);
        else
-               rcr |= RCR_CHECK_BSSID_BEACON;
+               rcr |= RCR_CHECK_BSSID_BEACON | RCR_CHECK_BSSID_MATCH;
+
+       if (priv->vif && priv->vif->type == NL80211_IFTYPE_AP)
+               rcr &= ~RCR_CHECK_BSSID_MATCH;
 
        if (*total_flags & FIF_CONTROL)
                rcr |= RCR_ACCEPT_CTRL_FRAME;
        else
                rcr &= ~RCR_ACCEPT_CTRL_FRAME;
 
-       if (*total_flags & FIF_OTHER_BSS) {
+       if (*total_flags & FIF_OTHER_BSS)
                rcr |= RCR_ACCEPT_AP;
-               rcr &= ~RCR_CHECK_BSSID_MATCH;
-       } else {
+       else
                rcr &= ~RCR_ACCEPT_AP;
-               rcr |= RCR_CHECK_BSSID_MATCH;
-       }
 
        if (*total_flags & FIF_PSPOLL)
                rcr |= RCR_ACCEPT_PM;
@@ -6557,7 +6741,7 @@ static void rtl8xxxu_configure_filter(struct ieee80211_hw *hw,
 
 static int rtl8xxxu_set_rts_threshold(struct ieee80211_hw *hw, u32 rts)
 {
-       if (rts > 2347)
+       if (rts > 2347 && rts != (u32)-1)
                return -EINVAL;
 
        return 0;
@@ -6706,7 +6890,8 @@ static u8 rtl8xxxu_signal_to_snr(int signal)
 }
 
 static void rtl8xxxu_refresh_rate_mask(struct rtl8xxxu_priv *priv,
-                                      int signal, struct ieee80211_sta *sta)
+                                      int signal, struct ieee80211_sta *sta,
+                                      bool force)
 {
        struct ieee80211_hw *hw = priv->hw;
        u16 wireless_mode;
@@ -6714,6 +6899,7 @@ static void rtl8xxxu_refresh_rate_mask(struct rtl8xxxu_priv *priv,
        u8 txbw_40mhz;
        u8 snr, snr_thresh_high, snr_thresh_low;
        u8 go_up_gap = 5;
+       u8 macid = rtl8xxxu_get_macid(priv, sta);
 
        rssi_level = priv->rssi_level;
        snr = rtl8xxxu_signal_to_snr(signal);
@@ -6740,7 +6926,7 @@ static void rtl8xxxu_refresh_rate_mask(struct rtl8xxxu_priv *priv,
        else
                rssi_level = RTL8XXXU_RATR_STA_LOW;
 
-       if (rssi_level != priv->rssi_level) {
+       if (rssi_level != priv->rssi_level || force) {
                int sgi = 0;
                u32 rate_bitmap = 0;
 
@@ -6833,7 +7019,7 @@ static void rtl8xxxu_refresh_rate_mask(struct rtl8xxxu_priv *priv,
                }
 
                priv->rssi_level = rssi_level;
-               priv->fops->update_rate_mask(priv, rate_bitmap, ratr_idx, sgi, txbw_40mhz);
+               priv->fops->update_rate_mask(priv, rate_bitmap, ratr_idx, sgi, txbw_40mhz, macid);
        }
 }
 
@@ -6956,7 +7142,7 @@ static void rtl8xxxu_watchdog_callback(struct work_struct *work)
                if (priv->fops->set_crystal_cap)
                        rtl8xxxu_track_cfo(priv);
 
-               rtl8xxxu_refresh_rate_mask(priv, signal, sta);
+               rtl8xxxu_refresh_rate_mask(priv, signal, sta, false);
        }
 
 out:
@@ -7087,6 +7273,38 @@ static void rtl8xxxu_stop(struct ieee80211_hw *hw)
        rtl8xxxu_free_tx_resources(priv);
 }
 
+static int rtl8xxxu_sta_add(struct ieee80211_hw *hw,
+                           struct ieee80211_vif *vif,
+                           struct ieee80211_sta *sta)
+{
+       struct rtl8xxxu_sta_info *sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
+       struct rtl8xxxu_priv *priv = hw->priv;
+
+       if (vif->type == NL80211_IFTYPE_AP) {
+               sta_info->macid = rtl8xxxu_acquire_macid(priv);
+               if (sta_info->macid >= RTL8XXXU_MAX_MAC_ID_NUM)
+                       return -ENOSPC;
+
+               rtl8xxxu_refresh_rate_mask(priv, 0, sta, true);
+               priv->fops->report_connect(priv, sta_info->macid, H2C_MACID_ROLE_STA, true);
+       }
+
+       return 0;
+}
+
+static int rtl8xxxu_sta_remove(struct ieee80211_hw *hw,
+                              struct ieee80211_vif *vif,
+                              struct ieee80211_sta *sta)
+{
+       struct rtl8xxxu_sta_info *sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
+       struct rtl8xxxu_priv *priv = hw->priv;
+
+       if (vif->type == NL80211_IFTYPE_AP)
+               rtl8xxxu_release_macid(priv, sta_info->macid);
+
+       return 0;
+}
+
 static const struct ieee80211_ops rtl8xxxu_ops = {
        .tx = rtl8xxxu_tx,
        .wake_tx_queue = ieee80211_handle_wake_tx_queue,
@@ -7095,6 +7313,7 @@ static const struct ieee80211_ops rtl8xxxu_ops = {
        .config = rtl8xxxu_config,
        .conf_tx = rtl8xxxu_conf_tx,
        .bss_info_changed = rtl8xxxu_bss_info_changed,
+       .start_ap = rtl8xxxu_start_ap,
        .configure_filter = rtl8xxxu_configure_filter,
        .set_rts_threshold = rtl8xxxu_set_rts_threshold,
        .start = rtl8xxxu_start,
@@ -7105,6 +7324,9 @@ static const struct ieee80211_ops rtl8xxxu_ops = {
        .ampdu_action = rtl8xxxu_ampdu_action,
        .sta_statistics = rtl8xxxu_sta_statistics,
        .get_antenna = rtl8xxxu_get_antenna,
+       .set_tim = rtl8xxxu_set_tim,
+       .sta_add = rtl8xxxu_sta_add,
+       .sta_remove = rtl8xxxu_sta_remove,
 };
 
 static int rtl8xxxu_parse_usb(struct rtl8xxxu_priv *priv,
@@ -7296,6 +7518,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface,
        spin_lock_init(&priv->rx_urb_lock);
        INIT_WORK(&priv->rx_urb_wq, rtl8xxxu_rx_urb_work);
        INIT_DELAYED_WORK(&priv->ra_watchdog, rtl8xxxu_watchdog_callback);
+       INIT_WORK(&priv->update_beacon_work, rtl8xxxu_update_beacon_work_callback);
        skb_queue_head_init(&priv->c2hcmd_queue);
 
        usb_set_intfdata(interface, hw);
@@ -7347,7 +7570,11 @@ static int rtl8xxxu_probe(struct usb_interface *interface,
 
        hw->wiphy->max_scan_ssids = 1;
        hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
+       if (priv->fops->max_macid_num)
+               hw->wiphy->max_ap_assoc_sta = priv->fops->max_macid_num - 1;
        hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
+       if (priv->fops->supports_ap)
+               hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP);
        hw->queues = 4;
 
        sband = &rtl8xxxu_supported_band;
index 4dffbab..8571d51 100644 (file)
 
 #define REG_FIFOPAGE                   0x0204
 #define REG_TDECTRL                    0x0208
+#define  BIT_BCN_VALID                 BIT(16)
 
 #define REG_DWBCN0_CTRL_8188F          REG_TDECTRL
 
 #define  AUTO_LLT_INIT_LLT             BIT(16)
 
 #define REG_DWBCN1_CTRL_8723B          0x0228
+#define  BIT_SW_BCN_SEL                        BIT(20)
 
 /* 0x0280 ~ 0x02FF     RXDMA Configuration */
 #define REG_RXDMA_AGG_PG_TH            0x0280  /* 0-7 : USB DMA size bits
 #define REG_FWHW_TXQ_CTRL              0x0420
 #define  FWHW_TXQ_CTRL_AMPDU_RETRY     BIT(7)
 #define  FWHW_TXQ_CTRL_XMIT_MGMT_ACK   BIT(12)
+#define  EN_BCNQ_DL                    BIT(22)
 
 #define REG_HWSEQ_CTRL                 0x0423
 #define REG_TXPKTBUF_BCNQ_BDNY         0x0424
 #define REG_ARFR1                      0x0448
 #define REG_ARFR2                      0x044c
 #define REG_ARFR3                      0x0450
+#define REG_CCK_CHECK                  0x0454
+#define BIT_BCN_PORT_SEL               BIT(5)
 #define REG_AMPDU_MAX_TIME_8723B       0x0456
 #define REG_AGGLEN_LMT                 0x0458
 #define REG_AMPDU_MIN_SPACE            0x045c
index fa3d73b..f8ba133 100644 (file)
@@ -183,8 +183,8 @@ static int rtw_debugfs_copy_from_user(char tmp[], int size,
 
        tmp_len = (count > size - 1 ? size - 1 : count);
 
-       if (!buffer || copy_from_user(tmp, buffer, tmp_len))
-               return count;
+       if (copy_from_user(tmp, buffer, tmp_len))
+               return -EFAULT;
 
        tmp[tmp_len] = '\0';
 
@@ -201,13 +201,16 @@ static ssize_t rtw_debugfs_set_read_reg(struct file *filp,
        char tmp[32 + 1];
        u32 addr, len;
        int num;
+       int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 2);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 2);
+       if (ret)
+               return ret;
 
        num = sscanf(tmp, "%x %x", &addr, &len);
 
        if (num !=  2)
-               return count;
+               return -EINVAL;
 
        if (len != 1 && len != 2 && len != 4) {
                rtw_warn(rtwdev, "read reg setting wrong len\n");
@@ -288,8 +291,11 @@ static ssize_t rtw_debugfs_set_rsvd_page(struct file *filp,
        char tmp[32 + 1];
        u32 offset, page_num;
        int num;
+       int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 2);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 2);
+       if (ret)
+               return ret;
 
        num = sscanf(tmp, "%d %d", &offset, &page_num);
 
@@ -314,8 +320,11 @@ static ssize_t rtw_debugfs_set_single_input(struct file *filp,
        char tmp[32 + 1];
        u32 input;
        int num;
+       int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+       if (ret)
+               return ret;
 
        num = kstrtoint(tmp, 0, &input);
 
@@ -338,14 +347,17 @@ static ssize_t rtw_debugfs_set_write_reg(struct file *filp,
        char tmp[32 + 1];
        u32 addr, val, len;
        int num;
+       int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 3);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 3);
+       if (ret)
+               return ret;
 
        /* write BB/MAC register */
        num = sscanf(tmp, "%x %x %x", &addr, &val, &len);
 
        if (num !=  3)
-               return count;
+               return -EINVAL;
 
        switch (len) {
        case 1:
@@ -381,8 +393,11 @@ static ssize_t rtw_debugfs_set_h2c(struct file *filp,
        char tmp[32 + 1];
        u8 param[8];
        int num;
+       int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 3);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 3);
+       if (ret)
+               return ret;
 
        num = sscanf(tmp, "%hhx,%hhx,%hhx,%hhx,%hhx,%hhx,%hhx,%hhx",
                     &param[0], &param[1], &param[2], &param[3],
@@ -408,14 +423,17 @@ static ssize_t rtw_debugfs_set_rf_write(struct file *filp,
        char tmp[32 + 1];
        u32 path, addr, mask, val;
        int num;
+       int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 4);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 4);
+       if (ret)
+               return ret;
 
        num = sscanf(tmp, "%x %x %x %x", &path, &addr, &mask, &val);
 
        if (num !=  4) {
                rtw_warn(rtwdev, "invalid args, [path] [addr] [mask] [val]\n");
-               return count;
+               return -EINVAL;
        }
 
        mutex_lock(&rtwdev->mutex);
@@ -438,14 +456,17 @@ static ssize_t rtw_debugfs_set_rf_read(struct file *filp,
        char tmp[32 + 1];
        u32 path, addr, mask;
        int num;
+       int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 3);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 3);
+       if (ret)
+               return ret;
 
        num = sscanf(tmp, "%x %x %x", &path, &addr, &mask);
 
        if (num !=  3) {
                rtw_warn(rtwdev, "invalid args, [path] [addr] [mask] [val]\n");
-               return count;
+               return -EINVAL;
        }
 
        debugfs_priv->rf_path = path;
@@ -467,7 +488,9 @@ static ssize_t rtw_debugfs_set_fix_rate(struct file *filp,
        char tmp[32 + 1];
        int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+       if (ret)
+               return ret;
 
        ret = kstrtou8(tmp, 0, &fix_rate);
        if (ret) {
@@ -860,7 +883,9 @@ static ssize_t rtw_debugfs_set_coex_enable(struct file *filp,
        bool enable;
        int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+       if (ret)
+               return ret;
 
        ret = kstrtobool(tmp, &enable);
        if (ret) {
@@ -930,7 +955,9 @@ static ssize_t rtw_debugfs_set_fw_crash(struct file *filp,
        bool input;
        int ret;
 
-       rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+       ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+       if (ret)
+               return ret;
 
        ret = kstrtobool(tmp, &input);
        if (ret)
index 144618b..09bcc23 100644 (file)
@@ -164,8 +164,10 @@ static int rtw_ops_add_interface(struct ieee80211_hw *hw,
        mutex_lock(&rtwdev->mutex);
 
        port = find_first_zero_bit(rtwdev->hw_port, RTW_PORT_NUM);
-       if (port >= RTW_PORT_NUM)
+       if (port >= RTW_PORT_NUM) {
+               mutex_unlock(&rtwdev->mutex);
                return -EINVAL;
+       }
        set_bit(port, rtwdev->hw_port);
 
        rtwvif->port = port;
index 2dc48fa..99e870d 100644 (file)
@@ -13,7 +13,8 @@ rtw89_core-y += core.o \
                coex.o \
                ps.o \
                chan.o \
-               ser.o
+               ser.o \
+               acpi.o
 
 rtw89_core-$(CONFIG_PM) += wow.o
 
diff --git a/drivers/net/wireless/realtek/rtw89/acpi.c b/drivers/net/wireless/realtek/rtw89/acpi.c
new file mode 100644 (file)
index 0000000..8aaf83a
--- /dev/null
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2021-2023  Realtek Corporation
+ */
+
+#include <linux/acpi.h>
+#include <linux/uuid.h>
+
+#include "acpi.h"
+#include "debug.h"
+
+static const guid_t rtw89_guid = GUID_INIT(0xD2A8C3E8, 0x4B69, 0x4F00,
+                                          0x82, 0xBD, 0xFE, 0x86,
+                                          0x07, 0x80, 0x3A, 0xA7);
+
+static int rtw89_acpi_dsm_get(struct rtw89_dev *rtwdev, union acpi_object *obj,
+                             u8 *value)
+{
+       switch (obj->type) {
+       case ACPI_TYPE_INTEGER:
+               *value = (u8)obj->integer.value;
+               break;
+       case ACPI_TYPE_BUFFER:
+               *value = obj->buffer.pointer[0];
+               break;
+       default:
+               rtw89_debug(rtwdev, RTW89_DBG_UNEXP,
+                           "acpi dsm return unhandled type: %d\n", obj->type);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+int rtw89_acpi_evaluate_dsm(struct rtw89_dev *rtwdev,
+                           enum rtw89_acpi_dsm_func func, u8 *value)
+{
+       union acpi_object *obj;
+       int ret;
+
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(rtwdev->dev), &rtw89_guid,
+                               0, func, NULL);
+       if (!obj) {
+               rtw89_debug(rtwdev, RTW89_DBG_UNEXP,
+                           "acpi dsm fail to evaluate func: %d\n", func);
+               return -ENOENT;
+       }
+
+       ret = rtw89_acpi_dsm_get(rtwdev, obj, value);
+
+       ACPI_FREE(obj);
+       return ret;
+}
diff --git a/drivers/net/wireless/realtek/rtw89/acpi.h b/drivers/net/wireless/realtek/rtw89/acpi.h
new file mode 100644 (file)
index 0000000..ed74d8c
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2021-2023  Realtek Corporation
+ */
+
+#ifndef __RTW89_ACPI_H__
+#define __RTW89_ACPI_H__
+
+#include "core.h"
+
+enum rtw89_acpi_dsm_func {
+       RTW89_ACPI_DSM_FUNC_IDN_BAND_SUP = 2,
+       RTW89_ACPI_DSM_FUNC_6G_DIS = 3,
+       RTW89_ACPI_DSM_FUNC_6G_BP = 4,
+       RTW89_ACPI_DSM_FUNC_TAS_EN = 5,
+       RTW89_ACPI_DSM_FUNC_59G_EN = 6,
+};
+
+int rtw89_acpi_evaluate_dsm(struct rtw89_dev *rtwdev,
+                           enum rtw89_acpi_dsm_func func, u8 *value);
+
+#endif
index bad864d..d369dc5 100644 (file)
@@ -77,6 +77,9 @@ static struct ieee80211_channel rtw89_channels_5ghz[] = {
        RTW89_DEF_CHAN_5G(5785, 157),
        RTW89_DEF_CHAN_5G(5805, 161),
        RTW89_DEF_CHAN_5G_NO_HT40MINUS(5825, 165),
+       RTW89_DEF_CHAN_5G(5845, 169),
+       RTW89_DEF_CHAN_5G(5865, 173),
+       RTW89_DEF_CHAN_5G(5885, 177),
 };
 
 static struct ieee80211_channel rtw89_channels_6ghz[] = {
@@ -1244,13 +1247,34 @@ static void rtw89_core_rx_process_phy_ppdu_iter(void *data,
        struct rtw89_sta *rtwsta = (struct rtw89_sta *)sta->drv_priv;
        struct rtw89_rx_phy_ppdu *phy_ppdu = (struct rtw89_rx_phy_ppdu *)data;
        struct rtw89_dev *rtwdev = rtwsta->rtwdev;
+       struct rtw89_hal *hal = &rtwdev->hal;
+       u8 ant_num = hal->ant_diversity ? 2 : rtwdev->chip->rf_path_num;
+       u8 ant_pos = U8_MAX;
+       u8 evm_pos = 0;
        int i;
 
-       if (rtwsta->mac_id == phy_ppdu->mac_id && phy_ppdu->to_self) {
-               ewma_rssi_add(&rtwsta->avg_rssi, phy_ppdu->rssi_avg);
+       if (rtwsta->mac_id != phy_ppdu->mac_id || !phy_ppdu->to_self)
+               return;
+
+       if (hal->ant_diversity && hal->antenna_rx) {
+               ant_pos = __ffs(hal->antenna_rx);
+               evm_pos = ant_pos;
+       }
+
+       ewma_rssi_add(&rtwsta->avg_rssi, phy_ppdu->rssi_avg);
+
+       if (ant_pos < ant_num) {
+               ewma_rssi_add(&rtwsta->rssi[ant_pos], phy_ppdu->rssi[0]);
+       } else {
                for (i = 0; i < rtwdev->chip->rf_path_num; i++)
                        ewma_rssi_add(&rtwsta->rssi[i], phy_ppdu->rssi[i]);
        }
+
+       if (phy_ppdu->ofdm.has) {
+               ewma_snr_add(&rtwsta->avg_snr, phy_ppdu->ofdm.avg_snr);
+               ewma_evm_add(&rtwsta->evm_min[evm_pos], phy_ppdu->ofdm.evm_min);
+               ewma_evm_add(&rtwsta->evm_max[evm_pos], phy_ppdu->ofdm.evm_max);
+       }
 }
 
 #define VAR_LEN 0xff
@@ -1277,20 +1301,30 @@ static u16 rtw89_core_get_phy_status_ie_len(struct rtw89_dev *rtwdev, u8 *addr)
 static void rtw89_core_parse_phy_status_ie01(struct rtw89_dev *rtwdev, u8 *addr,
                                             struct rtw89_rx_phy_ppdu *phy_ppdu)
 {
+       const struct rtw89_phy_sts_ie0 *ie = (const struct rtw89_phy_sts_ie0 *)addr;
        s16 cfo;
+       u32 t;
 
-       phy_ppdu->chan_idx = RTW89_GET_PHY_STS_IE01_CH_IDX(addr);
+       phy_ppdu->chan_idx = le32_get_bits(ie->w0, RTW89_PHY_STS_IE01_W0_CH_IDX);
        if (phy_ppdu->rate < RTW89_HW_RATE_OFDM6)
                return;
 
        if (!phy_ppdu->to_self)
                return;
 
+       phy_ppdu->ofdm.avg_snr = le32_get_bits(ie->w2, RTW89_PHY_STS_IE01_W2_AVG_SNR);
+       phy_ppdu->ofdm.evm_max = le32_get_bits(ie->w2, RTW89_PHY_STS_IE01_W2_EVM_MAX);
+       phy_ppdu->ofdm.evm_min = le32_get_bits(ie->w2, RTW89_PHY_STS_IE01_W2_EVM_MIN);
+       phy_ppdu->ofdm.has = true;
+
        /* sign conversion for S(12,2) */
-       if (rtwdev->chip->cfo_src_fd)
-               cfo = sign_extend32(RTW89_GET_PHY_STS_IE01_FD_CFO(addr), 11);
-       else
-               cfo = sign_extend32(RTW89_GET_PHY_STS_IE01_PREMB_CFO(addr), 11);
+       if (rtwdev->chip->cfo_src_fd) {
+               t = le32_get_bits(ie->w1, RTW89_PHY_STS_IE01_W1_FD_CFO);
+               cfo = sign_extend32(t, 11);
+       } else {
+               t = le32_get_bits(ie->w1, RTW89_PHY_STS_IE01_W1_PREMB_CFO);
+               cfo = sign_extend32(t, 11);
+       }
 
        rtw89_phy_cfo_parse(rtwdev, cfo, phy_ppdu);
 }
@@ -1333,9 +1367,6 @@ static int rtw89_core_rx_process_phy_ppdu(struct rtw89_dev *rtwdev,
                return -EINVAL;
        }
        rtw89_core_update_phy_ppdu(phy_ppdu);
-       ieee80211_iterate_stations_atomic(rtwdev->hw,
-                                         rtw89_core_rx_process_phy_ppdu_iter,
-                                         phy_ppdu);
 
        return 0;
 }
@@ -1363,6 +1394,8 @@ static int rtw89_core_rx_parse_phy_sts(struct rtw89_dev *rtwdev,
                }
        }
 
+       rtw89_phy_antdiv_parse(rtwdev, phy_ppdu);
+
        return 0;
 }
 
@@ -1376,6 +1409,10 @@ static void rtw89_core_rx_process_phy_sts(struct rtw89_dev *rtwdev,
                rtw89_debug(rtwdev, RTW89_DBG_TXRX, "parse phy sts failed\n");
        else
                phy_ppdu->valid = true;
+
+       ieee80211_iterate_stations_atomic(rtwdev->hw,
+                                         rtw89_core_rx_process_phy_ppdu_iter,
+                                         phy_ppdu);
 }
 
 static u8 rtw89_rxdesc_to_nl_he_gi(struct rtw89_dev *rtwdev,
@@ -1481,6 +1518,34 @@ static void rtw89_stats_trigger_frame(struct rtw89_dev *rtwdev,
        }
 }
 
+static void rtw89_cancel_6ghz_probe_work(struct work_struct *work)
+{
+       struct rtw89_dev *rtwdev = container_of(work, struct rtw89_dev,
+                                               cancel_6ghz_probe_work);
+       struct list_head *pkt_list = rtwdev->scan_info.pkt_list;
+       struct rtw89_pktofld_info *info;
+
+       mutex_lock(&rtwdev->mutex);
+
+       if (!rtwdev->scanning)
+               goto out;
+
+       list_for_each_entry(info, &pkt_list[NL80211_BAND_6GHZ], list) {
+               if (!info->cancel || !test_bit(info->id, rtwdev->pkt_offload))
+                       continue;
+
+               rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id);
+
+               /* Don't delete/free info from pkt_list at this moment. Let it
+                * be deleted/freed in rtw89_release_pkt_list() after scanning,
+                * since if during scanning, pkt_list is accessed in bottom half.
+                */
+       }
+
+out:
+       mutex_unlock(&rtwdev->mutex);
+}
+
 static void rtw89_core_cancel_6ghz_probe_tx(struct rtw89_dev *rtwdev,
                                            struct sk_buff *skb)
 {
@@ -1489,6 +1554,7 @@ static void rtw89_core_cancel_6ghz_probe_tx(struct rtw89_dev *rtwdev,
        struct list_head *pkt_list = rtwdev->scan_info.pkt_list;
        struct rtw89_pktofld_info *info;
        const u8 *ies = mgmt->u.beacon.variable, *ssid_ie;
+       bool queue_work = false;
 
        if (rx_status->band != NL80211_BAND_6GHZ)
                return;
@@ -1497,16 +1563,22 @@ static void rtw89_core_cancel_6ghz_probe_tx(struct rtw89_dev *rtwdev,
 
        list_for_each_entry(info, &pkt_list[NL80211_BAND_6GHZ], list) {
                if (ether_addr_equal(info->bssid, mgmt->bssid)) {
-                       rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id);
+                       info->cancel = true;
+                       queue_work = true;
                        continue;
                }
 
                if (!ssid_ie || ssid_ie[1] != info->ssid_len || info->ssid_len == 0)
                        continue;
 
-               if (memcmp(&ssid_ie[2], info->ssid, info->ssid_len) == 0)
-                       rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id);
+               if (memcmp(&ssid_ie[2], info->ssid, info->ssid_len) == 0) {
+                       info->cancel = true;
+                       queue_work = true;
+               }
        }
+
+       if (queue_work)
+               ieee80211_queue_work(rtwdev->hw, &rtwdev->cancel_6ghz_probe_work);
 }
 
 static void rtw89_vif_rx_stats_iter(void *data, u8 *mac,
@@ -2593,6 +2665,7 @@ static void rtw89_track_work(struct work_struct *work)
        rtw89_phy_ra_update(rtwdev);
        rtw89_phy_cfo_track(rtwdev);
        rtw89_phy_tx_path_div_track(rtwdev);
+       rtw89_phy_antdiv_track(rtwdev);
        rtw89_phy_ul_tb_ctrl_track(rtwdev);
 
        if (rtwdev->lps_enabled && !rtwdev->btc.lps)
@@ -2756,6 +2829,8 @@ int rtw89_core_sta_add(struct rtw89_dev *rtwdev,
 {
        struct rtw89_vif *rtwvif = (struct rtw89_vif *)vif->drv_priv;
        struct rtw89_sta *rtwsta = (struct rtw89_sta *)sta->drv_priv;
+       struct rtw89_hal *hal = &rtwdev->hal;
+       u8 ant_num = hal->ant_diversity ? 2 : rtwdev->chip->rf_path_num;
        int i;
        int ret;
 
@@ -2769,8 +2844,12 @@ int rtw89_core_sta_add(struct rtw89_dev *rtwdev,
                rtw89_core_txq_init(rtwdev, sta->txq[i]);
 
        ewma_rssi_init(&rtwsta->avg_rssi);
-       for (i = 0; i < rtwdev->chip->rf_path_num; i++)
+       ewma_snr_init(&rtwsta->avg_snr);
+       for (i = 0; i < ant_num; i++) {
                ewma_rssi_init(&rtwsta->rssi[i]);
+               ewma_evm_init(&rtwsta->evm_min[i]);
+               ewma_evm_init(&rtwsta->evm_max[i]);
+       }
 
        if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls) {
                /* for station mode, assign the mac_id from itself */
@@ -3430,6 +3509,7 @@ void rtw89_core_stop(struct rtw89_dev *rtwdev)
        mutex_unlock(&rtwdev->mutex);
 
        cancel_work_sync(&rtwdev->c2h_work);
+       cancel_work_sync(&rtwdev->cancel_6ghz_probe_work);
        cancel_work_sync(&btc->eapol_notify_work);
        cancel_work_sync(&btc->arp_notify_work);
        cancel_work_sync(&btc->dhcp_notify_work);
@@ -3441,6 +3521,7 @@ void rtw89_core_stop(struct rtw89_dev *rtwdev)
        cancel_delayed_work_sync(&rtwdev->coex_rfk_chk_work);
        cancel_delayed_work_sync(&rtwdev->cfo_track_work);
        cancel_delayed_work_sync(&rtwdev->forbid_ba_work);
+       cancel_delayed_work_sync(&rtwdev->antdiv_work);
 
        mutex_lock(&rtwdev->mutex);
 
@@ -3476,6 +3557,7 @@ int rtw89_core_init(struct rtw89_dev *rtwdev)
        INIT_DELAYED_WORK(&rtwdev->coex_rfk_chk_work, rtw89_coex_rfk_chk_work);
        INIT_DELAYED_WORK(&rtwdev->cfo_track_work, rtw89_phy_cfo_track_work);
        INIT_DELAYED_WORK(&rtwdev->forbid_ba_work, rtw89_forbid_ba_work);
+       INIT_DELAYED_WORK(&rtwdev->antdiv_work, rtw89_phy_antdiv_work);
        rtwdev->txq_wq = alloc_workqueue("rtw89_tx_wq", WQ_UNBOUND | WQ_HIGHPRI, 0);
        if (!rtwdev->txq_wq)
                return -ENOMEM;
@@ -3486,10 +3568,12 @@ int rtw89_core_init(struct rtw89_dev *rtwdev)
        rtwdev->total_sta_assoc = 0;
 
        rtw89_init_wait(&rtwdev->mcc.wait);
+       rtw89_init_wait(&rtwdev->mac.fw_ofld_wait);
 
        INIT_WORK(&rtwdev->c2h_work, rtw89_fw_c2h_work);
        INIT_WORK(&rtwdev->ips_work, rtw89_ips_work);
        INIT_WORK(&rtwdev->load_firmware_work, rtw89_load_firmware_work);
+       INIT_WORK(&rtwdev->cancel_6ghz_probe_work, rtw89_cancel_6ghz_probe_work);
 
        skb_queue_head_init(&rtwdev->c2h_queue);
        rtw89_core_ppdu_sts_init(rtwdev);
@@ -3584,7 +3668,7 @@ static void rtw89_read_chip_ver(struct rtw89_dev *rtwdev)
 
        if (chip->chip_id == RTL8852B || chip->chip_id == RTL8851B) {
                ret = rtw89_mac_read_xtal_si(rtwdev, XTAL_SI_CV, &val);
-               if (!ret)
+               if (ret)
                        return;
 
                rtwdev->hal.acv = u8_get_bits(val, XTAL_SI_ACV_MASK);
@@ -3693,6 +3777,7 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev)
 {
        struct ieee80211_hw *hw = rtwdev->hw;
        struct rtw89_efuse *efuse = &rtwdev->efuse;
+       struct rtw89_hal *hal = &rtwdev->hal;
        int ret;
        int tx_headroom = IEEE80211_HT_CTL_LEN;
 
@@ -3731,8 +3816,13 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev)
                                     BIT(NL80211_IFTYPE_P2P_CLIENT) |
                                     BIT(NL80211_IFTYPE_P2P_GO);
 
-       hw->wiphy->available_antennas_tx = BIT(rtwdev->chip->rf_path_num) - 1;
-       hw->wiphy->available_antennas_rx = BIT(rtwdev->chip->rf_path_num) - 1;
+       if (hal->ant_diversity) {
+               hw->wiphy->available_antennas_tx = 0x3;
+               hw->wiphy->available_antennas_rx = 0x3;
+       } else {
+               hw->wiphy->available_antennas_tx = BIT(rtwdev->chip->rf_path_num) - 1;
+               hw->wiphy->available_antennas_rx = BIT(rtwdev->chip->rf_path_num) - 1;
+       }
 
        hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS |
                            WIPHY_FLAG_TDLS_EXTERNAL_SETUP |
@@ -3760,7 +3850,12 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev)
                return ret;
        }
 
-       hw->wiphy->reg_notifier = rtw89_regd_notifier;
+       ret = rtw89_regd_setup(rtwdev);
+       if (ret) {
+               rtw89_err(rtwdev, "failed to set up regd\n");
+               goto err_free_supported_band;
+       }
+
        hw->wiphy->sar_capa = &rtw89_sar_capa;
 
        ret = ieee80211_register_hw(hw);
index 6df386a..b60cd98 100644 (file)
@@ -122,6 +122,13 @@ enum rtw89_cv {
        CHIP_CV_INVALID = CHIP_CV_MAX,
 };
 
+enum rtw89_bacam_ver {
+       RTW89_BACAM_V0,
+       RTW89_BACAM_V1,
+
+       RTW89_BACAM_V0_EXT = 99,
+};
+
 enum rtw89_core_tx_type {
        RTW89_CORE_TX_TYPE_DATA,
        RTW89_CORE_TX_TYPE_MGMT,
@@ -551,6 +558,12 @@ struct rtw89_rx_phy_ppdu {
        u8 chan_idx;
        u8 ie;
        u16 rate;
+       struct {
+               bool has;
+               u8 avg_snr;
+               u8 evm_max;
+               u8 evm_min;
+       } ofdm;
        bool to_self;
        bool valid;
 };
@@ -2533,6 +2546,8 @@ struct rtw89_ra_report {
 };
 
 DECLARE_EWMA(rssi, 10, 16);
+DECLARE_EWMA(evm, 10, 16);
+DECLARE_EWMA(snr, 10, 16);
 
 struct rtw89_ba_cam_entry {
        struct list_head list;
@@ -2595,6 +2610,9 @@ struct rtw89_sta {
        u8 prev_rssi;
        struct ewma_rssi avg_rssi;
        struct ewma_rssi rssi[RF_PATH_MAX];
+       struct ewma_snr avg_snr;
+       struct ewma_evm evm_min[RF_PATH_MAX];
+       struct ewma_evm evm_max[RF_PATH_MAX];
        struct rtw89_ampdu_params ampdu_params[IEEE80211_NUM_TIDS];
        struct ieee80211_rx_status rx_status;
        u16 rx_hw_rate;
@@ -3090,6 +3108,12 @@ struct rtw89_imr_info {
        u32 tmac_imr_set;
 };
 
+struct rtw89_xtal_info {
+       u32 xcap_reg;
+       u32 sc_xo_mask;
+       u32 sc_xi_mask;
+};
+
 struct rtw89_rrsr_cfgs {
        struct rtw89_reg3_def ref_rate;
        struct rtw89_reg3_def rsc;
@@ -3116,6 +3140,25 @@ struct rtw89_phy_ul_tb_info {
        u8 def_if_bandedge;
 };
 
+struct rtw89_antdiv_stats {
+       struct ewma_rssi cck_rssi_avg;
+       struct ewma_rssi ofdm_rssi_avg;
+       struct ewma_rssi non_legacy_rssi_avg;
+       u16 pkt_cnt_cck;
+       u16 pkt_cnt_ofdm;
+       u16 pkt_cnt_non_legacy;
+       u32 evm;
+};
+
+struct rtw89_antdiv_info {
+       struct rtw89_antdiv_stats target_stats;
+       struct rtw89_antdiv_stats main_stats;
+       struct rtw89_antdiv_stats aux_stats;
+       u8 training_count;
+       u8 rssi_pre;
+       bool get_stats;
+};
+
 struct rtw89_chip_info {
        enum rtw89_core_chip_id chip_id;
        const struct rtw89_chip_ops *ops;
@@ -3123,6 +3166,7 @@ struct rtw89_chip_info {
        u8 fw_format_max;
        bool try_ce_fw;
        u32 fifo_size;
+       bool small_fifo_size;
        u32 dle_scc_rsvd_size;
        u16 max_amsdu_limit;
        bool dis_2g_40m_ul_ofdma;
@@ -3135,6 +3179,7 @@ struct rtw89_chip_info {
        u8 support_chanctx_num;
        u8 support_bands;
        bool support_bw160;
+       bool support_unii4;
        bool support_ul_tb_ctrl;
        bool hw_sec_hdr;
        u8 rf_path_num;
@@ -3145,7 +3190,7 @@ struct rtw89_chip_info {
        u8 scam_num;
        u8 bacam_num;
        u8 bacam_dynamic_num;
-       bool bacam_v1;
+       enum rtw89_bacam_ver bacam_ver;
 
        u8 sec_ctrl_efuse_size;
        u32 physical_efuse_size;
@@ -3162,6 +3207,7 @@ struct rtw89_chip_info {
        const struct rtw89_phy_table *bb_gain_table;
        const struct rtw89_phy_table *rf_table[RF_PATH_MAX];
        const struct rtw89_phy_table *nctl_table;
+       const struct rtw89_rfk_tbl *nctl_post_table;
        const struct rtw89_txpwr_table *byr_table;
        const struct rtw89_phy_dig_gain_table *dig_table;
        const struct rtw89_dig_regs *dig_regs;
@@ -3215,6 +3261,7 @@ struct rtw89_chip_info {
        u32 dma_ch_mask;
        u32 edcca_lvl_reg;
        const struct wiphy_wowlan_support *wowlan_stub;
+       const struct rtw89_xtal_info *xtal_info;
 };
 
 union rtw89_bus_info {
@@ -3248,14 +3295,6 @@ enum rtw89_host_rpr_mode {
        RTW89_RPR_MODE_STF
 };
 
-struct rtw89_mac_info {
-       struct rtw89_dle_info dle_info;
-       struct rtw89_hfc_param hfc_param;
-       enum rtw89_qta_mode qta_mode;
-       u8 rpwm_seq_num;
-       u8 cpwm_seq_num;
-};
-
 #define RTW89_COMPLETION_BUF_SIZE 24
 #define RTW89_WAIT_COND_IDLE UINT_MAX
 
@@ -3278,6 +3317,17 @@ static inline void rtw89_init_wait(struct rtw89_wait_info *wait)
        atomic_set(&wait->cond, RTW89_WAIT_COND_IDLE);
 }
 
+struct rtw89_mac_info {
+       struct rtw89_dle_info dle_info;
+       struct rtw89_hfc_param hfc_param;
+       enum rtw89_qta_mode qta_mode;
+       u8 rpwm_seq_num;
+       u8 cpwm_seq_num;
+
+       /* see RTW89_FW_OFLD_WAIT_COND series for wait condition */
+       struct rtw89_wait_info fw_ofld_wait;
+};
+
 enum rtw89_fw_type {
        RTW89_FW_NORMAL = 1,
        RTW89_FW_WOWLAN = 3,
@@ -3423,6 +3473,8 @@ struct rtw89_hal {
        u8 tx_nss;
        u8 rx_nss;
        bool tx_path_diversity;
+       bool ant_diversity;
+       bool ant_diversity_fixed;
        bool support_cckpd;
        bool support_igi;
        atomic_t roc_entity_idx;
@@ -3888,12 +3940,14 @@ enum rtw89_ser_rcvy_step {
        RTW89_SER_DRV_STOP_RX,
        RTW89_SER_DRV_STOP_RUN,
        RTW89_SER_HAL_STOP_DMA,
+       RTW89_SER_SUPPRESS_LOG,
        RTW89_NUM_OF_SER_FLAGS
 };
 
 struct rtw89_ser {
        u8 state;
        u8 alarm_event;
+       bool prehandle_l1;
 
        struct work_struct ser_hdl_work;
        struct delayed_work ser_alarm_work;
@@ -4054,6 +4108,7 @@ struct rtw89_dev {
        struct work_struct c2h_work;
        struct work_struct ips_work;
        struct work_struct load_firmware_work;
+       struct work_struct cancel_6ghz_probe_work;
 
        struct list_head early_h2c_list;
 
@@ -4086,6 +4141,7 @@ struct rtw89_dev {
        struct rtw89_phy_bb_gain_info bb_gain;
        struct rtw89_phy_efuse_gain efuse_gain;
        struct rtw89_phy_ul_tb_info ul_tb_info;
+       struct rtw89_antdiv_info antdiv;
 
        struct delayed_work track_work;
        struct delayed_work coex_act1_work;
@@ -4094,6 +4150,7 @@ struct rtw89_dev {
        struct delayed_work cfo_track_work;
        struct delayed_work forbid_ba_work;
        struct delayed_work roc_work;
+       struct delayed_work antdiv_work;
        struct rtw89_ppdu_sts_info ppdu_sts;
        u8 total_sta_assoc;
        bool scanning;
@@ -4990,6 +5047,7 @@ int rtw89_core_release_sta_ba_entry(struct rtw89_dev *rtwdev,
 void rtw89_vif_type_mapping(struct ieee80211_vif *vif, bool assoc);
 int rtw89_chip_info_setup(struct rtw89_dev *rtwdev);
 bool rtw89_ra_report_to_bitrate(struct rtw89_dev *rtwdev, u8 rpt_rate, u16 *bitrate);
+int rtw89_regd_setup(struct rtw89_dev *rtwdev);
 int rtw89_regd_init(struct rtw89_dev *rtwdev,
                    void (*reg_notifier)(struct wiphy *wiphy, struct regulatory_request *request));
 void rtw89_regd_notifier(struct wiphy *wiphy, struct regulatory_request *request);
index 1e5b7a9..6f418f1 100644 (file)
@@ -3206,7 +3206,11 @@ static void rtw89_sta_info_get_iter(void *data, struct ieee80211_sta *sta)
        struct seq_file *m = (struct seq_file *)data;
        struct rtw89_dev *rtwdev = rtwsta->rtwdev;
        struct rtw89_hal *hal = &rtwdev->hal;
+       u8 ant_num = hal->ant_diversity ? 2 : rtwdev->chip->rf_path_num;
+       bool ant_asterisk = hal->tx_path_diversity || hal->ant_diversity;
+       u8 evm_min, evm_max;
        u8 rssi;
+       u8 snr;
        int i;
 
        seq_printf(m, "TX rate [%d]: ", rtwsta->mac_id);
@@ -3256,13 +3260,27 @@ static void rtw89_sta_info_get_iter(void *data, struct ieee80211_sta *sta)
        rssi = ewma_rssi_read(&rtwsta->avg_rssi);
        seq_printf(m, "RSSI: %d dBm (raw=%d, prev=%d) [",
                   RTW89_RSSI_RAW_TO_DBM(rssi), rssi, rtwsta->prev_rssi);
-       for (i = 0; i < rtwdev->chip->rf_path_num; i++) {
+       for (i = 0; i < ant_num; i++) {
                rssi = ewma_rssi_read(&rtwsta->rssi[i]);
                seq_printf(m, "%d%s%s", RTW89_RSSI_RAW_TO_DBM(rssi),
-                          hal->tx_path_diversity && (hal->antenna_tx & BIT(i)) ? "*" : "",
-                          i + 1 == rtwdev->chip->rf_path_num ? "" : ", ");
+                          ant_asterisk && (hal->antenna_tx & BIT(i)) ? "*" : "",
+                          i + 1 == ant_num ? "" : ", ");
        }
        seq_puts(m, "]\n");
+
+       seq_puts(m, "EVM: [");
+       for (i = 0; i < (hal->ant_diversity ? 2 : 1); i++) {
+               evm_min = ewma_evm_read(&rtwsta->evm_min[i]);
+               evm_max = ewma_evm_read(&rtwsta->evm_max[i]);
+
+               seq_printf(m, "%s(%2u.%02u, %2u.%02u)", i == 0 ? "" : " ",
+                          evm_min >> 2, (evm_min & 0x3) * 25,
+                          evm_max >> 2, (evm_max & 0x3) * 25);
+       }
+       seq_puts(m, "]\t");
+
+       snr = ewma_snr_read(&rtwsta->avg_snr);
+       seq_printf(m, "SNR: %u\n", snr);
 }
 
 static void
index b9b675b..ad277f2 100644 (file)
@@ -14,6 +14,8 @@
 
 static void rtw89_fw_c2h_cmd_handle(struct rtw89_dev *rtwdev,
                                    struct sk_buff *skb);
+static int rtw89_h2c_tx_and_wait(struct rtw89_dev *rtwdev, struct sk_buff *skb,
+                                struct rtw89_wait_info *wait, unsigned int cond);
 
 static struct sk_buff *rtw89_fw_h2c_alloc_skb(struct rtw89_dev *rtwdev, u32 len,
                                              bool header)
@@ -807,7 +809,7 @@ int rtw89_fw_h2c_ba_cam(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
        }
        skb_put(skb, H2C_BA_CAM_LEN);
        SET_BA_CAM_MACID(skb->data, macid);
-       if (chip->bacam_v1)
+       if (chip->bacam_ver == RTW89_BACAM_V0_EXT)
                SET_BA_CAM_ENTRY_IDX_V1(skb->data, entry_idx);
        else
                SET_BA_CAM_ENTRY_IDX(skb->data, entry_idx);
@@ -823,7 +825,7 @@ int rtw89_fw_h2c_ba_cam(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
        SET_BA_CAM_INIT_REQ(skb->data, 1);
        SET_BA_CAM_SSN(skb->data, params->ssn);
 
-       if (chip->bacam_v1) {
+       if (chip->bacam_ver == RTW89_BACAM_V0_EXT) {
                SET_BA_CAM_STD_EN(skb->data, 1);
                SET_BA_CAM_BAND(skb->data, rtwvif->mac_idx);
        }
@@ -848,8 +850,8 @@ fail:
        return ret;
 }
 
-static int rtw89_fw_h2c_init_dynamic_ba_cam_v1(struct rtw89_dev *rtwdev,
-                                              u8 entry_idx, u8 uid)
+static int rtw89_fw_h2c_init_ba_cam_v0_ext(struct rtw89_dev *rtwdev,
+                                          u8 entry_idx, u8 uid)
 {
        struct sk_buff *skb;
        int ret;
@@ -886,7 +888,7 @@ fail:
        return ret;
 }
 
-void rtw89_fw_h2c_init_ba_cam_v1(struct rtw89_dev *rtwdev)
+void rtw89_fw_h2c_init_dynamic_ba_cam_v0_ext(struct rtw89_dev *rtwdev)
 {
        const struct rtw89_chip_info *chip = rtwdev->chip;
        u8 entry_idx = chip->bacam_num;
@@ -894,7 +896,7 @@ void rtw89_fw_h2c_init_ba_cam_v1(struct rtw89_dev *rtwdev)
        int i;
 
        for (i = 0; i < chip->bacam_dynamic_num; i++) {
-               rtw89_fw_h2c_init_dynamic_ba_cam_v1(rtwdev, entry_idx, uid);
+               rtw89_fw_h2c_init_ba_cam_v0_ext(rtwdev, entry_idx, uid);
                entry_idx++;
                uid++;
        }
@@ -997,8 +999,8 @@ void rtw89_fw_release_general_pkt_list_vif(struct rtw89_dev *rtwdev,
        list_for_each_entry_safe(info, tmp, pkt_list, list) {
                if (notify_fw)
                        rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id);
-               rtw89_core_release_bit_map(rtwdev->pkt_offload,
-                                          info->id);
+               else
+                       rtw89_core_release_bit_map(rtwdev->pkt_offload, info->id);
                list_del(&info->list);
                kfree(info);
        }
@@ -2440,7 +2442,9 @@ fail:
 #define H2C_LEN_PKT_OFLD 4
 int rtw89_fw_h2c_del_pkt_offload(struct rtw89_dev *rtwdev, u8 id)
 {
+       struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait;
        struct sk_buff *skb;
+       unsigned int cond;
        u8 *cmd;
        int ret;
 
@@ -2460,23 +2464,26 @@ int rtw89_fw_h2c_del_pkt_offload(struct rtw89_dev *rtwdev, u8 id)
                              H2C_FUNC_PACKET_OFLD, 1, 1,
                              H2C_LEN_PKT_OFLD);
 
-       ret = rtw89_h2c_tx(rtwdev, skb, false);
+       cond = RTW89_FW_OFLD_WAIT_COND_PKT_OFLD(id, RTW89_PKT_OFLD_OP_DEL);
+
+       ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
        if (ret) {
-               rtw89_err(rtwdev, "failed to send h2c\n");
-               goto fail;
+               rtw89_debug(rtwdev, RTW89_DBG_FW,
+                           "failed to del pkt ofld: id %d, ret %d\n",
+                           id, ret);
+               return ret;
        }
 
+       rtw89_core_release_bit_map(rtwdev->pkt_offload, id);
        return 0;
-fail:
-       dev_kfree_skb_any(skb);
-
-       return ret;
 }
 
 int rtw89_fw_h2c_add_pkt_offload(struct rtw89_dev *rtwdev, u8 *id,
                                 struct sk_buff *skb_ofld)
 {
+       struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait;
        struct sk_buff *skb;
+       unsigned int cond;
        u8 *cmd;
        u8 alloc_id;
        int ret;
@@ -2507,27 +2514,29 @@ int rtw89_fw_h2c_add_pkt_offload(struct rtw89_dev *rtwdev, u8 *id,
                              H2C_FUNC_PACKET_OFLD, 1, 1,
                              H2C_LEN_PKT_OFLD + skb_ofld->len);
 
-       ret = rtw89_h2c_tx(rtwdev, skb, false);
+       cond = RTW89_FW_OFLD_WAIT_COND_PKT_OFLD(alloc_id, RTW89_PKT_OFLD_OP_ADD);
+
+       ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
        if (ret) {
-               rtw89_err(rtwdev, "failed to send h2c\n");
+               rtw89_debug(rtwdev, RTW89_DBG_FW,
+                           "failed to add pkt ofld: id %d, ret %d\n",
+                           alloc_id, ret);
                rtw89_core_release_bit_map(rtwdev->pkt_offload, alloc_id);
-               goto fail;
+               return ret;
        }
 
        return 0;
-fail:
-       dev_kfree_skb_any(skb);
-
-       return ret;
 }
 
 #define H2C_LEN_SCAN_LIST_OFFLOAD 4
 int rtw89_fw_h2c_scan_list_offload(struct rtw89_dev *rtwdev, int len,
                                   struct list_head *chan_list)
 {
+       struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait;
        struct rtw89_mac_chinfo *ch_info;
        struct sk_buff *skb;
        int skb_len = H2C_LEN_SCAN_LIST_OFFLOAD + len * RTW89_MAC_CHINFO_SIZE;
+       unsigned int cond;
        u8 *cmd;
        int ret;
 
@@ -2574,27 +2583,27 @@ int rtw89_fw_h2c_scan_list_offload(struct rtw89_dev *rtwdev, int len,
                              H2C_CAT_MAC, H2C_CL_MAC_FW_OFLD,
                              H2C_FUNC_ADD_SCANOFLD_CH, 1, 1, skb_len);
 
-       ret = rtw89_h2c_tx(rtwdev, skb, false);
+       cond = RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_ADD_SCANOFLD_CH);
+
+       ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
        if (ret) {
-               rtw89_err(rtwdev, "failed to send h2c\n");
-               goto fail;
+               rtw89_debug(rtwdev, RTW89_DBG_FW, "failed to add scan ofld ch\n");
+               return ret;
        }
 
        return 0;
-fail:
-       dev_kfree_skb_any(skb);
-
-       return ret;
 }
 
 int rtw89_fw_h2c_scan_offload(struct rtw89_dev *rtwdev,
                              struct rtw89_scan_option *option,
                              struct rtw89_vif *rtwvif)
 {
+       struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait;
        struct rtw89_chan *op = &rtwdev->scan_info.op_chan;
        struct rtw89_h2c_scanofld *h2c;
        u32 len = sizeof(*h2c);
        struct sk_buff *skb;
+       unsigned int cond;
        int ret;
 
        skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
@@ -2633,17 +2642,15 @@ int rtw89_fw_h2c_scan_offload(struct rtw89_dev *rtwdev,
                              H2C_FUNC_SCANOFLD, 1, 1,
                              len);
 
-       ret = rtw89_h2c_tx(rtwdev, skb, false);
+       cond = RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_SCANOFLD);
+
+       ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
        if (ret) {
-               rtw89_err(rtwdev, "failed to send h2c\n");
-               goto fail;
+               rtw89_debug(rtwdev, RTW89_DBG_FW, "failed to scan ofld\n");
+               return ret;
        }
 
        return 0;
-fail:
-       dev_kfree_skb_any(skb);
-
-       return ret;
 }
 
 int rtw89_fw_h2c_rf_reg(struct rtw89_dev *rtwdev,
@@ -3019,9 +3026,8 @@ static void rtw89_release_pkt_list(struct rtw89_dev *rtwdev)
                        continue;
 
                list_for_each_entry_safe(info, tmp, &pkt_list[idx], list) {
-                       rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id);
-                       rtw89_core_release_bit_map(rtwdev->pkt_offload,
-                                                  info->id);
+                       if (test_bit(info->id, rtwdev->pkt_offload))
+                               rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id);
                        list_del(&info->list);
                        kfree(info);
                }
index 675f85c..0482837 100644 (file)
@@ -138,8 +138,13 @@ enum rtw89_pkt_offload_op {
        RTW89_PKT_OFLD_OP_ADD,
        RTW89_PKT_OFLD_OP_DEL,
        RTW89_PKT_OFLD_OP_READ,
+
+       NUM_OF_RTW89_PKT_OFFLOAD_OP,
 };
 
+#define RTW89_PKT_OFLD_WAIT_TAG(pkt_id, pkt_op) \
+       ((pkt_id) * NUM_OF_RTW89_PKT_OFFLOAD_OP + (pkt_op))
+
 enum rtw89_scanofld_notify_reason {
        RTW89_SCAN_DWELL_NOTIFY,
        RTW89_SCAN_PRE_TX_NOTIFY,
@@ -277,6 +282,7 @@ struct rtw89_pktofld_info {
        u8 ssid_len;
        u8 bssid[ETH_ALEN];
        u16 channel_6ghz;
+       bool cancel;
 };
 
 static inline void RTW89_SET_FWCMD_RA_IS_DIS(void *cmd, u32 val)
@@ -3215,16 +3221,17 @@ static inline struct rtw89_fw_c2h_attr *RTW89_SKB_C2H_CB(struct sk_buff *skb)
 #define RTW89_GET_C2H_LOG_SRT_PRT(c2h) (char *)((__le32 *)(c2h) + 2)
 #define RTW89_GET_C2H_LOG_LEN(len) ((len) - RTW89_C2H_HEADER_LEN)
 
-#define RTW89_GET_MAC_C2H_DONE_ACK_CAT(c2h) \
-       le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(1, 0))
-#define RTW89_GET_MAC_C2H_DONE_ACK_CLASS(c2h) \
-       le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(7, 2))
-#define RTW89_GET_MAC_C2H_DONE_ACK_FUNC(c2h) \
-       le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(15, 8))
-#define RTW89_GET_MAC_C2H_DONE_ACK_H2C_RETURN(c2h) \
-       le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(23, 16))
-#define RTW89_GET_MAC_C2H_DONE_ACK_H2C_SEQ(c2h) \
-       le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(31, 24))
+struct rtw89_c2h_done_ack {
+       __le32 w0;
+       __le32 w1;
+       __le32 w2;
+} __packed;
+
+#define RTW89_C2H_DONE_ACK_W2_CAT GENMASK(1, 0)
+#define RTW89_C2H_DONE_ACK_W2_CLASS GENMASK(7, 2)
+#define RTW89_C2H_DONE_ACK_W2_FUNC GENMASK(15, 8)
+#define RTW89_C2H_DONE_ACK_W2_H2C_RETURN GENMASK(23, 16)
+#define RTW89_C2H_DONE_ACK_W2_H2C_SEQ GENMASK(31, 24)
 
 #define RTW89_GET_MAC_C2H_REV_ACK_CAT(c2h) \
        le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(1, 0))
@@ -3339,6 +3346,16 @@ static_assert(sizeof(struct rtw89_mac_mcc_tsf_rpt) <= RTW89_COMPLETION_BUF_SIZE)
 #define RTW89_GET_MAC_C2H_MCC_STATUS_RPT_TSF_HIGH(c2h) \
        le32_get_bits(*((const __le32 *)(c2h) + 4), GENMASK(31, 0))
 
+struct rtw89_c2h_pkt_ofld_rsp {
+       __le32 w0;
+       __le32 w1;
+       __le32 w2;
+} __packed;
+
+#define RTW89_C2H_PKT_OFLD_RSP_W2_PTK_ID GENMASK(7, 0)
+#define RTW89_C2H_PKT_OFLD_RSP_W2_PTK_OP GENMASK(10, 8)
+#define RTW89_C2H_PKT_OFLD_RSP_W2_PTK_LEN GENMASK(31, 16)
+
 struct rtw89_h2c_bcnfltr {
        __le32 w0;
 } __packed;
@@ -3497,17 +3514,28 @@ struct rtw89_fw_h2c_rf_reg_info {
 
 /* CLASS 9 - FW offload */
 #define H2C_CL_MAC_FW_OFLD             0x9
-#define H2C_FUNC_PACKET_OFLD           0x1
-#define H2C_FUNC_MAC_MACID_PAUSE       0x8
-#define H2C_FUNC_USR_EDCA              0xF
-#define H2C_FUNC_TSF32_TOGL            0x10
-#define H2C_FUNC_OFLD_CFG              0x14
-#define H2C_FUNC_ADD_SCANOFLD_CH       0x16
-#define H2C_FUNC_SCANOFLD              0x17
-#define H2C_FUNC_PKT_DROP              0x1b
-#define H2C_FUNC_CFG_BCNFLTR           0x1e
-#define H2C_FUNC_OFLD_RSSI             0x1f
-#define H2C_FUNC_OFLD_TP               0x20
+enum rtw89_fw_ofld_h2c_func {
+       H2C_FUNC_PACKET_OFLD            = 0x1,
+       H2C_FUNC_MAC_MACID_PAUSE        = 0x8,
+       H2C_FUNC_USR_EDCA               = 0xF,
+       H2C_FUNC_TSF32_TOGL             = 0x10,
+       H2C_FUNC_OFLD_CFG               = 0x14,
+       H2C_FUNC_ADD_SCANOFLD_CH        = 0x16,
+       H2C_FUNC_SCANOFLD               = 0x17,
+       H2C_FUNC_PKT_DROP               = 0x1b,
+       H2C_FUNC_CFG_BCNFLTR            = 0x1e,
+       H2C_FUNC_OFLD_RSSI              = 0x1f,
+       H2C_FUNC_OFLD_TP                = 0x20,
+
+       NUM_OF_RTW89_FW_OFLD_H2C_FUNC,
+};
+
+#define RTW89_FW_OFLD_WAIT_COND(tag, func) \
+       ((tag) * NUM_OF_RTW89_FW_OFLD_H2C_FUNC + (func))
+
+#define RTW89_FW_OFLD_WAIT_COND_PKT_OFLD(pkt_id, pkt_op) \
+       RTW89_FW_OFLD_WAIT_COND(RTW89_PKT_OFLD_WAIT_TAG(pkt_id, pkt_op), \
+                               H2C_FUNC_PACKET_OFLD)
 
 /* CLASS 10 - Security CAM */
 #define H2C_CL_MAC_SEC_CAM             0xa
@@ -3648,7 +3676,7 @@ void rtw89_fw_release_general_pkt_list_vif(struct rtw89_dev *rtwdev,
 void rtw89_fw_release_general_pkt_list(struct rtw89_dev *rtwdev, bool notify_fw);
 int rtw89_fw_h2c_ba_cam(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
                        bool valid, struct ieee80211_ampdu_params *params);
-void rtw89_fw_h2c_init_ba_cam_v1(struct rtw89_dev *rtwdev);
+void rtw89_fw_h2c_init_dynamic_ba_cam_v0_ext(struct rtw89_dev *rtwdev);
 
 int rtw89_fw_h2c_lps_parm(struct rtw89_dev *rtwdev,
                          struct rtw89_lps_parm *lps_param);
@@ -3711,8 +3739,8 @@ static inline void rtw89_fw_h2c_init_ba_cam(struct rtw89_dev *rtwdev)
 {
        const struct rtw89_chip_info *chip = rtwdev->chip;
 
-       if (chip->bacam_v1)
-               rtw89_fw_h2c_init_ba_cam_v1(rtwdev);
+       if (chip->bacam_ver == RTW89_BACAM_V0_EXT)
+               rtw89_fw_h2c_init_dynamic_ba_cam_v0_ext(rtwdev);
 }
 
 #endif
index 512de49..0833a9e 100644 (file)
@@ -644,6 +644,39 @@ static void rtw89_mac_dump_err_status(struct rtw89_dev *rtwdev,
        rtw89_info(rtwdev, "<---\n");
 }
 
+static bool rtw89_mac_suppress_log(struct rtw89_dev *rtwdev, u32 err)
+{
+       struct rtw89_ser *ser = &rtwdev->ser;
+       u32 dmac_err, imr, isr;
+       int ret;
+
+       if (rtwdev->chip->chip_id == RTL8852C) {
+               ret = rtw89_mac_check_mac_en(rtwdev, 0, RTW89_DMAC_SEL);
+               if (ret)
+                       return true;
+
+               if (err == MAC_AX_ERR_L1_ERR_DMAC) {
+                       dmac_err = rtw89_read32(rtwdev, R_AX_DMAC_ERR_ISR);
+                       imr = rtw89_read32(rtwdev, R_AX_TXPKTCTL_B0_ERRFLAG_IMR);
+                       isr = rtw89_read32(rtwdev, R_AX_TXPKTCTL_B0_ERRFLAG_ISR);
+
+                       if ((dmac_err & B_AX_TXPKTCTRL_ERR_FLAG) &&
+                           ((isr & imr) & B_AX_B0_ISR_ERR_CMDPSR_FRZTO)) {
+                               set_bit(RTW89_SER_SUPPRESS_LOG, ser->flags);
+                               return true;
+                       }
+               } else if (err == MAC_AX_ERR_L1_RESET_DISABLE_DMAC_DONE) {
+                       if (test_bit(RTW89_SER_SUPPRESS_LOG, ser->flags))
+                               return true;
+               } else if (err == MAC_AX_ERR_L1_RESET_RECOVERY_DONE) {
+                       if (test_and_clear_bit(RTW89_SER_SUPPRESS_LOG, ser->flags))
+                               return true;
+               }
+       }
+
+       return false;
+}
+
 u32 rtw89_mac_get_err_status(struct rtw89_dev *rtwdev)
 {
        u32 err, err_scnr;
@@ -667,6 +700,9 @@ u32 rtw89_mac_get_err_status(struct rtw89_dev *rtwdev)
        else if (err_scnr == RTW89_RXI300_ERROR)
                err = MAC_AX_ERR_RXI300;
 
+       if (rtw89_mac_suppress_log(rtwdev, err))
+               return err;
+
        rtw89_fw_st_dbg_dump(rtwdev);
        rtw89_mac_dump_err_status(rtwdev, err);
 
@@ -676,6 +712,7 @@ EXPORT_SYMBOL(rtw89_mac_get_err_status);
 
 int rtw89_mac_set_err_status(struct rtw89_dev *rtwdev, u32 err)
 {
+       struct rtw89_ser *ser = &rtwdev->ser;
        u32 halt;
        int ret = 0;
 
@@ -692,6 +729,11 @@ int rtw89_mac_set_err_status(struct rtw89_dev *rtwdev, u32 err)
        }
 
        rtw89_write32(rtwdev, R_AX_HALT_H2C, err);
+
+       if (ser->prehandle_l1 &&
+           (err == MAC_AX_ERR_L1_DISABLE_EN || err == MAC_AX_ERR_L1_RCVY_EN))
+               return 0;
+
        rtw89_write32(rtwdev, R_AX_HALT_H2C_CTRL, B_AX_HALT_H2C_TRIGGER);
 
        return 0;
@@ -1479,6 +1521,8 @@ const struct rtw89_mac_size_set rtw89_mac_size = {
        .ple_qt_52a_wow = {264, 0, 32, 20, 64, 13, 1005, 0, 64, 128, 120,},
        /* 8852B PCIE WOW */
        .ple_qt_52b_wow = {147, 0, 16, 20, 157, 13, 133, 0, 172, 14, 24, 0,},
+       /* 8851B PCIE WOW */
+       .ple_qt_51b_wow = {147, 0, 16, 20, 157, 13, 133, 0, 172, 14, 24, 0,},
 };
 EXPORT_SYMBOL(rtw89_mac_size);
 
@@ -2602,6 +2646,7 @@ static int rtw89_mac_read_phycap(struct rtw89_dev *rtwdev,
 
 int rtw89_mac_setup_phycap(struct rtw89_dev *rtwdev)
 {
+       struct rtw89_efuse *efuse = &rtwdev->efuse;
        struct rtw89_hal *hal = &rtwdev->hal;
        const struct rtw89_chip_info *chip = rtwdev->chip;
        struct rtw89_mac_c2h_info c2h_info = {0};
@@ -2633,6 +2678,13 @@ int rtw89_mac_setup_phycap(struct rtw89_dev *rtwdev)
                hal->tx_path_diversity = true;
        }
 
+       if (chip->rf_path_num == 1) {
+               hal->antenna_tx = RF_A;
+               hal->antenna_rx = RF_A;
+               if ((efuse->rfe_type % 3) == 2)
+                       hal->ant_diversity = true;
+       }
+
        rtw89_debug(rtwdev, RTW89_DBG_FW,
                    "phycap hal/phy/chip: tx_nss=0x%x/0x%x/0x%x rx_nss=0x%x/0x%x/0x%x\n",
                    hal->tx_nss, tx_nss, chip->tx_nss,
@@ -2641,6 +2693,7 @@ int rtw89_mac_setup_phycap(struct rtw89_dev *rtwdev)
                    "ant num/bitmap: tx=%d/0x%x rx=%d/0x%x\n",
                    tx_ant, hal->antenna_tx, rx_ant, hal->antenna_rx);
        rtw89_debug(rtwdev, RTW89_DBG_FW, "TX path diversity=%d\n", hal->tx_path_diversity);
+       rtw89_debug(rtwdev, RTW89_DBG_FW, "Antenna diversity=%d\n", hal->ant_diversity);
 
        return 0;
 }
@@ -4331,6 +4384,8 @@ rtw89_mac_c2h_bcn_fltr_rpt(struct rtw89_dev *rtwdev, struct sk_buff *c2h,
 static void
 rtw89_mac_c2h_rec_ack(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len)
 {
+       /* N.B. This will run in interrupt context. */
+
        rtw89_debug(rtwdev, RTW89_DBG_FW,
                    "C2H rev ack recv, cat: %d, class: %d, func: %d, seq : %d\n",
                    RTW89_GET_MAC_C2H_REV_ACK_CAT(c2h->data),
@@ -4340,15 +4395,44 @@ rtw89_mac_c2h_rec_ack(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len)
 }
 
 static void
-rtw89_mac_c2h_done_ack(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len)
-{
+rtw89_mac_c2h_done_ack(struct rtw89_dev *rtwdev, struct sk_buff *skb_c2h, u32 len)
+{
+       /* N.B. This will run in interrupt context. */
+       struct rtw89_wait_info *fw_ofld_wait = &rtwdev->mac.fw_ofld_wait;
+       const struct rtw89_c2h_done_ack *c2h =
+               (const struct rtw89_c2h_done_ack *)skb_c2h->data;
+       u8 h2c_cat = le32_get_bits(c2h->w2, RTW89_C2H_DONE_ACK_W2_CAT);
+       u8 h2c_class = le32_get_bits(c2h->w2, RTW89_C2H_DONE_ACK_W2_CLASS);
+       u8 h2c_func = le32_get_bits(c2h->w2, RTW89_C2H_DONE_ACK_W2_FUNC);
+       u8 h2c_return = le32_get_bits(c2h->w2, RTW89_C2H_DONE_ACK_W2_H2C_RETURN);
+       u8 h2c_seq = le32_get_bits(c2h->w2, RTW89_C2H_DONE_ACK_W2_H2C_SEQ);
+       struct rtw89_completion_data data = {};
+       unsigned int cond;
+
        rtw89_debug(rtwdev, RTW89_DBG_FW,
                    "C2H done ack recv, cat: %d, class: %d, func: %d, ret: %d, seq : %d\n",
-                   RTW89_GET_MAC_C2H_DONE_ACK_CAT(c2h->data),
-                   RTW89_GET_MAC_C2H_DONE_ACK_CLASS(c2h->data),
-                   RTW89_GET_MAC_C2H_DONE_ACK_FUNC(c2h->data),
-                   RTW89_GET_MAC_C2H_DONE_ACK_H2C_RETURN(c2h->data),
-                   RTW89_GET_MAC_C2H_DONE_ACK_H2C_SEQ(c2h->data));
+                   h2c_cat, h2c_class, h2c_func, h2c_return, h2c_seq);
+
+       if (h2c_cat != H2C_CAT_MAC)
+               return;
+
+       switch (h2c_class) {
+       default:
+               return;
+       case H2C_CL_MAC_FW_OFLD:
+               switch (h2c_func) {
+               default:
+                       return;
+               case H2C_FUNC_ADD_SCANOFLD_CH:
+               case H2C_FUNC_SCANOFLD:
+                       cond = RTW89_FW_OFLD_WAIT_COND(0, h2c_func);
+                       break;
+               }
+
+               data.err = !!h2c_return;
+               rtw89_complete_cond(fw_ofld_wait, cond, &data);
+               return;
+       }
 }
 
 static void
@@ -4364,9 +4448,22 @@ rtw89_mac_c2h_bcn_cnt(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len)
 }
 
 static void
-rtw89_mac_c2h_pkt_ofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *c2h,
+rtw89_mac_c2h_pkt_ofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *skb_c2h,
                           u32 len)
 {
+       struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait;
+       const struct rtw89_c2h_pkt_ofld_rsp *c2h =
+               (const struct rtw89_c2h_pkt_ofld_rsp *)skb_c2h->data;
+       u16 pkt_len = le32_get_bits(c2h->w2, RTW89_C2H_PKT_OFLD_RSP_W2_PTK_LEN);
+       u8 pkt_id = le32_get_bits(c2h->w2, RTW89_C2H_PKT_OFLD_RSP_W2_PTK_ID);
+       u8 pkt_op = le32_get_bits(c2h->w2, RTW89_C2H_PKT_OFLD_RSP_W2_PTK_OP);
+       struct rtw89_completion_data data = {};
+       unsigned int cond;
+
+       data.err = !pkt_len;
+       cond = RTW89_FW_OFLD_WAIT_COND_PKT_OFLD(pkt_id, pkt_op);
+
+       rtw89_complete_cond(wait, cond, &data);
 }
 
 static void
@@ -4574,6 +4671,21 @@ bool rtw89_mac_c2h_chk_atomic(struct rtw89_dev *rtwdev, u8 class, u8 func)
        switch (class) {
        default:
                return false;
+       case RTW89_MAC_C2H_CLASS_INFO:
+               switch (func) {
+               default:
+                       return false;
+               case RTW89_MAC_C2H_FUNC_REC_ACK:
+               case RTW89_MAC_C2H_FUNC_DONE_ACK:
+                       return true;
+               }
+       case RTW89_MAC_C2H_CLASS_OFLD:
+               switch (func) {
+               default:
+                       return false;
+               case RTW89_MAC_C2H_FUNC_PKT_OFLD_RSP:
+                       return true;
+               }
        case RTW89_MAC_C2H_CLASS_MCC:
                return true;
        }
index 6ba633c..0f380b6 100644 (file)
@@ -642,6 +642,7 @@ enum mac_ax_err_info {
        MAC_AX_ERR_L0_PROMOTE_TO_L1 = 0x0010,
 
        /* L1 */
+       MAC_AX_ERR_L1_PREERR_DMAC = 0x999,
        MAC_AX_ERR_L1_ERR_DMAC = 0x1000,
        MAC_AX_ERR_L1_RESET_DISABLE_DMAC_DONE = 0x1001,
        MAC_AX_ERR_L1_RESET_RECOVERY_DONE = 0x1002,
@@ -780,6 +781,7 @@ enum mac_ax_err_info {
        MAC_AX_ERR_L1_RCVY_EN = 0x0002,
        MAC_AX_ERR_L1_RCVY_STOP_REQ = 0x0003,
        MAC_AX_ERR_L1_RCVY_START_REQ = 0x0004,
+       MAC_AX_ERR_L1_RESET_START_DMAC = 0x000A,
        MAC_AX_ERR_L0_CFG_NOTIFY = 0x0010,
        MAC_AX_ERR_L0_CFG_DIS_NOTIFY = 0x0011,
        MAC_AX_ERR_L0_CFG_HANDSHAKE = 0x0012,
@@ -819,6 +821,7 @@ struct rtw89_mac_size_set {
        const struct rtw89_ple_quota ple_qt58;
        const struct rtw89_ple_quota ple_qt_52a_wow;
        const struct rtw89_ple_quota ple_qt_52b_wow;
+       const struct rtw89_ple_quota ple_qt_51b_wow;
 };
 
 extern const struct rtw89_mac_size_set rtw89_mac_size;
index c42e310..0ffd7fb 100644 (file)
@@ -759,13 +759,18 @@ int rtw89_ops_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
        struct rtw89_dev *rtwdev = hw->priv;
        struct rtw89_hal *hal = &rtwdev->hal;
 
-       if (rx_ant != hw->wiphy->available_antennas_rx && rx_ant != hal->antenna_rx)
+       if (hal->ant_diversity) {
+               if (tx_ant != rx_ant || hweight32(tx_ant) != 1)
+                       return -EINVAL;
+       } else if (rx_ant != hw->wiphy->available_antennas_rx && rx_ant != hal->antenna_rx) {
                return -EINVAL;
+       }
 
        mutex_lock(&rtwdev->mutex);
        hal->antenna_tx = tx_ant;
        hal->antenna_rx = rx_ant;
        hal->tx_path_diversity = false;
+       hal->ant_diversity_fixed = true;
        mutex_unlock(&rtwdev->mutex);
 
        return 0;
index 70b4754..92bfef9 100644 (file)
@@ -1003,10 +1003,10 @@ static u32 __rtw89_pci_check_and_reclaim_tx_resource(struct rtw89_dev *rtwdev,
        min_cnt = min(bd_cnt, wd_cnt);
        if (min_cnt == 0) {
                /* This message can be frequently shown in low power mode or
-                * high traffic with 8852B, and we have recognized it as normal
+                * high traffic with small FIFO chips, and we have recognized it as normal
                 * behavior, so print with mask RTW89_DBG_TXRX in these situations.
                 */
-               if (rtwpci->low_power || chip->chip_id == RTL8852B)
+               if (rtwpci->low_power || chip->small_fifo_size)
                        debug_mask = RTW89_DBG_TXRX;
                else
                        debug_mask = RTW89_DBG_UNEXP;
@@ -3216,11 +3216,16 @@ static void rtw89_pci_clear_resource(struct rtw89_dev *rtwdev,
 void rtw89_pci_config_intr_mask(struct rtw89_dev *rtwdev)
 {
        struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv;
+       const struct rtw89_chip_info *chip = rtwdev->chip;
+       u32 hs0isr_ind_int_en = B_AX_HS0ISR_IND_INT_EN;
+
+       if (chip->chip_id == RTL8851B)
+               hs0isr_ind_int_en = B_AX_HS0ISR_IND_INT_EN_WKARND;
 
        rtwpci->halt_c2h_intrs = B_AX_HALT_C2H_INT_EN | 0;
 
        if (rtwpci->under_recovery) {
-               rtwpci->intrs[0] = B_AX_HS0ISR_IND_INT_EN;
+               rtwpci->intrs[0] = hs0isr_ind_int_en;
                rtwpci->intrs[1] = 0;
        } else {
                rtwpci->intrs[0] = B_AX_TXDMA_STUCK_INT_EN |
@@ -3230,7 +3235,7 @@ void rtw89_pci_config_intr_mask(struct rtw89_dev *rtwdev)
                                   B_AX_RXDMA_STUCK_INT_EN |
                                   B_AX_RDU_INT_EN |
                                   B_AX_RPQBD_FULL_INT_EN |
-                                  B_AX_HS0ISR_IND_INT_EN;
+                                  hs0isr_ind_int_en;
 
                rtwpci->intrs[1] = B_AX_HC10ISR_IND_INT_EN;
        }
index 0e4bd21..2f3d1ad 100644 (file)
 #define B_AX_HD1ISR_IND_INT_EN         BIT(26)
 #define B_AX_HD0ISR_IND_INT_EN         BIT(25)
 #define B_AX_HS0ISR_IND_INT_EN         BIT(24)
+#define B_AX_HS0ISR_IND_INT_EN_WKARND  BIT(23)
 #define B_AX_RETRAIN_INT_EN            BIT(21)
 #define B_AX_RPQBD_FULL_INT_EN         BIT(20)
 #define B_AX_RDU_INT_EN                        BIT(19)
index c7e9061..568488d 100644 (file)
@@ -10,6 +10,7 @@
 #include "ps.h"
 #include "reg.h"
 #include "sar.h"
+#include "txrx.h"
 #include "util.h"
 
 static u16 get_max_amsdu_len(struct rtw89_dev *rtwdev,
@@ -1400,7 +1401,8 @@ static void rtw89_phy_init_rf_nctl(struct rtw89_dev *rtwdev)
        rtw89_phy_write32_set(rtwdev, R_IOQ_IQK_DPK, 0x3);
        rtw89_phy_write32_set(rtwdev, R_GNT_BT_WGT_EN, 0x1);
        rtw89_phy_write32_set(rtwdev, R_P0_PATH_RST, 0x8000000);
-       rtw89_phy_write32_set(rtwdev, R_P1_PATH_RST, 0x8000000);
+       if (chip->chip_id != RTL8851B)
+               rtw89_phy_write32_set(rtwdev, R_P1_PATH_RST, 0x8000000);
        if (chip->chip_id == RTL8852B)
                rtw89_phy_write32_set(rtwdev, R_IOQ_IQK_DPK, 0x2);
 
@@ -1414,6 +1416,9 @@ static void rtw89_phy_init_rf_nctl(struct rtw89_dev *rtwdev)
 
        nctl_table = chip->nctl_table;
        rtw89_phy_init_reg(rtwdev, nctl_table, rtw89_phy_config_bb_reg, NULL);
+
+       if (chip->nctl_post_table)
+               rtw89_rfk_parser(rtwdev, chip->nctl_post_table);
 }
 
 static u32 rtw89_phy0_phy1_offset(struct rtw89_dev *rtwdev, u32 addr)
@@ -2338,27 +2343,29 @@ void rtw89_phy_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb,
 
 static u8 rtw89_phy_cfo_get_xcap_reg(struct rtw89_dev *rtwdev, bool sc_xo)
 {
+       const struct rtw89_xtal_info *xtal = rtwdev->chip->xtal_info;
        u32 reg_mask;
 
        if (sc_xo)
-               reg_mask = B_AX_XTAL_SC_XO_MASK;
+               reg_mask = xtal->sc_xo_mask;
        else
-               reg_mask = B_AX_XTAL_SC_XI_MASK;
+               reg_mask = xtal->sc_xi_mask;
 
-       return (u8)rtw89_read32_mask(rtwdev, R_AX_XTAL_ON_CTRL0, reg_mask);
+       return (u8)rtw89_read32_mask(rtwdev, xtal->xcap_reg, reg_mask);
 }
 
 static void rtw89_phy_cfo_set_xcap_reg(struct rtw89_dev *rtwdev, bool sc_xo,
                                       u8 val)
 {
+       const struct rtw89_xtal_info *xtal = rtwdev->chip->xtal_info;
        u32 reg_mask;
 
        if (sc_xo)
-               reg_mask = B_AX_XTAL_SC_XO_MASK;
+               reg_mask = xtal->sc_xo_mask;
        else
-               reg_mask = B_AX_XTAL_SC_XI_MASK;
+               reg_mask = xtal->sc_xi_mask;
 
-       rtw89_write32_mask(rtwdev, R_AX_XTAL_ON_CTRL0, reg_mask, val);
+       rtw89_write32_mask(rtwdev, xtal->xcap_reg, reg_mask, val);
 }
 
 static void rtw89_phy_cfo_set_crystal_cap(struct rtw89_dev *rtwdev,
@@ -2371,7 +2378,7 @@ static void rtw89_phy_cfo_set_crystal_cap(struct rtw89_dev *rtwdev,
        if (!force && cfo->crystal_cap == crystal_cap)
                return;
        crystal_cap = clamp_t(u8, crystal_cap, 0, 127);
-       if (chip->chip_id == RTL8852A) {
+       if (chip->chip_id == RTL8852A || chip->chip_id == RTL8851B) {
                rtw89_phy_cfo_set_xcap_reg(rtwdev, true, crystal_cap);
                rtw89_phy_cfo_set_xcap_reg(rtwdev, false, crystal_cap);
                sc_xo_val = rtw89_phy_cfo_get_xcap_reg(rtwdev, true);
@@ -2946,6 +2953,126 @@ static void rtw89_phy_ul_tb_info_init(struct rtw89_dev *rtwdev)
                rtw89_phy_read32_mask(rtwdev, R_BANDEDGE, B_BANDEDGE_EN);
 }
 
+static
+void rtw89_phy_antdiv_sts_instance_reset(struct rtw89_antdiv_stats *antdiv_sts)
+{
+       ewma_rssi_init(&antdiv_sts->cck_rssi_avg);
+       ewma_rssi_init(&antdiv_sts->ofdm_rssi_avg);
+       ewma_rssi_init(&antdiv_sts->non_legacy_rssi_avg);
+       antdiv_sts->pkt_cnt_cck = 0;
+       antdiv_sts->pkt_cnt_ofdm = 0;
+       antdiv_sts->pkt_cnt_non_legacy = 0;
+       antdiv_sts->evm = 0;
+}
+
+static void rtw89_phy_antdiv_sts_instance_add(struct rtw89_dev *rtwdev,
+                                             struct rtw89_rx_phy_ppdu *phy_ppdu,
+                                             struct rtw89_antdiv_stats *stats)
+{
+       if (GET_DATA_RATE_MODE(phy_ppdu->rate) == DATA_RATE_MODE_NON_HT) {
+               if (phy_ppdu->rate < RTW89_HW_RATE_OFDM6) {
+                       ewma_rssi_add(&stats->cck_rssi_avg, phy_ppdu->rssi_avg);
+                       stats->pkt_cnt_cck++;
+               } else {
+                       ewma_rssi_add(&stats->ofdm_rssi_avg, phy_ppdu->rssi_avg);
+                       stats->pkt_cnt_ofdm++;
+                       stats->evm += phy_ppdu->ofdm.evm_min;
+               }
+       } else {
+               ewma_rssi_add(&stats->non_legacy_rssi_avg, phy_ppdu->rssi_avg);
+               stats->pkt_cnt_non_legacy++;
+               stats->evm += phy_ppdu->ofdm.evm_min;
+       }
+}
+
+static u8 rtw89_phy_antdiv_sts_instance_get_rssi(struct rtw89_antdiv_stats *stats)
+{
+       if (stats->pkt_cnt_non_legacy >= stats->pkt_cnt_cck &&
+           stats->pkt_cnt_non_legacy >= stats->pkt_cnt_ofdm)
+               return ewma_rssi_read(&stats->non_legacy_rssi_avg);
+       else if (stats->pkt_cnt_ofdm >= stats->pkt_cnt_cck &&
+                stats->pkt_cnt_ofdm >= stats->pkt_cnt_non_legacy)
+               return ewma_rssi_read(&stats->ofdm_rssi_avg);
+       else
+               return ewma_rssi_read(&stats->cck_rssi_avg);
+}
+
+static u8 rtw89_phy_antdiv_sts_instance_get_evm(struct rtw89_antdiv_stats *stats)
+{
+       return phy_div(stats->evm, stats->pkt_cnt_non_legacy + stats->pkt_cnt_ofdm);
+}
+
+void rtw89_phy_antdiv_parse(struct rtw89_dev *rtwdev,
+                           struct rtw89_rx_phy_ppdu *phy_ppdu)
+{
+       struct rtw89_antdiv_info *antdiv = &rtwdev->antdiv;
+       struct rtw89_hal *hal = &rtwdev->hal;
+
+       if (!hal->ant_diversity || hal->ant_diversity_fixed)
+               return;
+
+       rtw89_phy_antdiv_sts_instance_add(rtwdev, phy_ppdu, &antdiv->target_stats);
+
+       if (!antdiv->get_stats)
+               return;
+
+       if (hal->antenna_rx == RF_A)
+               rtw89_phy_antdiv_sts_instance_add(rtwdev, phy_ppdu, &antdiv->main_stats);
+       else if (hal->antenna_rx == RF_B)
+               rtw89_phy_antdiv_sts_instance_add(rtwdev, phy_ppdu, &antdiv->aux_stats);
+}
+
+static void rtw89_phy_antdiv_reg_init(struct rtw89_dev *rtwdev)
+{
+       rtw89_phy_write32_idx(rtwdev, R_P0_TRSW, B_P0_ANT_TRAIN_EN,
+                             0x0, RTW89_PHY_0);
+       rtw89_phy_write32_idx(rtwdev, R_P0_TRSW, B_P0_TX_ANT_SEL,
+                             0x0, RTW89_PHY_0);
+
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANT_SW, B_P0_TRSW_TX_EXTEND,
+                             0x0, RTW89_PHY_0);
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANT_SW, B_P0_HW_ANTSW_DIS_BY_GNT_BT,
+                             0x0, RTW89_PHY_0);
+
+       rtw89_phy_write32_idx(rtwdev, R_P0_TRSW, B_P0_BT_FORCE_ANTIDX_EN,
+                             0x0, RTW89_PHY_0);
+
+       rtw89_phy_write32_idx(rtwdev, R_RFSW_CTRL_ANT0_BASE, B_RFSW_CTRL_ANT_MAPPING,
+                             0x0100, RTW89_PHY_0);
+
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANTSEL, B_P0_ANTSEL_BTG_TRX,
+                             0x1, RTW89_PHY_0);
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANTSEL, B_P0_ANTSEL_HW_CTRL,
+                             0x0, RTW89_PHY_0);
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANTSEL, B_P0_ANTSEL_SW_2G,
+                             0x0, RTW89_PHY_0);
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANTSEL, B_P0_ANTSEL_SW_5G,
+                             0x0, RTW89_PHY_0);
+}
+
+static void rtw89_phy_antdiv_sts_reset(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_antdiv_info *antdiv = &rtwdev->antdiv;
+
+       rtw89_phy_antdiv_sts_instance_reset(&antdiv->target_stats);
+       rtw89_phy_antdiv_sts_instance_reset(&antdiv->main_stats);
+       rtw89_phy_antdiv_sts_instance_reset(&antdiv->aux_stats);
+}
+
+static void rtw89_phy_antdiv_init(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_antdiv_info *antdiv = &rtwdev->antdiv;
+       struct rtw89_hal *hal = &rtwdev->hal;
+
+       if (!hal->ant_diversity)
+               return;
+
+       antdiv->get_stats = false;
+       antdiv->rssi_pre = 0;
+       rtw89_phy_antdiv_sts_reset(rtwdev);
+       rtw89_phy_antdiv_reg_init(rtwdev);
+}
+
 static void rtw89_phy_stat_thermal_update(struct rtw89_dev *rtwdev)
 {
        struct rtw89_phy_stat *phystat = &rtwdev->phystat;
@@ -4114,6 +4241,144 @@ void rtw89_phy_tx_path_div_track(struct rtw89_dev *rtwdev)
                                          &done);
 }
 
+#define ANTDIV_MAIN 0
+#define ANTDIV_AUX 1
+
+static void rtw89_phy_antdiv_set_ant(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_hal *hal = &rtwdev->hal;
+       u8 default_ant, optional_ant;
+
+       if (!hal->ant_diversity || hal->antenna_tx == 0)
+               return;
+
+       if (hal->antenna_tx == RF_B) {
+               default_ant = ANTDIV_AUX;
+               optional_ant = ANTDIV_MAIN;
+       } else {
+               default_ant = ANTDIV_MAIN;
+               optional_ant = ANTDIV_AUX;
+       }
+
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANTSEL, B_P0_ANTSEL_CGCS_CTRL,
+                             default_ant, RTW89_PHY_0);
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANTSEL, B_P0_ANTSEL_RX_ORI,
+                             default_ant, RTW89_PHY_0);
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANTSEL, B_P0_ANTSEL_RX_ALT,
+                             optional_ant, RTW89_PHY_0);
+       rtw89_phy_write32_idx(rtwdev, R_P0_ANTSEL, B_P0_ANTSEL_TX_ORI,
+                             default_ant, RTW89_PHY_0);
+}
+
+static void rtw89_phy_swap_hal_antenna(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_hal *hal = &rtwdev->hal;
+
+       hal->antenna_rx = hal->antenna_rx == RF_A ? RF_B : RF_A;
+       hal->antenna_tx = hal->antenna_rx;
+}
+
+static void rtw89_phy_antdiv_decision_state(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_antdiv_info *antdiv = &rtwdev->antdiv;
+       struct rtw89_hal *hal = &rtwdev->hal;
+       bool no_change = false;
+       u8 main_rssi, aux_rssi;
+       u8 main_evm, aux_evm;
+       u32 candidate;
+
+       antdiv->get_stats = false;
+       antdiv->training_count = 0;
+
+       main_rssi = rtw89_phy_antdiv_sts_instance_get_rssi(&antdiv->main_stats);
+       main_evm = rtw89_phy_antdiv_sts_instance_get_evm(&antdiv->main_stats);
+       aux_rssi = rtw89_phy_antdiv_sts_instance_get_rssi(&antdiv->aux_stats);
+       aux_evm = rtw89_phy_antdiv_sts_instance_get_evm(&antdiv->aux_stats);
+
+       if (main_evm > aux_evm + ANTDIV_EVM_DIFF_TH)
+               candidate = RF_A;
+       else if (aux_evm > main_evm + ANTDIV_EVM_DIFF_TH)
+               candidate = RF_B;
+       else if (main_rssi > aux_rssi + RTW89_TX_DIV_RSSI_RAW_TH)
+               candidate = RF_A;
+       else if (aux_rssi > main_rssi + RTW89_TX_DIV_RSSI_RAW_TH)
+               candidate = RF_B;
+       else
+               no_change = true;
+
+       if (no_change) {
+               /* swap back from training antenna to original */
+               rtw89_phy_swap_hal_antenna(rtwdev);
+               return;
+       }
+
+       hal->antenna_tx = candidate;
+       hal->antenna_rx = candidate;
+}
+
+static void rtw89_phy_antdiv_training_state(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_antdiv_info *antdiv = &rtwdev->antdiv;
+       u64 state_period;
+
+       if (antdiv->training_count % 2 == 0) {
+               if (antdiv->training_count == 0)
+                       rtw89_phy_antdiv_sts_reset(rtwdev);
+
+               antdiv->get_stats = true;
+               state_period = msecs_to_jiffies(ANTDIV_TRAINNING_INTVL);
+       } else {
+               antdiv->get_stats = false;
+               state_period = msecs_to_jiffies(ANTDIV_DELAY);
+
+               rtw89_phy_swap_hal_antenna(rtwdev);
+               rtw89_phy_antdiv_set_ant(rtwdev);
+       }
+
+       antdiv->training_count++;
+       ieee80211_queue_delayed_work(rtwdev->hw, &rtwdev->antdiv_work,
+                                    state_period);
+}
+
+void rtw89_phy_antdiv_work(struct work_struct *work)
+{
+       struct rtw89_dev *rtwdev = container_of(work, struct rtw89_dev,
+                                               antdiv_work.work);
+       struct rtw89_antdiv_info *antdiv = &rtwdev->antdiv;
+
+       mutex_lock(&rtwdev->mutex);
+
+       if (antdiv->training_count <= ANTDIV_TRAINNING_CNT) {
+               rtw89_phy_antdiv_training_state(rtwdev);
+       } else {
+               rtw89_phy_antdiv_decision_state(rtwdev);
+               rtw89_phy_antdiv_set_ant(rtwdev);
+       }
+
+       mutex_unlock(&rtwdev->mutex);
+}
+
+void rtw89_phy_antdiv_track(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_antdiv_info *antdiv = &rtwdev->antdiv;
+       struct rtw89_hal *hal = &rtwdev->hal;
+       u8 rssi, rssi_pre;
+
+       if (!hal->ant_diversity || hal->ant_diversity_fixed)
+               return;
+
+       rssi = rtw89_phy_antdiv_sts_instance_get_rssi(&antdiv->target_stats);
+       rssi_pre = antdiv->rssi_pre;
+       antdiv->rssi_pre = rssi;
+       rtw89_phy_antdiv_sts_instance_reset(&antdiv->target_stats);
+
+       if (abs((int)rssi - (int)rssi_pre) < ANTDIV_RSSI_DIFF_TH)
+               return;
+
+       antdiv->training_count = 0;
+       ieee80211_queue_delayed_work(rtwdev->hw, &rtwdev->antdiv_work, 0);
+}
+
 static void rtw89_phy_env_monitor_init(struct rtw89_dev *rtwdev)
 {
        rtw89_phy_ccx_top_setting_init(rtwdev);
@@ -4133,6 +4398,8 @@ void rtw89_phy_dm_init(struct rtw89_dev *rtwdev)
        rtw89_phy_dig_init(rtwdev);
        rtw89_phy_cfo_init(rtwdev);
        rtw89_phy_ul_tb_info_init(rtwdev);
+       rtw89_phy_antdiv_init(rtwdev);
+       rtw89_phy_antdiv_set_ant(rtwdev);
 
        rtw89_phy_init_rf_nctl(rtwdev);
        rtw89_chip_rfk_init(rtwdev);
index 7535867..ab174a0 100644 (file)
 #define UL_TB_TF_CNT_L2H_TH 100
 #define UL_TB_TF_CNT_H2L_TH 70
 
+#define ANTDIV_TRAINNING_CNT 2
+#define ANTDIV_TRAINNING_INTVL 30
+#define ANTDIV_DELAY 110
+#define ANTDIV_TP_DIFF_TH_HIGH 100
+#define ANTDIV_TP_DIFF_TH_LOW 5
+#define ANTDIV_EVM_DIFF_TH 8
+#define ANTDIV_RSSI_DIFF_TH 3
+
 #define CCX_MAX_PERIOD 2097
 #define CCX_MAX_PERIOD_UNIT 32
 #define MS_TO_4US_RATIO 250
@@ -549,6 +557,10 @@ void rtw89_phy_set_phy_regs(struct rtw89_dev *rtwdev, u32 addr, u32 mask,
 void rtw89_phy_dig_reset(struct rtw89_dev *rtwdev);
 void rtw89_phy_dig(struct rtw89_dev *rtwdev);
 void rtw89_phy_tx_path_div_track(struct rtw89_dev *rtwdev);
+void rtw89_phy_antdiv_parse(struct rtw89_dev *rtwdev,
+                           struct rtw89_rx_phy_ppdu *phy_ppdu);
+void rtw89_phy_antdiv_track(struct rtw89_dev *rtwdev);
+void rtw89_phy_antdiv_work(struct work_struct *work);
 void rtw89_phy_set_bss_color(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif);
 void rtw89_phy_tssi_ctrl_set_bandedge_cfg(struct rtw89_dev *rtwdev,
                                          enum rtw89_mac_idx mac_idx,
index 266e423..21f6878 100644 (file)
 #define B_AX_XTAL_SC_XI_MASK GENMASK(16, 10)
 #define B_AX_XTAL_SC_MASK GENMASK(6, 0)
 
+#define R_AX_XTAL_ON_CTRL3 0x028C
+#define B_AX_XTAL_SC_INIT_A_BLOCK_MASK GENMASK(30, 24)
+#define B_AX_XTAL_SC_LPS_A_BLOCK_MASK GENMASK(22, 16)
+#define B_AX_XTAL_SC_XO_A_BLOCK_MASK GENMASK(14, 8)
+#define B_AX_XTAL_SC_XI_A_BLOCK_MASK GENMASK(6, 0)
+
 #define R_AX_GPIO0_7_FUNC_SEL 0x02D0
 
 #define R_AX_EECS_EESK_FUNC_SEL 0x02D8
 #define RR_MOD_MASK GENMASK(19, 16)
 #define RR_MOD_DCK GENMASK(14, 10)
 #define RR_MOD_RGM GENMASK(13, 4)
+#define RR_MOD_RXB GENMASK(9, 5)
 #define RR_MOD_V_DOWN 0x0
 #define RR_MOD_V_STANDBY 0x1
 #define RR_TXAGC 0x10001
 #define RR_RXBB 0x83
 #define RR_RXBB_VOBUF GENMASK(15, 12)
 #define RR_RXBB_C2G GENMASK(16, 10)
+#define RR_RXBB_C2 GENMASK(11, 8)
 #define RR_RXBB_C1G GENMASK(9, 8)
 #define RR_RXBB_FATT GENMASK(7, 0)
 #define RR_RXBB_ATTR GENMASK(7, 4)
 #define RR_LOGEN 0xa3
 #define RR_LOGEN_RPT GENMASK(19, 16)
 #define RR_SX 0xaf
+#define RR_IBD 0xc9
+#define RR_IBD_VAL GENMASK(4, 0)
 #define RR_LDO 0xb1
 #define RR_LDO_SEL GENMASK(8, 6)
 #define RR_VCO 0xb2
+#define RR_VCO_SEL GENMASK(9, 8)
+#define RR_VCI 0xb3
+#define RR_VCI_ON BIT(7)
 #define RR_LPF 0xb7
 #define RR_LPF_BUSY BIT(8)
 #define RR_XTALX2 0xb8
 #define RR_MALSEL 0xbe
 #define RR_SYNFB 0xc5
 #define RR_SYNFB_LK BIT(15)
+#define RR_AACK 0xca
 #define RR_LCKST 0xcf
 #define RR_LCKST_BIN BIT(0)
 #define RR_LCK_TRG 0xd3
 #define B_ENABLE_CCK BIT(5)
 #define R_RSTB_ASYNC 0x0704
 #define B_RSTB_ASYNC_ALL BIT(1)
+#define R_P0_ANT_SW 0x0728
+#define B_P0_HW_ANTSW_DIS_BY_GNT_BT BIT(12)
+#define B_P0_TRSW_TX_EXTEND GENMASK(3, 0)
 #define R_MAC_PIN_SEL 0x0734
 #define B_CH_IDX_SEG0 GENMASK(23, 16)
 #define R_PLCP_HISTOGRAM 0x0738
 #define B_P0_RFCTM_VAL GENMASK(25, 20)
 #define R_P0_RFCTM_RDY BIT(26)
 #define R_P0_TRSW 0x5868
-#define B_P0_TRSW_B BIT(0)
-#define B_P0_TRSW_A BIT(1)
+#define B_P0_BT_FORCE_ANTIDX_EN BIT(12)
 #define B_P0_TRSW_X BIT(2)
+#define B_P0_TRSW_A BIT(1)
+#define B_P0_TX_ANT_SEL BIT(1)
+#define B_P0_TRSW_B BIT(0)
+#define B_P0_ANT_TRAIN_EN BIT(0)
 #define B_P0_TRSW_SO_A2 GENMASK(7, 5)
+#define R_P0_ANTSEL 0x586C
+#define B_P0_ANTSEL_SW_5G BIT(25)
+#define B_P0_ANTSEL_SW_2G BIT(23)
+#define B_P0_ANTSEL_BTG_TRX BIT(21)
+#define B_P0_ANTSEL_CGCS_CTRL BIT(17)
+#define B_P0_ANTSEL_HW_CTRL BIT(16)
+#define B_P0_ANTSEL_TX_ORI GENMASK(15, 12)
+#define B_P0_ANTSEL_RX_ALT GENMASK(11, 8)
+#define B_P0_ANTSEL_RX_ORI GENMASK(7, 4)
+#define R_RFSW_CTRL_ANT0_BASE 0x5870
+#define B_RFSW_CTRL_ANT_MAPPING GENMASK(15, 0)
 #define R_P0_RFM 0x5894
 #define B_P0_RFM_DIS_WL BIT(7)
 #define B_P0_RFM_TX_OPT BIT(6)
 #define IQK_DF4_TXT_8_25MHZ 0x021
 #define R_IQK_CFG 0x8034
 #define B_IQK_CFG_SET GENMASK(5, 4)
+#define R_IQK_RXA 0x8044
+#define B_IQK_RXAGC GENMASK(15, 13)
 #define R_TPG_SEL 0x8068
 #define R_TPG_MOD 0x806C
 #define B_TPG_MOD_F GENMASK(2, 1)
 #define B_PRT_COM_SYNERR BIT(30)
 #define B_PRT_COM_DCI GENMASK(27, 16)
 #define B_PRT_COM_CORV GENMASK(15, 8)
+#define B_RPT_COM_RDY GENMASK(15, 0)
 #define B_PRT_COM_DCQ GENMASK(11, 0)
 #define B_PRT_COM_RXOV BIT(8)
 #define B_PRT_COM_GL GENMASK(7, 4)
 #define B_IQKINF2_KCNT GENMASK(15, 8)
 #define B_IQKINF2_NCTLV GENMASK(7, 0)
 #define R_DCOF0 0xC000
+#define B_DCOF0_RST BIT(17)
 #define B_DCOF0_V GENMASK(4, 1)
 #define R_DCOF1 0xC004
+#define B_DCOF1_RST BIT(17)
 #define B_DCOF1_S BIT(0)
 #define R_DCOF8 0xC020
 #define B_DCOF8_V GENMASK(4, 1)
+#define R_DCOF9 0xC024
+#define B_DCOF9_RST BIT(17)
 #define R_DACK_S0P0 0xC040
 #define B_DACK_S0P0_OK BIT(31)
 #define R_DACK_BIAS00 0xc048
 #define B_ADDCK0D_VAL GENMASK(25, 16)
 #define R_ADDCK0 0xC0F4
 #define B_ADDCK0_TRG BIT(11)
+#define B_ADDCK0_IQ BIT(10)
 #define B_ADDCK0 GENMASK(9, 8)
 #define B_ADDCK0_MAN GENMASK(5, 4)
 #define B_ADDCK0_EN BIT(4)
 #define B_ADDCK0_RL0 GENMASK(17, 8)
 #define R_ADDCKR0 0xC0FC
 #define B_ADDCKR0_A0 GENMASK(19, 10)
+#define B_ADDCKR0_DC GENMASK(15, 4)
 #define B_ADDCKR0_A1 GENMASK(9, 0)
 #define R_DACK10 0xC100
 #define B_DACK10 GENMASK(4, 1)
 #define R_ADDCKR1 0xC1fC
 #define B_ADDCKR1_A0 GENMASK(19, 10)
 #define B_ADDCKR1_A1 GENMASK(9, 0)
+#define R_DACKN0_CTL 0xC210
+#define B_DACKN0_EN BIT(0)
+#define B_DACKN0_V GENMASK(21, 14)
+#define R_DACKN1_CTL 0xC224
+#define B_DACKN1_V GENMASK(21, 14)
 
 /* WiFi CPU local domain */
 #define R_AX_WDT_CTRL 0x0040
index 6e5a740..377a7a1 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright(c) 2019-2020  Realtek Corporation
  */
 
+#include "acpi.h"
 #include "debug.h"
 #include "ps.h"
 
@@ -282,6 +283,66 @@ do { \
                    __r->txpwr_regd[RTW89_BAND_6G]); \
 } while (0)
 
+static void rtw89_regd_setup_unii4(struct rtw89_dev *rtwdev,
+                                  struct wiphy *wiphy)
+{
+       const struct rtw89_chip_info *chip = rtwdev->chip;
+       bool regd_allow_unii_4 = chip->support_unii4;
+       struct ieee80211_supported_band *sband;
+       int ret;
+       u8 val;
+
+       if (!chip->support_unii4)
+               goto bottom;
+
+       ret = rtw89_acpi_evaluate_dsm(rtwdev, RTW89_ACPI_DSM_FUNC_59G_EN, &val);
+       if (ret) {
+               rtw89_debug(rtwdev, RTW89_DBG_REGD,
+                           "acpi: cannot eval unii 4: %d\n", ret);
+               goto bottom;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_REGD,
+                   "acpi: eval if allow unii 4: %d\n", val);
+
+       switch (val) {
+       case 0:
+               regd_allow_unii_4 = false;
+               break;
+       case 1:
+               regd_allow_unii_4 = true;
+               break;
+       default:
+               break;
+       }
+
+bottom:
+       rtw89_debug(rtwdev, RTW89_DBG_REGD, "regd: allow unii 4: %d\n",
+                   regd_allow_unii_4);
+
+       if (regd_allow_unii_4)
+               return;
+
+       sband = wiphy->bands[NL80211_BAND_5GHZ];
+       if (!sband)
+               return;
+
+       sband->n_channels -= 3;
+}
+
+int rtw89_regd_setup(struct rtw89_dev *rtwdev)
+{
+       struct wiphy *wiphy = rtwdev->hw->wiphy;
+
+       if (!wiphy)
+               return -EINVAL;
+
+       rtw89_regd_setup_unii4(rtwdev, wiphy);
+
+       wiphy->reg_notifier = rtw89_regd_notifier;
+       return 0;
+}
+
 int rtw89_regd_init(struct rtw89_dev *rtwdev,
                    void (*reg_notifier)(struct wiphy *wiphy,
                                         struct regulatory_request *request))
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c
new file mode 100644 (file)
index 0000000..00cabf9
--- /dev/null
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2022-2023  Realtek Corporation
+ */
+
+#include "coex.h"
+#include "fw.h"
+#include "mac.h"
+#include "phy.h"
+#include "reg.h"
+#include "rtw8851b.h"
+#include "rtw8851b_rfk_table.h"
+#include "rtw8851b_table.h"
+#include "txrx.h"
+#include "util.h"
+
+#define RTW8851B_FW_FORMAT_MAX 0
+#define RTW8851B_FW_BASENAME "rtw89/rtw8851b_fw"
+#define RTW8851B_MODULE_FIRMWARE \
+       RTW8851B_FW_BASENAME ".bin"
+
+static const struct rtw89_hfc_ch_cfg rtw8851b_hfc_chcfg_pcie[] = {
+       {5, 343, grp_0}, /* ACH 0 */
+       {5, 343, grp_0}, /* ACH 1 */
+       {5, 343, grp_0}, /* ACH 2 */
+       {5, 343, grp_0}, /* ACH 3 */
+       {0, 0, grp_0}, /* ACH 4 */
+       {0, 0, grp_0}, /* ACH 5 */
+       {0, 0, grp_0}, /* ACH 6 */
+       {0, 0, grp_0}, /* ACH 7 */
+       {4, 344, grp_0}, /* B0MGQ */
+       {4, 344, grp_0}, /* B0HIQ */
+       {0, 0, grp_0}, /* B1MGQ */
+       {0, 0, grp_0}, /* B1HIQ */
+       {40, 0, 0} /* FWCMDQ */
+};
+
+static const struct rtw89_hfc_pub_cfg rtw8851b_hfc_pubcfg_pcie = {
+       448, /* Group 0 */
+       0, /* Group 1 */
+       448, /* Public Max */
+       0 /* WP threshold */
+};
+
+static const struct rtw89_hfc_param_ini rtw8851b_hfc_param_ini_pcie[] = {
+       [RTW89_QTA_SCC] = {rtw8851b_hfc_chcfg_pcie, &rtw8851b_hfc_pubcfg_pcie,
+                          &rtw89_mac_size.hfc_preccfg_pcie, RTW89_HCIFC_POH},
+       [RTW89_QTA_DLFW] = {NULL, NULL, &rtw89_mac_size.hfc_preccfg_pcie,
+                           RTW89_HCIFC_POH},
+       [RTW89_QTA_INVALID] = {NULL},
+};
+
+static const struct rtw89_dle_mem rtw8851b_dle_mem_pcie[] = {
+       [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size6,
+                          &rtw89_mac_size.ple_size6, &rtw89_mac_size.wde_qt6,
+                          &rtw89_mac_size.wde_qt6, &rtw89_mac_size.ple_qt18,
+                          &rtw89_mac_size.ple_qt58},
+       [RTW89_QTA_WOW] = {RTW89_QTA_WOW, &rtw89_mac_size.wde_size6,
+                          &rtw89_mac_size.ple_size6, &rtw89_mac_size.wde_qt6,
+                          &rtw89_mac_size.wde_qt6, &rtw89_mac_size.ple_qt18,
+                          &rtw89_mac_size.ple_qt_51b_wow},
+       [RTW89_QTA_DLFW] = {RTW89_QTA_DLFW, &rtw89_mac_size.wde_size9,
+                           &rtw89_mac_size.ple_size8, &rtw89_mac_size.wde_qt4,
+                           &rtw89_mac_size.wde_qt4, &rtw89_mac_size.ple_qt13,
+                           &rtw89_mac_size.ple_qt13},
+       [RTW89_QTA_INVALID] = {RTW89_QTA_INVALID, NULL, NULL, NULL, NULL, NULL,
+                              NULL},
+};
+
+static const struct rtw89_xtal_info rtw8851b_xtal_info = {
+       .xcap_reg               = R_AX_XTAL_ON_CTRL3,
+       .sc_xo_mask             = B_AX_XTAL_SC_XO_A_BLOCK_MASK,
+       .sc_xi_mask             = B_AX_XTAL_SC_XI_A_BLOCK_MASK,
+};
+
+static const struct rtw89_chip_ops rtw8851b_chip_ops = {
+       .fem_setup              = NULL,
+       .fill_txdesc            = rtw89_core_fill_txdesc,
+       .fill_txdesc_fwcmd      = rtw89_core_fill_txdesc,
+       .h2c_dctl_sec_cam       = NULL,
+};
+
+#ifdef CONFIG_PM
+static const struct wiphy_wowlan_support rtw_wowlan_stub_8851b = {
+       .flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT,
+       .n_patterns = RTW89_MAX_PATTERN_NUM,
+       .pattern_max_len = RTW89_MAX_PATTERN_SIZE,
+       .pattern_min_len = 1,
+};
+#endif
+
+const struct rtw89_chip_info rtw8851b_chip_info = {
+       .chip_id                = RTL8851B,
+       .ops                    = &rtw8851b_chip_ops,
+       .fw_basename            = RTW8851B_FW_BASENAME,
+       .fw_format_max          = RTW8851B_FW_FORMAT_MAX,
+       .try_ce_fw              = true,
+       .fifo_size              = 196608,
+       .small_fifo_size        = true,
+       .dle_scc_rsvd_size      = 98304,
+       .max_amsdu_limit        = 3500,
+       .dis_2g_40m_ul_ofdma    = true,
+       .rsvd_ple_ofst          = 0x2f800,
+       .hfc_param_ini          = rtw8851b_hfc_param_ini_pcie,
+       .dle_mem                = rtw8851b_dle_mem_pcie,
+       .wde_qempty_acq_num     = 4,
+       .wde_qempty_mgq_sel     = 4,
+       .rf_base_addr           = {0xe000},
+       .pwr_on_seq             = NULL,
+       .pwr_off_seq            = NULL,
+       .bb_table               = &rtw89_8851b_phy_bb_table,
+       .bb_gain_table          = &rtw89_8851b_phy_bb_gain_table,
+       .rf_table               = {&rtw89_8851b_phy_radioa_table,},
+       .nctl_table             = &rtw89_8851b_phy_nctl_table,
+       .nctl_post_table        = &rtw8851b_nctl_post_defs_tbl,
+       .byr_table              = &rtw89_8851b_byr_table,
+       .dflt_parms             = &rtw89_8851b_dflt_parms,
+       .rfe_parms_conf         = rtw89_8851b_rfe_parms_conf,
+       .txpwr_factor_rf        = 2,
+       .txpwr_factor_mac       = 1,
+       .dig_table              = NULL,
+       .tssi_dbw_table         = NULL,
+       .support_chanctx_num    = 0,
+       .support_bands          = BIT(NL80211_BAND_2GHZ) |
+                                 BIT(NL80211_BAND_5GHZ),
+       .support_bw160          = false,
+       .support_unii4          = true,
+       .support_ul_tb_ctrl     = true,
+       .hw_sec_hdr             = false,
+       .rf_path_num            = 1,
+       .tx_nss                 = 1,
+       .rx_nss                 = 1,
+       .acam_num               = 32,
+       .bcam_num               = 20,
+       .scam_num               = 128,
+       .bacam_num              = 2,
+       .bacam_dynamic_num      = 4,
+       .bacam_ver              = RTW89_BACAM_V0,
+       .sec_ctrl_efuse_size    = 4,
+       .physical_efuse_size    = 1216,
+       .logical_efuse_size     = 2048,
+       .limit_efuse_size       = 1280,
+       .dav_phy_efuse_size     = 0,
+       .dav_log_efuse_size     = 0,
+       .phycap_addr            = 0x580,
+       .phycap_size            = 128,
+       .para_ver               = 0,
+       .wlcx_desired           = 0x06000000,
+       .btcx_desired           = 0x7,
+       .scbd                   = 0x1,
+       .mailbox                = 0x1,
+
+       .ps_mode_supported      = BIT(RTW89_PS_MODE_RFOFF) |
+                                 BIT(RTW89_PS_MODE_CLK_GATED),
+       .low_power_hci_modes    = 0,
+       .h2c_cctl_func_id       = H2C_FUNC_MAC_CCTLINFO_UD,
+       .hci_func_en_addr       = R_AX_HCI_FUNC_EN,
+       .h2c_desc_size          = sizeof(struct rtw89_txwd_body),
+       .txwd_body_size         = sizeof(struct rtw89_txwd_body),
+       .bss_clr_map_reg        = R_BSS_CLR_MAP_V1,
+       .dma_ch_mask            = BIT(RTW89_DMA_ACH4) | BIT(RTW89_DMA_ACH5) |
+                                 BIT(RTW89_DMA_ACH6) | BIT(RTW89_DMA_ACH7) |
+                                 BIT(RTW89_DMA_B1MG) | BIT(RTW89_DMA_B1HI),
+       .edcca_lvl_reg          = R_SEG0R_EDCCA_LVL_V1,
+#ifdef CONFIG_PM
+       .wowlan_stub            = &rtw_wowlan_stub_8851b,
+#endif
+       .xtal_info              = &rtw8851b_xtal_info,
+};
+EXPORT_SYMBOL(rtw8851b_chip_info);
+
+MODULE_FIRMWARE(RTW8851B_MODULE_FIRMWARE);
+MODULE_AUTHOR("Realtek Corporation");
+MODULE_DESCRIPTION("Realtek 802.11ax wireless 8851B driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.h b/drivers/net/wireless/realtek/rtw89/rtw8851b.h
new file mode 100644 (file)
index 0000000..e34b7d0
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2022-2023  Realtek Corporation
+ */
+
+#ifndef __RTW89_8851B_H__
+#define __RTW89_8851B_H__
+
+#include "core.h"
+
+#define RF_PATH_NUM_8851B 1
+#define BB_PATH_NUM_8851B 1
+
+extern const struct rtw89_chip_info rtw8851b_chip_info;
+
+#endif
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c
new file mode 100644 (file)
index 0000000..6eb47ed
--- /dev/null
@@ -0,0 +1,1775 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2022-2023  Realtek Corporation
+ */
+
+#include "coex.h"
+#include "debug.h"
+#include "mac.h"
+#include "phy.h"
+#include "reg.h"
+#include "rtw8851b.h"
+#include "rtw8851b_rfk.h"
+#include "rtw8851b_rfk_table.h"
+#include "rtw8851b_table.h"
+
+#define RTW8851B_RXK_GROUP_NR 4
+#define RTW8851B_TXK_GROUP_NR 1
+#define RTW8851B_IQK_VER 0x2a
+#define RTW8851B_IQK_SS 1
+#define RTW8851B_LOK_GRAM 10
+
+enum rtw8851b_iqk_type {
+       ID_TXAGC = 0x0,
+       ID_FLOK_COARSE = 0x1,
+       ID_FLOK_FINE = 0x2,
+       ID_TXK = 0x3,
+       ID_RXAGC = 0x4,
+       ID_RXK = 0x5,
+       ID_NBTXK = 0x6,
+       ID_NBRXK = 0x7,
+       ID_FLOK_VBUFFER = 0x8,
+       ID_A_FLOK_COARSE = 0x9,
+       ID_G_FLOK_COARSE = 0xa,
+       ID_A_FLOK_FINE = 0xb,
+       ID_G_FLOK_FINE = 0xc,
+       ID_IQK_RESTORE = 0x10,
+};
+
+static const u32 g_idxrxgain[RTW8851B_RXK_GROUP_NR] = {0x10e, 0x116, 0x28e, 0x296};
+static const u32 g_idxattc2[RTW8851B_RXK_GROUP_NR] = {0x0, 0xf, 0x0, 0xf};
+static const u32 g_idxrxagc[RTW8851B_RXK_GROUP_NR] = {0x0, 0x1, 0x2, 0x3};
+static const u32 a_idxrxgain[RTW8851B_RXK_GROUP_NR] = {0x10C, 0x112, 0x28c, 0x292};
+static const u32 a_idxattc2[RTW8851B_RXK_GROUP_NR] = {0xf, 0xf, 0xf, 0xf};
+static const u32 a_idxrxagc[RTW8851B_RXK_GROUP_NR] = {0x4, 0x5, 0x6, 0x7};
+static const u32 a_power_range[RTW8851B_TXK_GROUP_NR] = {0x0};
+static const u32 a_track_range[RTW8851B_TXK_GROUP_NR] = {0x6};
+static const u32 a_gain_bb[RTW8851B_TXK_GROUP_NR] = {0x0a};
+static const u32 a_itqt[RTW8851B_TXK_GROUP_NR] = {0x12};
+static const u32 g_power_range[RTW8851B_TXK_GROUP_NR] = {0x0};
+static const u32 g_track_range[RTW8851B_TXK_GROUP_NR] = {0x6};
+static const u32 g_gain_bb[RTW8851B_TXK_GROUP_NR] = {0x10};
+static const u32 g_itqt[RTW8851B_TXK_GROUP_NR] = {0x12};
+
+static const u32 rtw8851b_backup_bb_regs[] = {0xc0ec, 0xc0e8};
+static const u32 rtw8851b_backup_rf_regs[] = {
+       0xef, 0xde, 0x0, 0x1e, 0x2, 0x85, 0x90, 0x5};
+
+#define BACKUP_BB_REGS_NR ARRAY_SIZE(rtw8851b_backup_bb_regs)
+#define BACKUP_RF_REGS_NR ARRAY_SIZE(rtw8851b_backup_rf_regs)
+
+static u8 _kpath(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+       return RF_A;
+}
+
+static void _adc_fifo_rst(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                         u8 path)
+{
+       rtw89_phy_write32_mask(rtwdev, R_ADC_FIFO, B_ADC_FIFO_RXK, 0x0101);
+       fsleep(10);
+       rtw89_phy_write32_mask(rtwdev, R_ADC_FIFO, B_ADC_FIFO_RXK, 0x1111);
+}
+
+static void _wait_rx_mode(struct rtw89_dev *rtwdev, u8 kpath)
+{
+       u32 rf_mode;
+       u8 path;
+       int ret;
+
+       for (path = 0; path < RF_PATH_MAX; path++) {
+               if (!(kpath & BIT(path)))
+                       continue;
+
+               ret = read_poll_timeout_atomic(rtw89_read_rf, rf_mode,
+                                              rf_mode != 2, 2, 5000, false,
+                                              rtwdev, path, 0x00, RR_MOD_MASK);
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[RFK] Wait S%d to Rx mode!! (ret = %d)\n",
+                           path, ret);
+       }
+}
+
+static void _dack_reset(struct rtw89_dev *rtwdev, enum rtw89_rf_path path)
+{
+       rtw89_phy_write32_mask(rtwdev, R_DCOF0, B_DCOF0_RST, 0x0);
+       rtw89_phy_write32_mask(rtwdev, R_DCOF0, B_DCOF0_RST, 0x1);
+}
+
+static void _drck(struct rtw89_dev *rtwdev)
+{
+       u32 rck_d;
+       u32 val;
+       int ret;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]Ddie RCK start!!!\n");
+
+       rtw89_phy_write32_mask(rtwdev, R_DRCK, B_DRCK_IDLE, 0x1);
+       rtw89_phy_write32_mask(rtwdev, R_DRCK, B_DRCK_EN, 0x1);
+
+       ret = read_poll_timeout_atomic(rtw89_phy_read32_mask, val, val,
+                                      1, 10000, false,
+                                      rtwdev, R_DRCK_RES, B_DRCK_POL);
+       if (ret)
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]DRCK timeout\n");
+
+       rtw89_phy_write32_mask(rtwdev, R_DRCK, B_DRCK_EN, 0x0);
+       rtw89_phy_write32_mask(rtwdev, R_DRCK_FH, B_DRCK_LAT, 0x1);
+       udelay(1);
+       rtw89_phy_write32_mask(rtwdev, R_DRCK_FH, B_DRCK_LAT, 0x0);
+
+       rck_d = rtw89_phy_read32_mask(rtwdev, R_DRCK_RES, 0x7c00);
+       rtw89_phy_write32_mask(rtwdev, R_DRCK, B_DRCK_IDLE, 0x0);
+       rtw89_phy_write32_mask(rtwdev, R_DRCK, B_DRCK_VAL, rck_d);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]0xc0c4 = 0x%x\n",
+                   rtw89_phy_read32_mask(rtwdev, R_DRCK, MASKDWORD));
+}
+
+static void _addck_backup(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0, B_ADDCK0, 0x0);
+
+       dack->addck_d[0][0] = rtw89_phy_read32_mask(rtwdev, R_ADDCKR0, B_ADDCKR0_A0);
+       dack->addck_d[0][1] = rtw89_phy_read32_mask(rtwdev, R_ADDCKR0, B_ADDCKR0_A1);
+}
+
+static void _addck_reload(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0_RL, B_ADDCK0_RL1, dack->addck_d[0][0]);
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0_RL, B_ADDCK0_RL0, dack->addck_d[0][1]);
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0_RL, B_ADDCK0_RLS, 0x3);
+}
+
+static void _dack_backup_s0(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+       u8 i;
+
+       rtw89_phy_write32_mask(rtwdev, R_P0_NRBW, B_P0_NRBW_DBG, 0x1);
+
+       for (i = 0; i < RTW89_DACK_MSBK_NR; i++) {
+               rtw89_phy_write32_mask(rtwdev, R_DCOF0, B_DCOF0_V, i);
+               dack->msbk_d[0][0][i] =
+                       rtw89_phy_read32_mask(rtwdev, R_DACK_S0P2, B_DACK_S0M0);
+
+               rtw89_phy_write32_mask(rtwdev, R_DCOF8, B_DCOF8_V, i);
+               dack->msbk_d[0][1][i] =
+                       rtw89_phy_read32_mask(rtwdev, R_DACK_S0P3, B_DACK_S0M1);
+       }
+
+       dack->biask_d[0][0] =
+               rtw89_phy_read32_mask(rtwdev, R_DACK_BIAS00, B_DACK_BIAS00);
+       dack->biask_d[0][1] =
+               rtw89_phy_read32_mask(rtwdev, R_DACK_BIAS01, B_DACK_BIAS01);
+       dack->dadck_d[0][0] =
+               rtw89_phy_read32_mask(rtwdev, R_DACK_DADCK00, B_DACK_DADCK00) + 24;
+       dack->dadck_d[0][1] =
+               rtw89_phy_read32_mask(rtwdev, R_DACK_DADCK01, B_DACK_DADCK01) + 24;
+}
+
+static void _dack_reload_by_path(struct rtw89_dev *rtwdev,
+                                enum rtw89_rf_path path, u8 index)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+       u32 idx_offset, path_offset;
+       u32 offset, reg;
+       u32 tmp;
+       u8 i;
+
+       if (index == 0)
+               idx_offset = 0;
+       else
+               idx_offset = 0x14;
+
+       if (path == RF_PATH_A)
+               path_offset = 0;
+       else
+               path_offset = 0x28;
+
+       offset = idx_offset + path_offset;
+
+       rtw89_phy_write32_mask(rtwdev, R_DCOF1, B_DCOF1_RST, 0x1);
+       rtw89_phy_write32_mask(rtwdev, R_DCOF9, B_DCOF9_RST, 0x1);
+
+       /* msbk_d: 15/14/13/12 */
+       tmp = 0x0;
+       for (i = 0; i < 4; i++)
+               tmp |= dack->msbk_d[path][index][i + 12] << (i * 8);
+       reg = 0xc200 + offset;
+       rtw89_phy_write32(rtwdev, reg, tmp);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]0x%x=0x%x\n", reg,
+                   rtw89_phy_read32_mask(rtwdev, reg, MASKDWORD));
+
+       /* msbk_d: 11/10/9/8 */
+       tmp = 0x0;
+       for (i = 0; i < 4; i++)
+               tmp |= dack->msbk_d[path][index][i + 8] << (i * 8);
+       reg = 0xc204 + offset;
+       rtw89_phy_write32(rtwdev, reg, tmp);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]0x%x=0x%x\n", reg,
+                   rtw89_phy_read32_mask(rtwdev, reg, MASKDWORD));
+
+       /* msbk_d: 7/6/5/4 */
+       tmp = 0x0;
+       for (i = 0; i < 4; i++)
+               tmp |= dack->msbk_d[path][index][i + 4] << (i * 8);
+       reg = 0xc208 + offset;
+       rtw89_phy_write32(rtwdev, reg, tmp);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]0x%x=0x%x\n", reg,
+                   rtw89_phy_read32_mask(rtwdev, reg, MASKDWORD));
+
+       /* msbk_d: 3/2/1/0 */
+       tmp = 0x0;
+       for (i = 0; i < 4; i++)
+               tmp |= dack->msbk_d[path][index][i] << (i * 8);
+       reg = 0xc20c + offset;
+       rtw89_phy_write32(rtwdev, reg, tmp);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]0x%x=0x%x\n", reg,
+                   rtw89_phy_read32_mask(rtwdev, reg, MASKDWORD));
+
+       /* dadak_d/biask_d */
+       tmp = 0x0;
+       tmp = (dack->biask_d[path][index] << 22) |
+             (dack->dadck_d[path][index] << 14);
+       reg = 0xc210 + offset;
+       rtw89_phy_write32(rtwdev, reg, tmp);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]0x%x=0x%x\n", reg,
+                   rtw89_phy_read32_mask(rtwdev, reg, MASKDWORD));
+
+       rtw89_phy_write32_mask(rtwdev, R_DACKN0_CTL + offset, B_DACKN0_EN, 0x1);
+}
+
+static void _dack_reload(struct rtw89_dev *rtwdev, enum rtw89_rf_path path)
+{
+       u8 index;
+
+       for (index = 0; index < 2; index++)
+               _dack_reload_by_path(rtwdev, path, index);
+}
+
+static void _addck(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+       u32 val;
+       int ret;
+
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0, B_ADDCK0_RST, 0x1);
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0, B_ADDCK0_EN, 0x1);
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0, B_ADDCK0_EN, 0x0);
+       udelay(1);
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0, B_ADDCK0, 0x1);
+
+       ret = read_poll_timeout_atomic(rtw89_phy_read32_mask, val, val,
+                                      1, 10000, false,
+                                      rtwdev, R_ADDCKR0, BIT(0));
+       if (ret) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]S0 ADDCK timeout\n");
+               dack->addck_timeout[0] = true;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]ADDCK ret = %d\n", ret);
+
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0, B_ADDCK0_RST, 0x0);
+}
+
+/* Run the DAC DC offset calibration (DADCK): measure the residual I/Q DC
+ * via the ADDCK report register, derive correction codes, and program them
+ * into the DACKN control registers.
+ */
+static void _new_dadck(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+       u32 i_dc, q_dc, ic, qc;
+       u32 val;
+       int ret;
+
+       rtw89_rfk_parser(rtwdev, &rtw8851b_dadck_setup_defs_tbl);
+
+       /* Wait for the hardware done flag, same report register as ADDCK. */
+       ret = read_poll_timeout_atomic(rtw89_phy_read32_mask, val, val,
+                                      1, 10000, false,
+                                      rtwdev, R_ADDCKR0, BIT(0));
+       if (ret) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]S0 DADCK timeout\n");
+               /* NOTE(review): DADCK timeout is recorded in addck_timeout[]
+                * rather than a dedicated flag -- confirm this is intended.
+                */
+               dack->addck_timeout[0] = true;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]DADCK ret = %d\n", ret);
+
+       /* Select I then Q via B_ADDCK0_IQ and read each raw DC report. */
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0, B_ADDCK0_IQ, 0x0);
+       i_dc = rtw89_phy_read32_mask(rtwdev, R_ADDCKR0, B_ADDCKR0_DC);
+       rtw89_phy_write32_mask(rtwdev, R_ADDCK0, B_ADDCK0_IQ, 0x1);
+       q_dc = rtw89_phy_read32_mask(rtwdev, R_ADDCKR0, B_ADDCKR0_DC);
+
+       /* Raw DC is a 12-bit signed value; scale by 6 around midpoint 0x80.
+        * The exact formula presumably comes from the vendor calibration
+        * spec -- not derivable from this file alone.
+        */
+       ic = 0x80 - sign_extend32(i_dc, 11) * 6;
+       qc = 0x80 - sign_extend32(q_dc, 11) * 6;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[DACK]before DADCK, i_dc=0x%x, q_dc=0x%x\n", i_dc, q_dc);
+
+       dack->dadck_d[0][0] = ic;
+       dack->dadck_d[0][1] = qc;
+
+       /* Program the correction codes and dump the registers for debug. */
+       rtw89_phy_write32_mask(rtwdev, R_DACKN0_CTL, B_DACKN0_V, dack->dadck_d[0][0]);
+       rtw89_phy_write32_mask(rtwdev, R_DACKN1_CTL, B_DACKN1_V, dack->dadck_d[0][1]);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[DACK]after DADCK, 0xc210=0x%x, 0xc224=0x%x\n",
+                   rtw89_phy_read32_mask(rtwdev, R_DACKN0_CTL, MASKDWORD),
+                   rtw89_phy_read32_mask(rtwdev, R_DACKN1_CTL, MASKDWORD));
+
+       rtw89_rfk_parser(rtwdev, &rtw8851b_dadck_post_defs_tbl);
+}
+
+/* Poll predicate for DACK on S0: true only when all four per-stage OK
+ * flags (P0..P3) are set. Registers are read in P0..P3 order and the
+ * check short-circuits on the first stage still pending.
+ */
+static bool _dack_s0_poll(struct rtw89_dev *rtwdev)
+{
+       return rtw89_phy_read32_mask(rtwdev, R_DACK_S0P0, B_DACK_S0P0_OK) != 0 &&
+              rtw89_phy_read32_mask(rtwdev, R_DACK_S0P1, B_DACK_S0P1_OK) != 0 &&
+              rtw89_phy_read32_mask(rtwdev, R_DACK_S0P2, B_DACK_S0P2_OK) != 0 &&
+              rtw89_phy_read32_mask(rtwdev, R_DACK_S0P3, B_DACK_S0P3_OK) != 0;
+}
+
+/* Run the full DACK sequence for S0 (RF path A): set up, trigger, poll
+ * all four stages, then back up and reload the calibration results.
+ * A timeout is recorded in dack->msbk_timeout[0] but does not abort.
+ */
+static void _dack_s0(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+       bool done;
+       int ret;
+
+       rtw89_rfk_parser(rtwdev, &rtw8851b_dack_s0_1_defs_tbl);
+       _dack_reset(rtwdev, RF_PATH_A);
+       /* Start the calibration state machine. */
+       rtw89_phy_write32_mask(rtwdev, R_DCOF1, B_DCOF1_S, 0x1);
+
+       /* Wait up to 10 ms for all four stage-OK flags. */
+       ret = read_poll_timeout_atomic(_dack_s0_poll, done, done,
+                                      1, 10000, false, rtwdev);
+       if (ret) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]S0 DACK timeout\n");
+               dack->msbk_timeout[0] = true;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]DACK ret = %d\n", ret);
+
+       rtw89_rfk_parser(rtwdev, &rtw8851b_dack_s0_2_defs_tbl);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]after S0 DADCK\n");
+
+       /* Snapshot results into dack state, then write them back to HW. */
+       _dack_backup_s0(rtwdev);
+       _dack_reload(rtwdev, RF_PATH_A);
+
+       rtw89_phy_write32_mask(rtwdev, R_P0_NRBW, B_P0_NRBW_DBG, 0x0);
+}
+
+/* Run DACK on all paths; only S0 exists here (8851B is presumably a
+ * single-RF-path chip -- confirm against the chip definition).
+ */
+static void _dack(struct rtw89_dev *rtwdev)
+{
+       _dack_s0(rtwdev);
+}
+
+/* Dump the stored S0 DACK results (ADDCK, DADCK, bias-k and MSBK I/Q
+ * codes) to the RFK debug log. Read-only: no hardware access.
+ */
+static void _dack_dump(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+       u8 i;
+       u8 t;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]S0 ADC_DCK ic = 0x%x, qc = 0x%x\n",
+                   dack->addck_d[0][0], dack->addck_d[0][1]);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]S0 DAC_DCK ic = 0x%x, qc = 0x%x\n",
+                   dack->dadck_d[0][0], dack->dadck_d[0][1]);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]S0 biask ic = 0x%x, qc = 0x%x\n",
+                   dack->biask_d[0][0], dack->biask_d[0][1]);
+
+       /* msbk_d[path][iq][idx]: iq 0 = I channel, 1 = Q channel. */
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]S0 MSBK ic:\n");
+       for (i = 0; i < RTW89_DACK_MSBK_NR; i++) {
+               t = dack->msbk_d[0][0][i];
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]0x%x\n", t);
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]S0 MSBK qc:\n");
+       for (i = 0; i < RTW89_DACK_MSBK_NR; i++) {
+               t = dack->msbk_d[0][1][i];
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]0x%x\n", t);
+       }
+}
+
+/* Disable manual DACK override by replaying the dedicated register table. */
+static void _dack_manual_off(struct rtw89_dev *rtwdev)
+{
+       rtw89_rfk_parser(rtwdev, &rtw8851b_dack_manual_off_defs_tbl);
+}
+
+/* Top-level DAC calibration: DRCK -> ADDCK (with backup/reload) -> DACK ->
+ * DADCK, bracketed by RF mode/reset register manipulation on path A.
+ * dack->dack_done is cleared on entry and set once the sequence finishes.
+ *
+ * NOTE(review): @force is currently unused -- the calibration always runs
+ * unconditionally; confirm whether a skip-if-done path was intended.
+ */
+static void _dac_cal(struct rtw89_dev *rtwdev, bool force)
+{
+       struct rtw89_dack_info *dack = &rtwdev->dack;
+       u32 rf0_0;
+
+       dack->dack_done = false;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]DACK 0x2\n");
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]DACK start!!!\n");
+       /* rf0_0 is captured for the debug log only; it is not restored. */
+       rf0_0 = rtw89_read_rf(rtwdev, RF_PATH_A, RR_MOD, RFREG_MASK);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]RF0=0x%x\n", rf0_0);
+
+       _drck(rtwdev);
+       _dack_manual_off(rtwdev);
+       /* Magic RF mode values (0x337e1 / 0x40001) come from the vendor
+        * calibration flow; semantics are not visible here.
+        */
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_MOD, RFREG_MASK, 0x337e1);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_RSV1, RR_RSV1_RST, 0x0);
+
+       _addck(rtwdev);
+       _addck_backup(rtwdev);
+       _addck_reload(rtwdev);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_MOD, RFREG_MASK, 0x40001);
+
+       _dack(rtwdev);
+       _new_dadck(rtwdev);
+       _dack_dump(rtwdev);
+       /* Release the RF reset held low before ADDCK. */
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_RSV1, RR_RSV1_RST, 0x1);
+
+       dack->dack_done = true;
+       dack->dack_cnt++;
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DACK]DACK finish!!!\n");
+}
+
+/* Dump the IQK SRAM capture (0xa0 entries, DCI then DCQ reports) to the
+ * RFK debug log, then restore the SRAM access registers.
+ *
+ * NOTE(review): @path is unused -- the register addresses are not offset
+ * by path; presumably fine on a single-path chip, but confirm.
+ */
+static void _iqk_sram(struct rtw89_dev *rtwdev, u8 path)
+{
+       u32 i;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       /* Enable SRAM readback mode. */
+       rtw89_phy_write32_mask(rtwdev, R_KIP_RPT1, MASKDWORD, 0x00020000);
+       rtw89_phy_write32_mask(rtwdev, R_MDPK_RX_DCK, MASKDWORD, 0x80000000);
+       rtw89_phy_write32_mask(rtwdev, R_SRAM_IQRX2, MASKDWORD, 0x00000080);
+       rtw89_phy_write32_mask(rtwdev, R_SRAM_IQRX, MASKDWORD, 0x00010000);
+       rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_TXT, 0x009);
+
+       /* First pass: write the entry index, read the I (DCI) report. */
+       for (i = 0; i <= 0x9f; i++) {
+               rtw89_phy_write32_mask(rtwdev, R_SRAM_IQRX, MASKDWORD,
+                                      0x00010000 + i);
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]0x%x\n",
+                           rtw89_phy_read32_mask(rtwdev, R_RPT_COM, B_PRT_COM_DCI));
+       }
+
+       /* Second pass: same indices, read the Q (DCQ) report. */
+       for (i = 0; i <= 0x9f; i++) {
+               rtw89_phy_write32_mask(rtwdev, R_SRAM_IQRX, MASKDWORD,
+                                      0x00010000 + i);
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]0x%x\n",
+                           rtw89_phy_read32_mask(rtwdev, R_RPT_COM, B_PRT_COM_DCQ));
+       }
+
+       /* Leave readback mode. */
+       rtw89_phy_write32_mask(rtwdev, R_SRAM_IQRX2, MASKDWORD, 0x00000000);
+       rtw89_phy_write32_mask(rtwdev, R_SRAM_IQRX, MASKDWORD, 0x00000000);
+}
+
+/* Prepare @path for RX IQK: select RX calibration mode and power-cycle
+ * the RXK PLL (off then on).
+ */
+static void _iqk_rxk_setting(struct rtw89_dev *rtwdev, u8 path)
+{
+       rtw89_write_rf(rtwdev, path, RR_MOD, RR_MOD_MASK, 0xc);
+       rtw89_write_rf(rtwdev, path, RR_RXKPLL, RR_RXKPLL_POW, 0x0);
+       rtw89_write_rf(rtwdev, path, RR_RXKPLL, RR_RXKPLL_POW, 0x1);
+}
+
+/* Wait for a one-shot IQK command to complete by polling two stages:
+ * the NCTL byte at 0xbff8 reaching 0x55, then R_RPT_COM ready = 0x8000.
+ * Returns true when either stage timed out (i.e. "not ready").
+ *
+ * NOTE(review): 0xbff8 is a raw register address with no R_* define --
+ * consider adding one for consistency with the rest of the file.
+ */
+static bool _iqk_check_cal(struct rtw89_dev *rtwdev, u8 path)
+{
+       bool fail1 = false, fail2 = false;
+       u32 val;
+       int ret;
+
+       /* Stage 1: NCTL handshake byte becomes 0x55 within ~8.2 ms. */
+       ret = read_poll_timeout_atomic(rtw89_phy_read32_mask, val, val == 0x55,
+                                      10, 8200, false,
+                                      rtwdev, 0xbff8, MASKBYTE0);
+       if (ret) {
+               fail1 = true;
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]NCTL1 IQK timeout!!!\n");
+       }
+
+       fsleep(10);
+
+       /* Stage 2: report-ready flag within ~200 us. */
+       ret = read_poll_timeout_atomic(rtw89_phy_read32_mask, val, val == 0x8000,
+                                      10, 200, false,
+                                      rtwdev, R_RPT_COM, B_RPT_COM_RDY);
+       if (ret) {
+               fail2 = true;
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]NCTL2 IQK timeout!!!\n");
+       }
+
+       fsleep(10);
+       /* Clear the handshake byte for the next one-shot command. */
+       rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, MASKBYTE0, 0x0);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, ret = %d, notready = %x fail=%d,%d\n",
+                   path, ret, fail1 || fail2, fail1, fail2);
+
+       return fail1 || fail2;
+}
+
+/* Issue one hardware IQK command of type @ktype on @path and wait for it.
+ * Each case programs the RF-control clock gate and builds the NCTL command
+ * word (bit 4+path selects the path; bits 8..11 encode the sub-command,
+ * with TXK/RXK folding in the calibration bandwidth). Returns true when
+ * the command did not complete in time ("not ready").
+ */
+static bool _iqk_one_shot(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                         u8 path, u8 ktype)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool notready;
+       u32 iqk_cmd;
+
+       switch (ktype) {
+       case ID_A_FLOK_COARSE:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_A_FLOK_COARSE ============\n", path);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               iqk_cmd = 0x108 | (1 << (4 + path));
+               break;
+       case ID_G_FLOK_COARSE:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_G_FLOK_COARSE ============\n", path);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               iqk_cmd = 0x108 | (1 << (4 + path));
+               break;
+       case ID_A_FLOK_FINE:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_A_FLOK_FINE ============\n", path);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               iqk_cmd = 0x308 | (1 << (4 + path));
+               break;
+       case ID_G_FLOK_FINE:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_G_FLOK_FINE ============\n", path);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               iqk_cmd = 0x308 | (1 << (4 + path));
+               break;
+       case ID_TXK:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_TXK ============\n", path);
+               /* TXK runs with the RF-control clock gated off. */
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x0);
+               iqk_cmd = 0x008 | (1 << (path + 4)) |
+                         (((0x8 + iqk_info->iqk_bw[path]) & 0xf) << 8);
+               break;
+       case ID_RXAGC:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_RXAGC ============\n", path);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               iqk_cmd = 0x708 | (1 << (4 + path)) | (path << 1);
+               break;
+       case ID_RXK:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_RXK ============\n", path);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               iqk_cmd = 0x008 | (1 << (path + 4)) |
+                         (((0xc + iqk_info->iqk_bw[path]) & 0xf) << 8);
+               break;
+       case ID_NBTXK:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_NBTXK ============\n", path);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x0);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_TXT,
+                                      0x00b);
+               iqk_cmd = 0x408 | (1 << (4 + path));
+               break;
+       case ID_NBRXK:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]============ S%d ID_NBRXK ============\n", path);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT,
+                                      0x011);
+               iqk_cmd = 0x608 | (1 << (4 + path));
+               break;
+       default:
+               /* Unknown command type: report success without touching HW. */
+               return false;
+       }
+
+       /* +1 presumably sets the "go" bit of the command word -- confirm. */
+       rtw89_phy_write32_mask(rtwdev, R_NCTL_CFG, MASKDWORD, iqk_cmd + 1);
+       notready = _iqk_check_cal(rtwdev, path);
+       /* Dump the capture SRAM after RX-type commands when enabled. */
+       if (iqk_info->iqk_sram_en &&
+           (ktype == ID_NBRXK || ktype == ID_RXK))
+               _iqk_sram(rtwdev, path);
+
+       rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x0);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, ktype= %x, id = %x, notready = %x\n",
+                   path, ktype, iqk_cmd + 1, notready);
+
+       return notready;
+}
+
+/* Wideband RX IQK for the 2 GHz band: sweep all RX gain groups, run
+ * RXAGC then RXK per group (plus NBRXK on the last group to capture a
+ * narrow-band fallback CFIR), and commit either the fallback or the
+ * hardware result depending on the final fail flag. Returns true on fail.
+ */
+static bool _rxk_2g_group_sel(struct rtw89_dev *rtwdev,
+                             enum rtw89_phy_idx phy_idx, u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool kfail = false;
+       bool notready;
+       u32 rf_0;
+       u8 gp;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       for (gp = 0; gp < RTW8851B_RXK_GROUP_NR; gp++) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]S%x, gp = %x\n", path, gp);
+
+               /* Per-group RF gain/attenuation and CFIR LUT selection. */
+               rtw89_write_rf(rtwdev, path, RR_MOD, RR_MOD_RGM, g_idxrxgain[gp]);
+               rtw89_write_rf(rtwdev, path, RR_RXBB, RR_RXBB_C2, g_idxattc2[gp]);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_SEL, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, 0x0);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_GP_V1, gp);
+
+               /* Enable RXK PLL, then mirror the RF mode into the BB. */
+               rtw89_write_rf(rtwdev, path, RR_RXKPLL, RFREG_MASK, 0x80013);
+               fsleep(10);
+               rf_0 = rtw89_read_rf(rtwdev, path, RR_MOD, RFREG_MASK);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF2, B_IQK_DIF2_RXPI, rf_0);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_RXA, B_IQK_RXAGC, g_idxrxagc[gp]);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT, 0x11);
+
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_RXAGC);
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S%x, RXAGC 0x8008 = 0x%x, rxbb = %x\n", path,
+                           rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD),
+                           rtw89_read_rf(rtwdev, path, RR_MOD, 0x003e0));
+
+               /* Last group: also capture a narrow-band CFIR as fallback. */
+               if (gp == 0x3) {
+                       rtw89_write_rf(rtwdev, path, RR_RXKPLL, RR_RXKPLL_OFF, 0x13);
+                       rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT, 0x011);
+                       notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_NBRXK);
+                       iqk_info->nb_rxcfir[path] =
+                               rtw89_phy_read32_mask(rtwdev, R_RXIQC, MASKDWORD) | 0x2;
+
+                       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                                   "[IQK]S%x, NBRXK 0x8008 = 0x%x\n", path,
+                                   rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD));
+               }
+
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_RXK);
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S%x, WBRXK 0x8008 = 0x%x\n", path,
+                           rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD));
+       }
+
+       /* Only the final group's readiness is checked here. */
+       if (!notready)
+               kfail = !!rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, B_NCTL_RPT_FLG);
+
+       if (kfail)
+               _iqk_sram(rtwdev, path);
+
+       if (kfail) {
+               /* Fall back to the narrow-band CFIR captured above.
+                * NOTE(review): this RX routine updates is_wb_txiqk (not
+                * is_wb_rxiqk) -- verify which flag is intended.
+                */
+               rtw89_phy_write32_mask(rtwdev, R_RXIQC + (path << 8),
+                                      MASKDWORD, iqk_info->nb_rxcfir[path] | 0x2);
+               iqk_info->is_wb_txiqk[path] = false;
+       } else {
+               rtw89_phy_write32_mask(rtwdev, R_RXIQC + (path << 8),
+                                      MASKDWORD, 0x40000000);
+               iqk_info->is_wb_txiqk[path] = true;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, kfail = 0x%x, 0x8%x3c = 0x%x\n", path, kfail,
+                   1 << path, iqk_info->nb_rxcfir[path]);
+       return kfail;
+}
+
+/* Wideband RX IQK for the 5 GHz band. Mirrors _rxk_2g_group_sel() but
+ * uses the a_* (5G) gain tables, a longer PLL settle delay (100 us vs 10)
+ * and a different RF gain field. Returns true on calibration failure.
+ */
+static bool _rxk_5g_group_sel(struct rtw89_dev *rtwdev,
+                             enum rtw89_phy_idx phy_idx, u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool kfail = false;
+       bool notready;
+       u32 rf_0;
+       u8 gp;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       for (gp = 0; gp < RTW8851B_RXK_GROUP_NR; gp++) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]S%x, gp = %x\n", path, gp);
+
+               /* NOTE(review): RF writes use RF_PATH_A while BB writes use
+                * @path -- presumably equivalent on this single-path chip.
+                * 0x03ff0 is a raw field mask with no RR_* define.
+                */
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_MOD, 0x03ff0, a_idxrxgain[gp]);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_RXA2, RR_RXA2_ATT, a_idxattc2[gp]);
+
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_SEL, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, 0x0);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_GP_V1, gp);
+
+               /* Enable RXK PLL; 5G needs a longer settle than 2G. */
+               rtw89_write_rf(rtwdev, path, RR_RXKPLL, RFREG_MASK, 0x80013);
+               fsleep(100);
+               rf_0 = rtw89_read_rf(rtwdev, path, RR_MOD, RFREG_MASK);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF2, B_IQK_DIF2_RXPI, rf_0);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_RXA, B_IQK_RXAGC, a_idxrxagc[gp]);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT, 0x11);
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_RXAGC);
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S%x, RXAGC 0x8008 = 0x%x, rxbb = %x\n", path,
+                           rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD),
+                           rtw89_read_rf(rtwdev, path, RR_MOD, RR_MOD_RXB));
+
+               /* Last group: capture a narrow-band CFIR as fallback. */
+               if (gp == 0x3) {
+                       rtw89_write_rf(rtwdev, path, RR_RXKPLL, RR_RXKPLL_OFF, 0x13);
+                       rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT, 0x011);
+                       notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_NBRXK);
+                       iqk_info->nb_rxcfir[path] =
+                               rtw89_phy_read32_mask(rtwdev, R_RXIQC, MASKDWORD) | 0x2;
+
+                       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                                   "[IQK]S%x, NBRXK 0x8008 = 0x%x\n", path,
+                                   rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD));
+               }
+
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_RXK);
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S%x, WBRXK 0x8008 = 0x%x\n", path,
+                           rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD));
+       }
+
+       /* Only the final group's readiness is checked here. */
+       if (!notready)
+               kfail = !!rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, B_NCTL_RPT_FLG);
+
+       if (kfail)
+               _iqk_sram(rtwdev, path);
+
+       if (kfail) {
+               /* NOTE(review): sets is_wb_txiqk in an RX routine -- verify. */
+               rtw89_phy_write32_mask(rtwdev, R_RXIQC + (path << 8), MASKDWORD,
+                                      iqk_info->nb_rxcfir[path] | 0x2);
+               iqk_info->is_wb_txiqk[path] = false;
+       } else {
+               rtw89_phy_write32_mask(rtwdev, R_RXIQC + (path << 8), MASKDWORD,
+                                      0x40000000);
+               iqk_info->is_wb_txiqk[path] = true;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, kfail = 0x%x, 0x8%x3c = 0x%x\n", path, kfail,
+                   1 << path, iqk_info->nb_rxcfir[path]);
+       return kfail;
+}
+
+/* Narrow-band RX IQK for 5 GHz: run RXAGC + NBRXK on the single highest
+ * gain group (gp = 3) only. On failure a default CFIR (0x40000002) is
+ * written; either way the wideband flag stays false since this is the
+ * narrow-band path. Returns true on calibration failure.
+ */
+static bool _iqk_5g_nbrxk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                         u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool kfail = false;
+       bool notready;
+       u8 gp = 0x3;
+       u32 rf_0;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]S%x, gp = %x\n", path, gp);
+
+       /* Program RF gain/attenuation for the selected group. */
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_MOD, RR_MOD_RGM, a_idxrxgain[gp]);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_RXA2, RR_RXA2_ATT, a_idxattc2[gp]);
+
+       rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_SEL, 0x1);
+       rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, 0x0);
+       rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_GP_V1, gp);
+
+       /* Enable RXK PLL, let it settle, mirror RF mode into the BB. */
+       rtw89_write_rf(rtwdev, path, RR_RXKPLL, RFREG_MASK, 0x80013);
+       fsleep(100);
+       rf_0 = rtw89_read_rf(rtwdev, path, RR_MOD, RFREG_MASK);
+       rtw89_phy_write32_mask(rtwdev, R_IQK_DIF2, B_IQK_DIF2_RXPI, rf_0);
+       rtw89_phy_write32_mask(rtwdev, R_IQK_RXA, B_IQK_RXAGC, a_idxrxagc[gp]);
+       rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT, 0x11);
+       notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_RXAGC);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, RXAGC 0x8008 = 0x%x, rxbb = %x\n", path,
+                   rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD),
+                   rtw89_read_rf(rtwdev, path, RR_MOD, 0x003e0));
+
+       /* gp is fixed at 3, so this branch always runs; the guard mirrors
+        * the group-sweep variants of this routine.
+        */
+       if (gp == 0x3) {
+               rtw89_write_rf(rtwdev, path, RR_RXKPLL, RR_RXKPLL_OFF, 0x13);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT, 0x011);
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_NBRXK);
+               iqk_info->nb_rxcfir[path] =
+                       rtw89_phy_read32_mask(rtwdev, R_RXIQC, MASKDWORD) | 0x2;
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S%x, NBRXK 0x8008 = 0x%x\n", path,
+                           rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD));
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]S%x, WBRXK 0x8008 = 0x%x\n",
+                   path, rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD));
+
+       if (!notready)
+               kfail = !!rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, B_NCTL_RPT_FLG);
+
+       if (kfail) {
+               /* Failure: write a safe default CFIR. */
+               rtw89_phy_write32_mask(rtwdev, R_RXIQC + (path << 8),
+                                      MASKDWORD, 0x40000002);
+               iqk_info->is_wb_rxiqk[path] = false;
+       } else {
+               /* Narrow-band result: wideband flag remains false. */
+               iqk_info->is_wb_rxiqk[path] = false;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, kfail = 0x%x, 0x8%x3c = 0x%x\n", path, kfail,
+                   1 << path, iqk_info->nb_rxcfir[path]);
+
+       return kfail;
+}
+
+/* Narrow-band RX IQK for 2 GHz: single group (gp = 3), using the g_*
+ * (2G) gain tables and a shorter PLL settle than the 5G variant. On
+ * failure a default CFIR (0x40000002) is written; the wideband flag stays
+ * false either way. Returns true on calibration failure.
+ */
+static bool _iqk_2g_nbrxk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                         u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool kfail = false;
+       bool notready;
+       u8 gp = 0x3;
+       u32 rf_0;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]S%x, gp = %x\n", path, gp);
+
+       /* Program RF gain/attenuation for the selected group. */
+       rtw89_write_rf(rtwdev, path, RR_MOD, RR_MOD_RGM, g_idxrxgain[gp]);
+       rtw89_write_rf(rtwdev, path, RR_RXBB, RR_RXBB_C2, g_idxattc2[gp]);
+       rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_SEL, 0x1);
+       rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, 0x0);
+       rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_GP_V1, gp);
+
+       /* Enable RXK PLL, settle, mirror RF mode into the BB. */
+       rtw89_write_rf(rtwdev, path, RR_RXKPLL, RFREG_MASK, 0x80013);
+       fsleep(10);
+       rf_0 = rtw89_read_rf(rtwdev, path, RR_MOD, RFREG_MASK);
+       rtw89_phy_write32_mask(rtwdev, R_IQK_DIF2, B_IQK_DIF2_RXPI, rf_0);
+       rtw89_phy_write32_mask(rtwdev, R_IQK_RXA, B_IQK_RXAGC, g_idxrxagc[gp]);
+       rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT, 0x11);
+       notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_RXAGC);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, RXAGC 0x8008 = 0x%x, rxbb = %x\n",
+                   path, rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD),
+                   rtw89_read_rf(rtwdev, path, RR_MOD, 0x003e0));
+
+       /* gp is fixed at 3, so this always runs; guard kept for symmetry
+        * with the group-sweep variants.
+        */
+       if (gp == 0x3) {
+               rtw89_write_rf(rtwdev, path, RR_RXKPLL, RR_RXKPLL_OFF, 0x13);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_RXT, 0x011);
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_NBRXK);
+               iqk_info->nb_rxcfir[path] =
+                       rtw89_phy_read32_mask(rtwdev, R_RXIQC, MASKDWORD) | 0x2;
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S%x, NBRXK 0x8008 = 0x%x\n", path,
+                           rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD));
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]S%x, WBRXK 0x8008 = 0x%x\n",
+                   path, rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, MASKDWORD));
+
+       if (!notready)
+               kfail = !!rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, B_NCTL_RPT_FLG);
+
+       if (kfail) {
+               /* Failure: write a safe default CFIR. */
+               rtw89_phy_write32_mask(rtwdev, R_RXIQC + (path << 8),
+                                      MASKDWORD, 0x40000002);
+               iqk_info->is_wb_rxiqk[path] = false;
+       } else {
+               /* Narrow-band result: wideband flag remains false. */
+               iqk_info->is_wb_rxiqk[path] = false;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, kfail = 0x%x, 0x8%x3c = 0x%x\n", path, kfail,
+                   1 << path, iqk_info->nb_rxcfir[path]);
+       return kfail;
+}
+
+/* Configure the RX clock for IQK on @path: enable the RXBB clock toggle,
+ * then replay the register table matching the calibration bandwidth
+ * (80 MHz vs everything else).
+ */
+static void _iqk_rxclk_setting(struct rtw89_dev *rtwdev, u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+
+       rtw89_write_rf(rtwdev, path, RR_RXBB2, RR_RXBB2_CKT, 0x1);
+
+       if (iqk_info->iqk_bw[path] == RTW89_CHANNEL_WIDTH_80)
+               rtw89_rfk_parser(rtwdev, &rtw8851b_iqk_rxclk_80_defs_tbl);
+       else
+               rtw89_rfk_parser(rtwdev, &rtw8851b_iqk_rxclk_others_defs_tbl);
+}
+
+/* Wideband TX IQK for the 5 GHz band: sweep all TX gain groups, running
+ * NBTXK (to capture a narrow-band fallback CFIR) then TXK per group.
+ * Commits the fallback CFIR on failure, or resets the TXIQC register to
+ * the hardware-result default on success. Returns true on failure.
+ */
+static bool _txk_5g_group_sel(struct rtw89_dev *rtwdev,
+                             enum rtw89_phy_idx phy_idx, u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool kfail = false;
+       bool notready;
+       u8 gp;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       for (gp = 0x0; gp < RTW8851B_TXK_GROUP_NR; gp++) {
+               /* Per-group TX gain (power/track range, BB gain). */
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_GR0, a_power_range[gp]);
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_GR1, a_track_range[gp]);
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_TG, a_gain_bb[gp]);
+
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_SEL, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G2, 0x0);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_GP, gp);
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP, MASKDWORD, a_itqt[gp]);
+
+               /* NBTXK first: capture a narrow-band CFIR fallback. */
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_NBTXK);
+               iqk_info->nb_txcfir[path] =
+                       rtw89_phy_read32_mask(rtwdev, R_TXIQC, MASKDWORD)  | 0x2;
+
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP + (path << 8),
+                                      MASKDWORD, a_itqt[gp]);
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_TXK);
+       }
+
+       /* Only the final group's readiness is checked here. */
+       if (!notready)
+               kfail = !!rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, B_NCTL_RPT_FLG);
+
+       if (kfail) {
+               /* Fall back to the narrow-band CFIR captured above. */
+               rtw89_phy_write32_mask(rtwdev, R_TXIQC + (path << 8),
+                                      MASKDWORD, iqk_info->nb_txcfir[path] | 0x2);
+               iqk_info->is_wb_txiqk[path] = false;
+       } else {
+               rtw89_phy_write32_mask(rtwdev, R_TXIQC + (path << 8),
+                                      MASKDWORD, 0x40000000);
+               iqk_info->is_wb_txiqk[path] = true;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, kfail = 0x%x, 0x8%x38 = 0x%x\n", path, kfail,
+                   1 << path, iqk_info->nb_txcfir[path]);
+       return kfail;
+}
+
+/* Wideband TX IQK for the 2 GHz band. Mirrors _txk_5g_group_sel() but
+ * uses the g_* (2G) tables; note the KIP_IQP write happens before the
+ * CFIR LUT setup here, the reverse of the 5G ordering. Returns true on
+ * calibration failure.
+ */
+static bool _txk_2g_group_sel(struct rtw89_dev *rtwdev,
+                             enum rtw89_phy_idx phy_idx, u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool kfail = false;
+       bool notready;
+       u8 gp;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       for (gp = 0x0; gp < RTW8851B_TXK_GROUP_NR; gp++) {
+               /* Per-group TX gain (power/track range, BB gain). */
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_GR0, g_power_range[gp]);
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_GR1, g_track_range[gp]);
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_TG, g_gain_bb[gp]);
+
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP, MASKDWORD, g_itqt[gp]);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_SEL, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G2, 0x0);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_GP, gp);
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+
+               /* NBTXK first: capture a narrow-band CFIR fallback. */
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_NBTXK);
+               iqk_info->nb_txcfir[path] =
+                       rtw89_phy_read32_mask(rtwdev, R_TXIQC, MASKDWORD)  | 0x2;
+
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP + (path << 8),
+                                      MASKDWORD, g_itqt[gp]);
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_TXK);
+       }
+
+       /* Only the final group's readiness is checked here. */
+       if (!notready)
+               kfail = !!rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, B_NCTL_RPT_FLG);
+
+       if (kfail) {
+               /* Fall back to the narrow-band CFIR captured above. */
+               rtw89_phy_write32_mask(rtwdev, R_TXIQC + (path << 8),
+                                      MASKDWORD, iqk_info->nb_txcfir[path] | 0x2);
+               iqk_info->is_wb_txiqk[path] = false;
+       } else {
+               rtw89_phy_write32_mask(rtwdev, R_TXIQC + (path << 8),
+                                      MASKDWORD, 0x40000000);
+               iqk_info->is_wb_txiqk[path] = true;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, kfail = 0x%x, 0x8%x38 = 0x%x\n", path, kfail,
+                   1 << path, iqk_info->nb_txcfir[path]);
+       return kfail;
+}
+
+/* Narrow-band TX IQK for 5 GHz: sweep all TX gain groups but run only
+ * NBTXK per group (no wideband TXK pass). On failure a default CFIR
+ * (0x40000002) is written. Returns true on calibration failure.
+ */
+static bool _iqk_5g_nbtxk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                         u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool kfail = false;
+       bool notready;
+       u8 gp;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       for (gp = 0x0; gp < RTW8851B_TXK_GROUP_NR; gp++) {
+               /* Per-group TX gain (power/track range, BB gain). */
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_GR0, a_power_range[gp]);
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_GR1, a_track_range[gp]);
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_TG, a_gain_bb[gp]);
+
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_SEL, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G2, 0x0);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_GP, gp);
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP, MASKDWORD, a_itqt[gp]);
+
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_NBTXK);
+               iqk_info->nb_txcfir[path] =
+                       rtw89_phy_read32_mask(rtwdev, R_TXIQC, MASKDWORD)  | 0x2;
+       }
+
+       /* Only the final group's readiness is checked here. */
+       if (!notready)
+               kfail = !!rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, B_NCTL_RPT_FLG);
+
+       if (kfail) {
+               /* Failure: write a safe default CFIR.
+                * NOTE(review): this TX routine updates is_wb_rxiqk (not
+                * is_wb_txiqk) in both branches -- verify which is intended.
+                */
+               rtw89_phy_write32_mask(rtwdev, R_TXIQC + (path << 8),
+                                      MASKDWORD, 0x40000002);
+               iqk_info->is_wb_rxiqk[path] = false;
+       } else {
+               iqk_info->is_wb_rxiqk[path] = false;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, kfail = 0x%x, 0x8%x38 = 0x%x\n", path, kfail,
+                   1 << path, iqk_info->nb_txcfir[path]);
+       return kfail;
+}
+
+/* Narrow-band TX IQK for 2 GHz. Mirrors _iqk_5g_nbtxk() with the g_* (2G)
+ * tables; note this variant reads the CFIR back from the path-offset
+ * R_TXIQC register, unlike the 5G variant. Returns true on failure.
+ */
+static bool _iqk_2g_nbtxk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                         u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool kfail = false;
+       bool notready;
+       u8 gp;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       for (gp = 0x0; gp < RTW8851B_TXK_GROUP_NR; gp++) {
+               /* Per-group TX gain (power/track range, BB gain). */
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_GR0, g_power_range[gp]);
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_GR1, g_track_range[gp]);
+               rtw89_write_rf(rtwdev, path, RR_TXIG, RR_TXIG_TG, g_gain_bb[gp]);
+
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP, MASKDWORD, g_itqt[gp]);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_SEL, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G2, 0x0);
+               rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_GP, gp);
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+
+               notready = _iqk_one_shot(rtwdev, phy_idx, path, ID_NBTXK);
+               iqk_info->nb_txcfir[path] =
+                       rtw89_phy_read32_mask(rtwdev, R_TXIQC + (path << 8),
+                                             MASKDWORD)  | 0x2;
+       }
+
+       /* Only the final group's readiness is checked here. */
+       if (!notready)
+               kfail = !!rtw89_phy_read32_mask(rtwdev, R_NCTL_RPT, B_NCTL_RPT_FLG);
+
+       if (kfail) {
+               /* Failure: write a safe default CFIR.
+                * NOTE(review): this TX routine updates is_wb_rxiqk (not
+                * is_wb_txiqk) in both branches -- verify which is intended.
+                */
+               rtw89_phy_write32_mask(rtwdev, R_TXIQC + (path << 8),
+                                      MASKDWORD, 0x40000002);
+               iqk_info->is_wb_rxiqk[path] = false;
+       } else {
+               iqk_info->is_wb_rxiqk[path] = false;
+       }
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]S%x, kfail = 0x%x, 0x8%x38 = 0x%x\n", path, kfail,
+                   1 << path, iqk_info->nb_txcfir[path]);
+       return kfail;
+}
+
+/* 2 GHz LO-leakage calibration (LOK) on path A: step through RTW8851B_LOK_GRAM
+ * TX-BB gain points and, for each, run two one-shot calibrations (NCTL command
+ * words 0x...109 then 0x...309). Accumulates and returns the fail status.
+ */
+static bool _iqk_2g_lok(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                       u8 path)
+{
+       /* per-step TX baseband gain, KIP IQ swing, and RF LUT address tables */
+       static const u32 g_txbb[RTW8851B_LOK_GRAM] = {
+               0x02, 0x06, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x17};
+       static const u32 g_itqt[RTW8851B_LOK_GRAM] = {
+               0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x12, 0x12, 0x12, 0x1b};
+       static const u32 g_wa[RTW8851B_LOK_GRAM] = {
+               0x00, 0x04, 0x08, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x17};
+       bool fail = false;
+       u8 i;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTDBG, RR_LUTDBG_LOK, 0x0);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_TXIG, RR_TXIG_GR0, 0x0);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_TXIG, RR_TXIG_GR1, 0x6);
+
+       for (i = 0; i < RTW8851B_LOK_GRAM; i++) {
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_TXIG, RR_TXIG_TG, g_txbb[i]);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWA, RR_LUTWA_M1, g_wa[i]);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP, B_KIP_IQP_IQSW, g_itqt[i]);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_TXT, 0x021);
+               /* first calibration pass for this gain step */
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_CFG, MASKDWORD,
+                                      0x00000109 | (1 << (4 + path)));
+               fail |= _iqk_check_cal(rtwdev, path);
+
+               /* second calibration pass with command word 0x309 */
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP, B_KIP_IQP_IQSW, g_itqt[i]);
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_CFG, MASKDWORD,
+                                      0x00000309 | (1 << (4 + path)));
+               fail |= _iqk_check_cal(rtwdev, path);
+
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x0);
+
+               /* dump the LOK result fields for this step */
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S0, i = %x, 0x8[19:15] = 0x%x,0x8[09:05] = 0x%x\n", i,
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_DTXLOK, 0xf8000),
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_DTXLOK, 0x003e0),
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_DTXLOK, 0x003e0));
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S0, i = %x, 0x9[19:16] = 0x%x,0x9[09:06] = 0x%x\n", i,
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_RSV2, 0xf0000),
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_RSV2, 0x003c0));
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S0, i = %x, 0x58 = %x\n", i,
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_TXMO, RFREG_MASK));
+       }
+
+       return fail;
+}
+
+/* 5 GHz LO-leakage calibration (LOK) on path A. Mirrors _iqk_2g_lok but with
+ * 5 GHz gain/LUT tables, GR1 = 0x7 instead of 0x6, and an extra DIF4 write
+ * before the second pass. Returns the OR-ed fail status over all steps.
+ */
+static bool _iqk_5g_lok(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                       u8 path)
+{
+       static const u32 a_txbb[RTW8851B_LOK_GRAM] = {
+               0x02, 0x06, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x17};
+       static const u32 a_itqt[RTW8851B_LOK_GRAM] = {
+               0x09, 0x09, 0x09, 0x12, 0x12, 0x12, 0x1b, 0x1b, 0x1b, 0x1b};
+       static const u32 a_wa[RTW8851B_LOK_GRAM] = {
+               0x80, 0x84, 0x88, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x97};
+       bool fail = false;
+       u8 i;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTDBG, RR_LUTDBG_LOK, 0x0);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_TXIG, RR_TXIG_GR0, 0x0);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_TXIG, RR_TXIG_GR1, 0x7);
+
+       for (i = 0; i < RTW8851B_LOK_GRAM; i++) {
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_TXIG, RR_TXIG_TG, a_txbb[i]);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWA, RR_LUTWA_M1, a_wa[i]);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x1);
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP, B_KIP_IQP_IQSW, a_itqt[i]);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_TXT, 0x021);
+               /* first calibration pass for this gain step */
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_CFG, MASKDWORD,
+                                      0x00000109 | (1 << (4 + path)));
+               fail |= _iqk_check_cal(rtwdev, path);
+
+               /* second pass; DIF4 is rewritten here (unlike the 2G variant) */
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+               rtw89_phy_write32_mask(rtwdev, R_KIP_IQP, B_KIP_IQP_IQSW, a_itqt[i]);
+               rtw89_phy_write32_mask(rtwdev, R_IQK_DIF4, B_IQK_DIF4_TXT, 0x021);
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_CFG, MASKDWORD,
+                                      0x00000309 | (1 << (4 + path)));
+               fail |= _iqk_check_cal(rtwdev, path);
+
+               rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+               rtw89_phy_write32_mask(rtwdev, R_UPD_CLK, B_IQK_RFC_ON, 0x0);
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S0, i = %x, 0x8[19:15] = 0x%x,0x8[09:05] = 0x%x\n", i,
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_DTXLOK, 0xf8000),
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_DTXLOK, 0x003e0));
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S0, i = %x, 0x9[19:16] = 0x%x,0x9[09:06] = 0x%x\n", i,
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_RSV2, 0xf0000),
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_RSV2, 0x003c0));
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[IQK]S0, i = %x, 0x58 = %x\n", i,
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_TXMO, RFREG_MASK));
+       }
+
+       return fail;
+}
+
+/* Apply the band-specific TX-calibration register table (2G or 5G) for the
+ * band recorded in iqk_info for this path; other bands are a no-op.
+ */
+static void _iqk_txk_setting(struct rtw89_dev *rtwdev, u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+
+       switch (iqk_info->iqk_band[path]) {
+       case RTW89_BAND_2G:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]RTW89_BAND_2G\n");
+               rtw89_rfk_parser(rtwdev, &rtw8851b_iqk_txk_2ghz_defs_tbl);
+               break;
+       case RTW89_BAND_5G:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]RTW89_BAND_5G\n");
+               rtw89_rfk_parser(rtwdev, &rtw8851b_iqk_txk_5ghz_defs_tbl);
+               break;
+       default:
+               break;
+       }
+}
+
+/* Number of LOK attempts before giving up (currently no actual retry). */
+#define IQK_LOK_RETRY 1
+
+/* Run the full per-path IQ calibration sequence: LOK (with retry), TX IQK
+ * (narrow-band or group/wideband depending on is_nbiqk), then RX clock/path
+ * setup, ADC FIFO reset and RX IQK. Fail results land in iqk_tx_fail /
+ * iqk_rx_fail slot [0][path].
+ */
+static void _iqk_by_path(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                        u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       bool lok_is_fail;
+       u8 i;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       for (i = 0; i < IQK_LOK_RETRY; i++) {
+               _iqk_txk_setting(rtwdev, path);
+               if (iqk_info->iqk_band[path] == RTW89_BAND_2G)
+                       lok_is_fail = _iqk_2g_lok(rtwdev, phy_idx, path);
+               else
+                       lok_is_fail = _iqk_5g_lok(rtwdev, phy_idx, path);
+
+               if (!lok_is_fail)
+                       break;
+       }
+
+       /* TX IQ calibration: narrow-band single-point or per-group wideband */
+       if (iqk_info->is_nbiqk) {
+               if (iqk_info->iqk_band[path] == RTW89_BAND_2G)
+                       iqk_info->iqk_tx_fail[0][path] =
+                               _iqk_2g_nbtxk(rtwdev, phy_idx, path);
+               else
+                       iqk_info->iqk_tx_fail[0][path] =
+                               _iqk_5g_nbtxk(rtwdev, phy_idx, path);
+       } else {
+               if (iqk_info->iqk_band[path] == RTW89_BAND_2G)
+                       iqk_info->iqk_tx_fail[0][path] =
+                               _txk_2g_group_sel(rtwdev, phy_idx, path);
+               else
+                       iqk_info->iqk_tx_fail[0][path] =
+                               _txk_5g_group_sel(rtwdev, phy_idx, path);
+       }
+
+       /* prepare RX side before running RX IQ calibration */
+       _iqk_rxclk_setting(rtwdev, path);
+       _iqk_rxk_setting(rtwdev, path);
+       _adc_fifo_rst(rtwdev, phy_idx, path);
+
+       if (iqk_info->is_nbiqk) {
+               if (iqk_info->iqk_band[path] == RTW89_BAND_2G)
+                       iqk_info->iqk_rx_fail[0][path] =
+                               _iqk_2g_nbrxk(rtwdev, phy_idx, path);
+               else
+                       iqk_info->iqk_rx_fail[0][path] =
+                               _iqk_5g_nbrxk(rtwdev, phy_idx, path);
+       } else {
+               if (iqk_info->iqk_band[path] == RTW89_BAND_2G)
+                       iqk_info->iqk_rx_fail[0][path] =
+                               _rxk_2g_group_sel(rtwdev, phy_idx, path);
+               else
+                       iqk_info->iqk_rx_fail[0][path] =
+                               _rxk_5g_group_sel(rtwdev, phy_idx, path);
+       }
+}
+
+/* Save the BB registers listed in rtw8851b_backup_bb_regs[] into the
+ * caller-provided array (BACKUP_BB_REGS_NR entries) before calibration.
+ */
+static void _rfk_backup_bb_reg(struct rtw89_dev *rtwdev,
+                              u32 backup_bb_reg_val[])
+{
+       u32 i;
+
+       for (i = 0; i < BACKUP_BB_REGS_NR; i++) {
+               backup_bb_reg_val[i] =
+                       rtw89_phy_read32_mask(rtwdev, rtw8851b_backup_bb_regs[i],
+                                             MASKDWORD);
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[RFK]backup bb reg : %x, value =%x\n",
+                           rtw8851b_backup_bb_regs[i], backup_bb_reg_val[i]);
+       }
+}
+
+/* Save the RF registers listed in rtw8851b_backup_rf_regs[] for @rf_path into
+ * the caller-provided array (BACKUP_RF_REGS_NR entries) before calibration.
+ */
+static void _rfk_backup_rf_reg(struct rtw89_dev *rtwdev,
+                              u32 backup_rf_reg_val[], u8 rf_path)
+{
+       u32 i;
+
+       for (i = 0; i < BACKUP_RF_REGS_NR; i++) {
+               backup_rf_reg_val[i] =
+                       rtw89_read_rf(rtwdev, rf_path,
+                                     rtw8851b_backup_rf_regs[i], RFREG_MASK);
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[RFK]backup rf S%d reg : %x, value =%x\n", rf_path,
+                           rtw8851b_backup_rf_regs[i], backup_rf_reg_val[i]);
+       }
+}
+
+/* Write back the BB register values captured by _rfk_backup_bb_reg(). */
+static void _rfk_restore_bb_reg(struct rtw89_dev *rtwdev,
+                               const u32 backup_bb_reg_val[])
+{
+       u32 i;
+
+       for (i = 0; i < BACKUP_BB_REGS_NR; i++) {
+               rtw89_phy_write32_mask(rtwdev, rtw8851b_backup_bb_regs[i],
+                                      MASKDWORD, backup_bb_reg_val[i]);
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[RFK]restore bb reg : %x, value =%x\n",
+                           rtw8851b_backup_bb_regs[i], backup_bb_reg_val[i]);
+       }
+}
+
+/* Write back the RF register values captured by _rfk_backup_rf_reg(). */
+static void _rfk_restore_rf_reg(struct rtw89_dev *rtwdev,
+                               const u32 backup_rf_reg_val[], u8 rf_path)
+{
+       u32 i;
+
+       for (i = 0; i < BACKUP_RF_REGS_NR; i++) {
+               rtw89_write_rf(rtwdev, rf_path, rtw8851b_backup_rf_regs[i],
+                              RFREG_MASK, backup_rf_reg_val[i]);
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[RFK]restore rf S%d reg: %x, value =%x\n", rf_path,
+                           rtw8851b_backup_rf_regs[i], backup_rf_reg_val[i]);
+       }
+}
+
+/* Snapshot the current channel (band, bandwidth, channel number) from
+ * SUB_ENTITY_0 into iqk_info for @path, and log it. The table index is
+ * currently fixed at 0.
+ */
+static void _iqk_get_ch_info(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy,
+                            u8 path)
+{
+       const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, RTW89_SUB_ENTITY_0);
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       u8 idx = 0;
+
+       iqk_info->iqk_band[path] = chan->band_type;
+       iqk_info->iqk_bw[path] = chan->band_width;
+       iqk_info->iqk_ch[path] = chan->channel;
+       iqk_info->iqk_table_idx[path] = idx;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]S%d (PHY%d): / DBCC %s/ %s/ CH%d/ %s\n",
+                   path, phy, rtwdev->dbcc_en ? "on" : "off",
+                   iqk_info->iqk_band[path] == 0 ? "2G" :
+                   iqk_info->iqk_band[path] == 1 ? "5G" : "6G",
+                   iqk_info->iqk_ch[path],
+                   iqk_info->iqk_bw[path] == 0 ? "20M" :
+                   iqk_info->iqk_bw[path] == 1 ? "40M" : "80M");
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]times = 0x%x, ch =%x\n",
+                   iqk_info->iqk_times, idx);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]S%x, iqk_info->syn1to2= 0x%x\n",
+                   path, iqk_info->syn1to2);
+}
+
+/* Thin wrapper kicking off the per-path calibration sequence. */
+static void _iqk_start_iqk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+                          u8 path)
+{
+       _iqk_by_path(rtwdev, phy_idx, path);
+}
+
+/* Tell the NCTL engine to restore its state (command 0x00001219), wait
+ * briefly, verify completion, then clear the report/sysconfig registers.
+ */
+static void _iqk_restore(struct rtw89_dev *rtwdev, u8 path)
+{
+       bool fail;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       rtw89_phy_write32_mask(rtwdev, R_NCTL_CFG, MASKDWORD, 0x00001219);
+       fsleep(10);
+       fail = _iqk_check_cal(rtwdev, path);
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK] restore fail=%d\n", fail);
+
+       rtw89_phy_write32_mask(rtwdev, R_NCTL_N1, B_NCTL_N1_CIP, 0x00);
+       rtw89_phy_write32_mask(rtwdev, R_NCTL_RPT, MASKDWORD, 0x00000000);
+       rtw89_phy_write32_mask(rtwdev, R_KIP_SYSCFG, MASKDWORD, 0x80000000);
+}
+
+/* Restore AFE/BB settings after IQK via the chip's register-table parser. */
+static void _iqk_afebb_restore(struct rtw89_dev *rtwdev,
+                              enum rtw89_phy_idx phy_idx, u8 path)
+{
+       rtw89_rfk_parser(rtwdev, &rtw8851b_iqk_afebb_restore_defs_tbl);
+}
+
+/* Pre-calibration setup: release the RF reset bit and initialize the NCTL
+ * report and KIP system-config registers to their IQK values.
+ */
+static void _iqk_preset(struct rtw89_dev *rtwdev, u8 path)
+{
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       rtw89_write_rf(rtwdev, path, RR_RSV1, RR_RSV1_RST, 0x0);
+       rtw89_phy_write32_mask(rtwdev, R_NCTL_RPT, MASKDWORD, 0x00000080);
+       rtw89_phy_write32_mask(rtwdev, R_KIP_SYSCFG, MASKDWORD, 0x81ff010a);
+}
+
+/* Apply the MAC/BB register table required before running IQK. */
+static void _iqk_macbb_setting(struct rtw89_dev *rtwdev,
+                              enum rtw89_phy_idx phy_idx, u8 path)
+{
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       rtw89_rfk_parser(rtwdev, &rtw8851b_iqk_macbb_defs_tbl);
+}
+
+/* One-time initialization of the IQK bookkeeping state. Clears the IQK info
+ * register unconditionally, then (first call only) resets all feature flags,
+ * counters and per-channel/per-path fail records.
+ */
+static void _iqk_init(struct rtw89_dev *rtwdev)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       u8 idx, path;
+
+       rtw89_phy_write32_mask(rtwdev, R_IQKINF, MASKDWORD, 0x0);
+
+       /* only initialize the software state once per driver lifetime */
+       if (iqk_info->is_iqk_init)
+               return;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]===>%s\n", __func__);
+
+       iqk_info->is_iqk_init = true;
+       iqk_info->is_nbiqk = false;
+       iqk_info->iqk_fft_en = false;
+       iqk_info->iqk_sram_en = false;
+       iqk_info->iqk_cfir_en = false;
+       iqk_info->iqk_xym_en = false;
+       iqk_info->thermal_rek_en = false;
+       iqk_info->iqk_times = 0x0;
+
+       for (idx = 0; idx < RTW89_IQK_CHS_NR; idx++) {
+               iqk_info->iqk_channel[idx] = 0x0;
+               for (path = 0; path < RF_PATH_NUM_8851B; path++) {
+                       iqk_info->lok_cor_fail[idx][path] = false;
+                       iqk_info->lok_fin_fail[idx][path] = false;
+                       iqk_info->iqk_tx_fail[idx][path] = false;
+                       iqk_info->iqk_rx_fail[idx][path] = false;
+                       iqk_info->iqk_table_idx[path] = 0x0;
+               }
+       }
+}
+
+/* Execute one complete IQK run for @path: notify BT-coex, record channel
+ * info, back up BB/RF registers, apply MAC/BB + preset settings, run the
+ * calibration, then restore everything and notify BT-coex again.
+ * @force is currently unused.
+ */
+static void _doiqk(struct rtw89_dev *rtwdev, bool force,
+                  enum rtw89_phy_idx phy_idx, u8 path)
+{
+       struct rtw89_iqk_info *iqk_info = &rtwdev->iqk;
+       u8 phy_map = rtw89_btc_phymap(rtwdev, phy_idx, RF_AB);
+       u32 backup_rf_val[RTW8851B_IQK_SS][BACKUP_RF_REGS_NR];
+       u32 backup_bb_val[BACKUP_BB_REGS_NR];
+
+       rtw89_btc_ntfy_wl_rfk(rtwdev, phy_map, BTC_WRFKT_IQK,
+                             BTC_WRFK_ONESHOT_START);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[IQK]==========IQK strat!!!!!==========\n");
+       iqk_info->iqk_times++;
+       iqk_info->kcount = 0;
+       iqk_info->version = RTW8851B_IQK_VER;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[IQK]Test Ver 0x%x\n", iqk_info->version);
+       _iqk_get_ch_info(rtwdev, phy_idx, path);
+
+       /* backup -> configure -> calibrate -> restore, in strict order */
+       _rfk_backup_bb_reg(rtwdev, &backup_bb_val[0]);
+       _rfk_backup_rf_reg(rtwdev, &backup_rf_val[path][0], path);
+       _iqk_macbb_setting(rtwdev, phy_idx, path);
+       _iqk_preset(rtwdev, path);
+       _iqk_start_iqk(rtwdev, phy_idx, path);
+       _iqk_restore(rtwdev, path);
+       _iqk_afebb_restore(rtwdev, phy_idx, path);
+       _rfk_restore_bb_reg(rtwdev, &backup_bb_val[0]);
+       _rfk_restore_rf_reg(rtwdev, &backup_rf_val[path][0], path);
+
+       rtw89_btc_ntfy_wl_rfk(rtwdev, phy_map, BTC_WRFKT_IQK,
+                             BTC_WRFK_ONESHOT_STOP);
+}
+
+/* 8851B is a 1x1 chip: only RF path A needs calibration. */
+static void _iqk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx, bool force)
+{
+       _doiqk(rtwdev, force, phy_idx, RF_PATH_A);
+}
+
+/* RC (resistor/capacitor) calibration for one RF path: put the path in RX
+ * mode, trigger RCK, poll for completion, then commit the resulting RR_RCKC
+ * value and restore the saved RR_RSV1 register.
+ */
+static void _rck(struct rtw89_dev *rtwdev, enum rtw89_rf_path path)
+{
+       u32 rf_reg5;
+       u32 rck_val;
+       u32 val;
+       int ret;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RCK] ====== S%d RCK ======\n", path);
+
+       /* save RR_RSV1 so it can be restored after calibration */
+       rf_reg5 = rtw89_read_rf(rtwdev, path, RR_RSV1, RFREG_MASK);
+
+       rtw89_write_rf(rtwdev, path, RR_RSV1, RR_RSV1_RST, 0x0);
+       rtw89_write_rf(rtwdev, path, RR_MOD, RR_MOD_MASK, RR_MOD_V_RX);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RCK] RF0x00 = 0x%05x\n",
+                   rtw89_read_rf(rtwdev, path, RR_MOD, RFREG_MASK));
+
+       /* RCK trigger */
+       rtw89_write_rf(rtwdev, path, RR_RCKC, RFREG_MASK, 0x00240);
+
+       /* wait up to 30us for the done bit; timeout is only logged */
+       ret = read_poll_timeout_atomic(rtw89_read_rf, val, val, 2, 30,
+                                      false, rtwdev, path, RR_RCKS, BIT(3));
+
+       rck_val = rtw89_read_rf(rtwdev, path, RR_RCKC, RR_RCKC_CA);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RCK] rck_val = 0x%x, ret = %d\n",
+                   rck_val, ret);
+
+       rtw89_write_rf(rtwdev, path, RR_RCKC, RFREG_MASK, rck_val);
+       rtw89_write_rf(rtwdev, path, RR_RSV1, RFREG_MASK, rf_reg5);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RCK] RF 0x1b = 0x%x\n",
+                   rtw89_read_rf(rtwdev, path, RR_RCKC, RFREG_MASK));
+}
+
+/* Automatic amplitude calibration (AACK). Kicks the calibration up to four
+ * times; a run is accepted once all four VCO bias readings (ib[0..3]) are
+ * non-zero. RR_RSV1 is saved and restored around the procedure.
+ */
+void rtw8851b_aack(struct rtw89_dev *rtwdev)
+{
+       u32 tmp05, ib[4];
+       u32 tmp;
+       int ret;
+       int rek;
+       int i;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[LCK]DO AACK\n");
+
+       tmp05 = rtw89_read_rf(rtwdev, RF_PATH_A, RR_RSV1, RFREG_MASK);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_MOD, RR_MOD_MASK, 0x3);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_RSV1, RFREG_MASK, 0x0);
+
+       for (rek = 0; rek < 4; rek++) {
+               /* pulse the AACK trigger (0x8201e -> 0x8201f) */
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_AACK, RFREG_MASK, 0x8201e);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_AACK, RFREG_MASK, 0x8201f);
+               fsleep(100);
+
+               ret = read_poll_timeout_atomic(rtw89_read_rf, tmp, tmp,
+                                              1, 1000, false,
+                                              rtwdev, RF_PATH_A, 0xd0, BIT(16));
+               if (ret)
+                       rtw89_warn(rtwdev, "[LCK]AACK timeout\n");
+
+               /* sample the bias value of each of the 4 VCO selections */
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_VCI, RR_VCI_ON, 0x1);
+               for (i = 0; i < 4; i++) {
+                       rtw89_write_rf(rtwdev, RF_PATH_A, RR_VCO, RR_VCO_SEL, i);
+                       ib[i] = rtw89_read_rf(rtwdev, RF_PATH_A, RR_IBD, RR_IBD_VAL);
+               }
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_VCI, RR_VCI_ON, 0x0);
+
+               if (ib[0] != 0 && ib[1] != 0 && ib[2] != 0 && ib[3] != 0)
+                       break;
+       }
+
+       if (rek != 0)
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[LCK]AACK rek = %d\n", rek);
+
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_RSV1, RFREG_MASK, tmp05);
+}
+
+/* Public entry: RC calibration for the single path (A) of the 8851B. */
+void rtw8851b_rck(struct rtw89_dev *rtwdev)
+{
+       _rck(rtwdev, RF_PATH_A);
+}
+
+/* Public entry: DAC calibration (non-forced). */
+void rtw8851b_dack(struct rtw89_dev *rtwdev)
+{
+       _dac_cal(rtwdev, false);
+}
+
+/* Public entry: full IQ calibration. Notifies BT-coex, pauses scheduled TX
+ * and waits for RX idle before calibrating, then resumes TX and notifies
+ * BT-coex that the calibration finished.
+ */
+void rtw8851b_iqk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+       u8 phy_map = rtw89_btc_phymap(rtwdev, phy_idx, 0);
+       u32 tx_en;
+
+       rtw89_btc_ntfy_wl_rfk(rtwdev, phy_map, BTC_WRFKT_IQK, BTC_WRFK_START);
+       rtw89_chip_stop_sch_tx(rtwdev, phy_idx, &tx_en, RTW89_SCH_TX_SEL_ALL);
+       _wait_rx_mode(rtwdev, _kpath(rtwdev, phy_idx));
+
+       _iqk_init(rtwdev);
+       _iqk(rtwdev, phy_idx, false);
+
+       rtw89_chip_resume_sch_tx(rtwdev, phy_idx, tx_en);
+       rtw89_btc_ntfy_wl_rfk(rtwdev, phy_map, BTC_WRFKT_IQK, BTC_WRFK_STOP);
+}
+
+/* Program the RF bandwidth field of RF 0x18 (DAV copy when @dav is true,
+ * DDV copy otherwise). 5/10 MHz widths fall back to the 20 MHz setting;
+ * an all-ones read-back aborts as an invalid register.
+ */
+static void _bw_setting(struct rtw89_dev *rtwdev, enum rtw89_rf_path path,
+                       enum rtw89_bandwidth bw, bool dav)
+{
+       u32 reg18_addr = dav ? RR_CFGCH : RR_CFGCH_V1;
+       u32 rf_reg18;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RFK]===> %s\n", __func__);
+
+       rf_reg18 = rtw89_read_rf(rtwdev, path, reg18_addr, RFREG_MASK);
+       if (rf_reg18 == INV_RF_DATA) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                           "[RFK]Invalid RF_0x18 for Path-%d\n", path);
+               return;
+       }
+       rf_reg18 &= ~RR_CFGCH_BW;
+
+       switch (bw) {
+       case RTW89_CHANNEL_WIDTH_5:
+       case RTW89_CHANNEL_WIDTH_10:
+       case RTW89_CHANNEL_WIDTH_20:
+               rf_reg18 |= FIELD_PREP(RR_CFGCH_BW, CFGCH_BW_20M);
+               break;
+       case RTW89_CHANNEL_WIDTH_40:
+               rf_reg18 |= FIELD_PREP(RR_CFGCH_BW, CFGCH_BW_40M);
+               break;
+       case RTW89_CHANNEL_WIDTH_80:
+               rf_reg18 |= FIELD_PREP(RR_CFGCH_BW, CFGCH_BW_80M);
+               break;
+       default:
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RFK]Fail to set CH\n");
+       }
+
+       /* clear control bits, keep only RFREG_MASK-wide bits, force BW2 set */
+       rf_reg18 &= ~(RR_CFGCH_POW_LCK | RR_CFGCH_TRX_AH | RR_CFGCH_BCN |
+                     RR_CFGCH_BW2) & RFREG_MASK;
+       rf_reg18 |= RR_CFGCH_BW2;
+       rtw89_write_rf(rtwdev, path, reg18_addr, RFREG_MASK, rf_reg18);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RFK] set %x at path%d, %x =0x%x\n",
+                   bw, path, reg18_addr,
+                   rtw89_read_rf(rtwdev, path, reg18_addr, RFREG_MASK));
+}
+
+/* Apply the bandwidth to both copies (DAV and DDV) of RF 0x18 on path A. */
+static void _ctrl_bw(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy,
+                    enum rtw89_bandwidth bw)
+{
+       _bw_setting(rtwdev, RF_PATH_A, bw, true);
+       _bw_setting(rtwdev, RF_PATH_A, bw, false);
+}
+
+/* Write @val to RF 0x18 on path A with the LDO select raised, then poll the
+ * LPF busy bit (up to 1 ms) for synthesizer lock. Restores RR_LDO afterwards.
+ * Returns true on timeout (lock NOT achieved), false on success.
+ */
+static bool _set_s0_arfc18(struct rtw89_dev *rtwdev, u32 val)
+{
+       u32 bak;
+       u32 tmp;
+       int ret;
+
+       bak = rtw89_read_rf(rtwdev, RF_PATH_A, RR_LDO, RFREG_MASK);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_LDO, RR_LDO_SEL, 0x1);
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_CFGCH, RFREG_MASK, val);
+
+       ret = read_poll_timeout_atomic(rtw89_read_rf, tmp, tmp == 0, 1, 1000,
+                                      false, rtwdev, RF_PATH_A, RR_LPF, RR_LPF_BUSY);
+       if (ret)
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[LCK]LCK timeout\n");
+
+       rtw89_write_rf(rtwdev, RF_PATH_A, RR_LDO, RFREG_MASK, bak);
+
+       return !!ret;
+}
+
+/* Synthesizer lock recovery, escalating through three stages, each attempted
+ * only while RR_SYNFB_LK still reads unlocked:
+ *  1) reset the SYN MMD divider;
+ *  2) re-write RF 0x18 with the lock trigger raised;
+ *  3) power-cycle the synthesizer, then re-write RF 0x18 again.
+ */
+static void _lck_check(struct rtw89_dev *rtwdev)
+{
+       u32 tmp;
+
+       if (rtw89_read_rf(rtwdev, RF_PATH_A, RR_SYNFB, RR_SYNFB_LK) == 0) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[LCK]SYN MMD reset\n");
+
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_MMD, RR_MMD_RST_EN, 0x1);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_MMD, RR_MMD_RST_SYN, 0x0);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_MMD, RR_MMD_RST_SYN, 0x1);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_MMD, RR_MMD_RST_EN, 0x0);
+       }
+
+       udelay(10);
+
+       if (rtw89_read_rf(rtwdev, RF_PATH_A, RR_SYNFB, RR_SYNFB_LK) == 0) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[LCK]re-set RF 0x18\n");
+
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_LCK_TRG, RR_LCK_TRGSEL, 0x1);
+               tmp = rtw89_read_rf(rtwdev, RF_PATH_A, RR_CFGCH, RFREG_MASK);
+               _set_s0_arfc18(rtwdev, tmp);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_LCK_TRG, RR_LCK_TRGSEL, 0x0);
+       }
+
+       if (rtw89_read_rf(rtwdev, RF_PATH_A, RR_SYNFB, RR_SYNFB_LK) == 0) {
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[LCK]SYN off/on\n");
+
+               /* read-modify-write of POW/SX latches the current values */
+               tmp = rtw89_read_rf(rtwdev, RF_PATH_A, RR_POW, RFREG_MASK);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RFREG_MASK, tmp);
+               tmp = rtw89_read_rf(rtwdev, RF_PATH_A, RR_SX, RFREG_MASK);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_SX, RFREG_MASK, tmp);
+
+               /* toggle synthesizer power off (0x0) then on (0x3) */
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_SYNLUT, RR_SYNLUT_MOD, 0x1);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x0);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x3);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_SYNLUT, RR_SYNLUT_MOD, 0x0);
+
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_LCK_TRG, RR_LCK_TRGSEL, 0x1);
+               tmp = rtw89_read_rf(rtwdev, RF_PATH_A, RR_CFGCH, RFREG_MASK);
+               _set_s0_arfc18(rtwdev, tmp);
+               rtw89_write_rf(rtwdev, RF_PATH_A, RR_LCK_TRG, RR_LCK_TRGSEL, 0x0);
+
+               rtw89_debug(rtwdev, RTW89_DBG_RFK, "[LCK]0xb2=%x, 0xc5=%x\n",
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_VCO, RFREG_MASK),
+                           rtw89_read_rf(rtwdev, RF_PATH_A, RR_SYNFB, RFREG_MASK));
+       }
+}
+
+/* Program RF 0x18 and, when the polled lock did not time out, run the lock
+ * recovery checks. (A timeout skips _lck_check entirely.)
+ */
+static void _set_ch(struct rtw89_dev *rtwdev, u32 val)
+{
+       bool timeout;
+
+       timeout = _set_s0_arfc18(rtwdev, val);
+       if (!timeout)
+               _lck_check(rtwdev);
+}
+
+/* Program the channel number and band bits into RF 0x18 (DAV or DDV copy).
+ * Channels <= 14 are treated as 2 GHz (band bits left clear); otherwise the
+ * 5 GHz band fields are set. The DAV write on path A goes through _set_ch()
+ * so that synthesizer lock is verified.
+ */
+static void _ch_setting(struct rtw89_dev *rtwdev, enum rtw89_rf_path path,
+                       u8 central_ch, bool dav)
+{
+       u32 reg18_addr = dav ? RR_CFGCH : RR_CFGCH_V1;
+       bool is_2g_ch = central_ch <= 14;
+       u32 rf_reg18;
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RFK]===> %s\n", __func__);
+
+       rf_reg18 = rtw89_read_rf(rtwdev, path, reg18_addr, RFREG_MASK);
+       rf_reg18 &= ~(RR_CFGCH_BAND1 | RR_CFGCH_POW_LCK | RR_CFGCH_TRX_AH |
+                     RR_CFGCH_BCN | RR_CFGCH_BAND0 | RR_CFGCH_CH);
+       rf_reg18 |= FIELD_PREP(RR_CFGCH_CH, central_ch);
+
+       if (!is_2g_ch)
+               rf_reg18 |= FIELD_PREP(RR_CFGCH_BAND1, CFGCH_BAND1_5G) |
+                           FIELD_PREP(RR_CFGCH_BAND0, CFGCH_BAND0_5G);
+
+       rf_reg18 &= ~(RR_CFGCH_POW_LCK | RR_CFGCH_TRX_AH | RR_CFGCH_BCN |
+                     RR_CFGCH_BW2) & RFREG_MASK;
+       rf_reg18 |= RR_CFGCH_BW2;
+
+       if (path == RF_PATH_A && dav)
+               _set_ch(rtwdev, rf_reg18);
+       else
+               rtw89_write_rf(rtwdev, path, reg18_addr, RFREG_MASK, rf_reg18);
+
+       /* pulse the lock-state bin bit to latch the new setting */
+       rtw89_write_rf(rtwdev, path, RR_LCKST, RR_LCKST_BIN, 0);
+       rtw89_write_rf(rtwdev, path, RR_LCKST, RR_LCKST_BIN, 1);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK,
+                   "[RFK]CH: %d for Path-%d, reg0x%x = 0x%x\n",
+                   central_ch, path, reg18_addr,
+                   rtw89_read_rf(rtwdev, path, reg18_addr, RFREG_MASK));
+}
+
+/* Set the channel on both copies (DAV and DDV) of RF 0x18 on path A. */
+static void _ctrl_ch(struct rtw89_dev *rtwdev, u8 central_ch)
+{
+       _ch_setting(rtwdev, RF_PATH_A, central_ch, true);
+       _ch_setting(rtwdev, RF_PATH_A, central_ch, false);
+}
+
+/* Set the RX baseband filter bandwidth via the RF LUT: open LUT write access,
+ * select entry 0x12, write the width-specific code, then close access again.
+ */
+static void _set_rxbb_bw(struct rtw89_dev *rtwdev, enum rtw89_bandwidth bw,
+                        enum rtw89_rf_path path)
+{
+       rtw89_write_rf(rtwdev, path, RR_LUTWE2, RR_LUTWE2_RTXBW, 0x1);
+       rtw89_write_rf(rtwdev, path, RR_LUTWA, RR_LUTWA_M2, 0x12);
+
+       if (bw == RTW89_CHANNEL_WIDTH_20)
+               rtw89_write_rf(rtwdev, path, RR_LUTWD0, RR_LUTWD0_LB, 0x1b);
+       else if (bw == RTW89_CHANNEL_WIDTH_40)
+               rtw89_write_rf(rtwdev, path, RR_LUTWD0, RR_LUTWD0_LB, 0x13);
+       else if (bw == RTW89_CHANNEL_WIDTH_80)
+               rtw89_write_rf(rtwdev, path, RR_LUTWD0, RR_LUTWD0_LB, 0xb);
+       else
+               rtw89_write_rf(rtwdev, path, RR_LUTWD0, RR_LUTWD0_LB, 0x3);
+
+       rtw89_debug(rtwdev, RTW89_DBG_RFK, "[RFK] set S%d RXBB BW 0x3F = 0x%x\n", path,
+                   rtw89_read_rf(rtwdev, path, RR_LUTWD0, RR_LUTWD0_LB));
+
+       rtw89_write_rf(rtwdev, path, RR_LUTWE2, RR_LUTWE2_RTXBW, 0x0);
+}
+
+/* Apply the RX BB bandwidth on every RF path enabled in the kpath bitmap. */
+static void _rxbb_bw(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy,
+                    enum rtw89_bandwidth bw)
+{
+       u8 kpath, path;
+
+       kpath = _kpath(rtwdev, phy);
+
+       for (path = 0; path < RF_PATH_NUM_8851B; path++) {
+               if (!(kpath & BIT(path)))
+                       continue;
+
+               _set_rxbb_bw(rtwdev, bw, path);
+       }
+}
+
+/* Combined channel/bandwidth switch: set the channel, the RF bandwidth and
+ * the RX baseband filter width. @band is currently unused here.
+ */
+static void rtw8851b_ctrl_bw_ch(struct rtw89_dev *rtwdev,
+                               enum rtw89_phy_idx phy, u8 central_ch,
+                               enum rtw89_band band, enum rtw89_bandwidth bw)
+{
+       _ctrl_ch(rtwdev, central_ch);
+       _ctrl_bw(rtwdev, phy, bw);
+       _rxbb_bw(rtwdev, phy, bw);
+}
+
+/* Public entry used by the core on channel switch: program the RF side for
+ * the given channel descriptor.
+ */
+void rtw8851b_set_channel_rf(struct rtw89_dev *rtwdev,
+                            const struct rtw89_chan *chan,
+                            enum rtw89_phy_idx phy_idx)
+{
+       rtw8851b_ctrl_bw_ch(rtwdev, phy_idx, chan->channel, chan->band_type,
+                           chan->band_width);
+}
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.h b/drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.h
new file mode 100644 (file)
index 0000000..d86c630
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2022-2023  Realtek Corporation
+ */
+
+#ifndef __RTW89_8851B_RFK_H__
+#define __RTW89_8851B_RFK_H__
+
+#include "core.h"
+
+void rtw8851b_aack(struct rtw89_dev *rtwdev);
+void rtw8851b_rck(struct rtw89_dev *rtwdev);
+void rtw8851b_dack(struct rtw89_dev *rtwdev);
+void rtw8851b_iqk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
+void rtw8851b_set_channel_rf(struct rtw89_dev *rtwdev,
+                            const struct rtw89_chan *chan,
+                            enum rtw89_phy_idx phy_idx);
+
+#endif
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851be.c b/drivers/net/wireless/realtek/rtw89/rtw8851be.c
new file mode 100644 (file)
index 0000000..0f7711c
--- /dev/null
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2022-2023  Realtek Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "pci.h"
+#include "reg.h"
+#include "rtw8851b.h"
+
+static const struct rtw89_pci_info rtw8851b_pci_info = {
+       .txbd_trunc_mode        = MAC_AX_BD_TRUNC,
+       .rxbd_trunc_mode        = MAC_AX_BD_TRUNC,
+       .rxbd_mode              = MAC_AX_RXBD_PKT,
+       .tag_mode               = MAC_AX_TAG_MULTI,
+       .tx_burst               = MAC_AX_TX_BURST_2048B,
+       .rx_burst               = MAC_AX_RX_BURST_128B,
+       .wd_dma_idle_intvl      = MAC_AX_WD_DMA_INTVL_256NS,
+       .wd_dma_act_intvl       = MAC_AX_WD_DMA_INTVL_256NS,
+       .multi_tag_num          = MAC_AX_TAG_NUM_8,
+       .lbc_en                 = MAC_AX_PCIE_ENABLE,
+       .lbc_tmr                = MAC_AX_LBC_TMR_2MS,
+       .autok_en               = MAC_AX_PCIE_DISABLE,
+       .io_rcy_en              = MAC_AX_PCIE_DISABLE,
+       .io_rcy_tmr             = MAC_AX_IO_RCY_ANA_TMR_6MS,
+
+       .init_cfg_reg           = R_AX_PCIE_INIT_CFG1,
+       .txhci_en_bit           = B_AX_TXHCI_EN,
+       .rxhci_en_bit           = B_AX_RXHCI_EN,
+       .rxbd_mode_bit          = B_AX_RXBD_MODE,
+       .exp_ctrl_reg           = R_AX_PCIE_EXP_CTRL,
+       .max_tag_num_mask       = B_AX_MAX_TAG_NUM,
+       .rxbd_rwptr_clr_reg     = R_AX_RXBD_RWPTR_CLR,
+       .txbd_rwptr_clr2_reg    = 0,
+       .dma_stop1              = {R_AX_PCIE_DMA_STOP1, B_AX_TX_STOP1_MASK_V1},
+       .dma_stop2              = {0},
+       .dma_busy1              = {R_AX_PCIE_DMA_BUSY1, DMA_BUSY1_CHECK_V1},
+       .dma_busy2_reg          = 0,
+       .dma_busy3_reg          = R_AX_PCIE_DMA_BUSY1,
+
+       .rpwm_addr              = R_AX_PCIE_HRPWM,
+       .cpwm_addr              = R_AX_CPWM,
+       .tx_dma_ch_mask         = BIT(RTW89_TXCH_ACH4) | BIT(RTW89_TXCH_ACH5) |
+                                 BIT(RTW89_TXCH_ACH6) | BIT(RTW89_TXCH_ACH7) |
+                                 BIT(RTW89_TXCH_CH10) | BIT(RTW89_TXCH_CH11),
+       .bd_idx_addr_low_power  = NULL,
+       .dma_addr_set           = &rtw89_pci_ch_dma_addr_set,
+       .bd_ram_table           = &rtw89_bd_ram_table_single,
+
+       .ltr_set                = rtw89_pci_ltr_set,
+       .fill_txaddr_info       = rtw89_pci_fill_txaddr_info,
+       .config_intr_mask       = rtw89_pci_config_intr_mask,
+       .enable_intr            = rtw89_pci_enable_intr,
+       .disable_intr           = rtw89_pci_disable_intr,
+       .recognize_intrs        = rtw89_pci_recognize_intrs,
+};
+
+static const struct rtw89_driver_info rtw89_8851be_info = {
+       .chip = &rtw8851b_chip_info,
+       .bus = {
+               .pci = &rtw8851b_pci_info,
+       },
+};
+
+static const struct pci_device_id rtw89_8851be_id_table[] = {
+       {
+               PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xb851),
+               .driver_data = (kernel_ulong_t)&rtw89_8851be_info,
+       },
+       {},
+};
+MODULE_DEVICE_TABLE(pci, rtw89_8851be_id_table);
+
+static struct pci_driver rtw89_8851be_driver = {
+       .name           = "rtw89_8851be",
+       .id_table       = rtw89_8851be_id_table,
+       .probe          = rtw89_pci_probe,
+       .remove         = rtw89_pci_remove,
+       .driver.pm      = &rtw89_pm_ops,
+};
+module_pci_driver(rtw89_8851be_driver);
+
+MODULE_AUTHOR("Realtek Corporation");
+MODULE_DESCRIPTION("Realtek 802.11ax wireless 8851BE driver");
+MODULE_LICENSE("Dual BSD/GPL");
index d7930ef..4e6f3bb 100644 (file)
@@ -463,6 +463,12 @@ static const struct rtw89_imr_info rtw8852a_imr_info = {
        .tmac_imr_set           = B_AX_TMAC_IMR_SET,
 };
 
+static const struct rtw89_xtal_info rtw8852a_xtal_info = {
+       .xcap_reg               = R_AX_XTAL_ON_CTRL0,
+       .sc_xo_mask             = B_AX_XTAL_SC_XO_MASK,
+       .sc_xi_mask             = B_AX_XTAL_SC_XI_MASK,
+};
+
 static const struct rtw89_rrsr_cfgs rtw8852a_rrsr_cfgs = {
        .ref_rate = {R_AX_TRXPTCL_RRSR_CTL_0, B_AX_WMAC_RESP_REF_RATE_SEL, 0},
        .rsc = {R_AX_TRXPTCL_RRSR_CTL_0, B_AX_WMAC_RESP_RSC_MASK, 2},
@@ -2069,6 +2075,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = {
        .fw_format_max          = RTW8852A_FW_FORMAT_MAX,
        .try_ce_fw              = false,
        .fifo_size              = 458752,
+       .small_fifo_size        = false,
        .dle_scc_rsvd_size      = 0,
        .max_amsdu_limit        = 3500,
        .dis_2g_40m_ul_ofdma    = true,
@@ -2085,6 +2092,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = {
        .rf_table               = {&rtw89_8852a_phy_radioa_table,
                                   &rtw89_8852a_phy_radiob_table,},
        .nctl_table             = &rtw89_8852a_phy_nctl_table,
+       .nctl_post_table        = NULL,
        .byr_table              = &rtw89_8852a_byr_table,
        .dflt_parms             = &rtw89_8852a_dflt_parms,
        .rfe_parms_conf         = NULL,
@@ -2097,6 +2105,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = {
        .support_bands          = BIT(NL80211_BAND_2GHZ) |
                                  BIT(NL80211_BAND_5GHZ),
        .support_bw160          = false,
+       .support_unii4          = false,
        .support_ul_tb_ctrl     = false,
        .hw_sec_hdr             = false,
        .rf_path_num            = 2,
@@ -2107,7 +2116,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = {
        .scam_num               = 128,
        .bacam_num              = 2,
        .bacam_dynamic_num      = 4,
-       .bacam_v1               = false,
+       .bacam_ver              = RTW89_BACAM_V0,
        .sec_ctrl_efuse_size    = 4,
        .physical_efuse_size    = 1216,
        .logical_efuse_size     = 1536,
@@ -2159,6 +2168,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = {
 #ifdef CONFIG_PM
        .wowlan_stub            = &rtw_wowlan_stub_8852a,
 #endif
+       .xtal_info              = &rtw8852a_xtal_info,
 };
 EXPORT_SYMBOL(rtw8852a_chip_info);
 
index 6da1b60..9ed4ade 100644 (file)
@@ -2506,6 +2506,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = {
        .fw_format_max          = RTW8852B_FW_FORMAT_MAX,
        .try_ce_fw              = true,
        .fifo_size              = 196608,
+       .small_fifo_size        = true,
        .dle_scc_rsvd_size      = 98304,
        .max_amsdu_limit        = 3500,
        .dis_2g_40m_ul_ofdma    = true,
@@ -2522,6 +2523,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = {
        .rf_table               = {&rtw89_8852b_phy_radioa_table,
                                   &rtw89_8852b_phy_radiob_table,},
        .nctl_table             = &rtw89_8852b_phy_nctl_table,
+       .nctl_post_table        = NULL,
        .byr_table              = &rtw89_8852b_byr_table,
        .dflt_parms             = &rtw89_8852b_dflt_parms,
        .rfe_parms_conf         = NULL,
@@ -2534,6 +2536,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = {
        .support_bands          = BIT(NL80211_BAND_2GHZ) |
                                  BIT(NL80211_BAND_5GHZ),
        .support_bw160          = false,
+       .support_unii4          = true,
        .support_ul_tb_ctrl     = true,
        .hw_sec_hdr             = false,
        .rf_path_num            = 2,
@@ -2544,7 +2547,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = {
        .scam_num               = 128,
        .bacam_num              = 2,
        .bacam_dynamic_num      = 4,
-       .bacam_v1               = false,
+       .bacam_ver              = RTW89_BACAM_V0,
        .sec_ctrl_efuse_size    = 4,
        .physical_efuse_size    = 1216,
        .logical_efuse_size     = 2048,
@@ -2598,6 +2601,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = {
 #ifdef CONFIG_PM
        .wowlan_stub            = &rtw_wowlan_stub_8852b,
 #endif
+       .xtal_info              = NULL,
 };
 EXPORT_SYMBOL(rtw8852b_chip_info);
 
index ceb819a..f2e70bd 100644 (file)
@@ -2805,6 +2805,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = {
        .fw_format_max          = RTW8852C_FW_FORMAT_MAX,
        .try_ce_fw              = false,
        .fifo_size              = 458752,
+       .small_fifo_size        = false,
        .dle_scc_rsvd_size      = 0,
        .max_amsdu_limit        = 8000,
        .dis_2g_40m_ul_ofdma    = false,
@@ -2821,6 +2822,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = {
        .rf_table               = {&rtw89_8852c_phy_radiob_table,
                                   &rtw89_8852c_phy_radioa_table,},
        .nctl_table             = &rtw89_8852c_phy_nctl_table,
+       .nctl_post_table        = NULL,
        .byr_table              = &rtw89_8852c_byr_table,
        .dflt_parms             = &rtw89_8852c_dflt_parms,
        .rfe_parms_conf         = NULL,
@@ -2834,6 +2836,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = {
                                  BIT(NL80211_BAND_5GHZ) |
                                  BIT(NL80211_BAND_6GHZ),
        .support_bw160          = true,
+       .support_unii4          = true,
        .support_ul_tb_ctrl     = false,
        .hw_sec_hdr             = true,
        .rf_path_num            = 2,
@@ -2844,7 +2847,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = {
        .scam_num               = 128,
        .bacam_num              = 8,
        .bacam_dynamic_num      = 8,
-       .bacam_v1               = true,
+       .bacam_ver              = RTW89_BACAM_V0_EXT,
        .sec_ctrl_efuse_size    = 4,
        .physical_efuse_size    = 1216,
        .logical_efuse_size     = 2048,
@@ -2897,6 +2900,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = {
 #ifdef CONFIG_PM
        .wowlan_stub            = &rtw_wowlan_stub_8852c,
 #endif
+       .xtal_info              = NULL,
 };
 EXPORT_SYMBOL(rtw8852c_chip_info);
 
index 9e9f694..9ba99f3 100644 (file)
@@ -20,12 +20,14 @@ enum ser_evt {
        SER_EV_NONE,
        SER_EV_STATE_IN,
        SER_EV_STATE_OUT,
+       SER_EV_L1_RESET_PREPARE, /* pre-M0 */
        SER_EV_L1_RESET, /* M1 */
        SER_EV_DO_RECOVERY, /* M3 */
        SER_EV_MAC_RESET_DONE, /* M5 */
        SER_EV_L2_RESET,
        SER_EV_L2_RECFG_DONE,
        SER_EV_L2_RECFG_TIMEOUT,
+       SER_EV_M1_TIMEOUT,
        SER_EV_M3_TIMEOUT,
        SER_EV_FW_M5_TIMEOUT,
        SER_EV_L0_RESET,
@@ -34,6 +36,7 @@ enum ser_evt {
 
 enum ser_state {
        SER_IDLE_ST,
+       SER_L1_RESET_PRE_ST,
        SER_RESET_TRX_ST,
        SER_DO_HCI_ST,
        SER_L2_RESET_ST,
@@ -374,6 +377,13 @@ static int hal_stop_dma(struct rtw89_ser *ser)
        return ret;
 }
 
+static void hal_send_post_m0_event(struct rtw89_ser *ser)
+{
+       struct rtw89_dev *rtwdev = container_of(ser, struct rtw89_dev, ser);
+
+       rtw89_mac_set_err_status(rtwdev, MAC_AX_ERR_L1_RESET_START_DMAC);
+}
+
 static void hal_send_m2_event(struct rtw89_ser *ser)
 {
        struct rtw89_dev *rtwdev = container_of(ser, struct rtw89_dev, ser);
@@ -398,6 +408,9 @@ static void ser_idle_st_hdl(struct rtw89_ser *ser, u8 evt)
                rtw89_hci_recovery_complete(rtwdev);
                clear_bit(RTW89_FLAG_CRASH_SIMULATING, rtwdev->flags);
                break;
+       case SER_EV_L1_RESET_PREPARE:
+               ser_state_goto(ser, SER_L1_RESET_PRE_ST);
+               break;
        case SER_EV_L1_RESET:
                ser_state_goto(ser, SER_RESET_TRX_ST);
                break;
@@ -412,6 +425,28 @@ static void ser_idle_st_hdl(struct rtw89_ser *ser, u8 evt)
        }
 }
 
+static void ser_l1_reset_pre_st_hdl(struct rtw89_ser *ser, u8 evt)
+{
+       switch (evt) {
+       case SER_EV_STATE_IN:
+               ser->prehandle_l1 = true;
+               hal_send_post_m0_event(ser);
+               ser_set_alarm(ser, 1000, SER_EV_M1_TIMEOUT);
+               break;
+       case SER_EV_L1_RESET:
+               ser_state_goto(ser, SER_RESET_TRX_ST);
+               break;
+       case SER_EV_M1_TIMEOUT:
+               ser_state_goto(ser, SER_L2_RESET_ST);
+               break;
+       case SER_EV_STATE_OUT:
+               ser_del_alarm(ser);
+               break;
+       default:
+               break;
+       }
+}
+
 static void ser_reset_trx_st_hdl(struct rtw89_ser *ser, u8 evt)
 {
        struct rtw89_dev *rtwdev = container_of(ser, struct rtw89_dev, ser);
@@ -654,12 +689,14 @@ static const struct event_ent ser_ev_tbl[] = {
        {SER_EV_NONE, "SER_EV_NONE"},
        {SER_EV_STATE_IN, "SER_EV_STATE_IN"},
        {SER_EV_STATE_OUT, "SER_EV_STATE_OUT"},
-       {SER_EV_L1_RESET, "SER_EV_L1_RESET"},
+       {SER_EV_L1_RESET_PREPARE, "SER_EV_L1_RESET_PREPARE pre-m0"},
+       {SER_EV_L1_RESET, "SER_EV_L1_RESET m1"},
        {SER_EV_DO_RECOVERY, "SER_EV_DO_RECOVERY m3"},
        {SER_EV_MAC_RESET_DONE, "SER_EV_MAC_RESET_DONE m5"},
        {SER_EV_L2_RESET, "SER_EV_L2_RESET"},
        {SER_EV_L2_RECFG_DONE, "SER_EV_L2_RECFG_DONE"},
        {SER_EV_L2_RECFG_TIMEOUT, "SER_EV_L2_RECFG_TIMEOUT"},
+       {SER_EV_M1_TIMEOUT, "SER_EV_M1_TIMEOUT"},
        {SER_EV_M3_TIMEOUT, "SER_EV_M3_TIMEOUT"},
        {SER_EV_FW_M5_TIMEOUT, "SER_EV_FW_M5_TIMEOUT"},
        {SER_EV_L0_RESET, "SER_EV_L0_RESET"},
@@ -668,6 +705,7 @@ static const struct event_ent ser_ev_tbl[] = {
 
 static const struct state_ent ser_st_tbl[] = {
        {SER_IDLE_ST, "SER_IDLE_ST", ser_idle_st_hdl},
+       {SER_L1_RESET_PRE_ST, "SER_L1_RESET_PRE_ST", ser_l1_reset_pre_st_hdl},
        {SER_RESET_TRX_ST, "SER_RESET_TRX_ST", ser_reset_trx_st_hdl},
        {SER_DO_HCI_ST, "SER_DO_HCI_ST", ser_do_hci_st_hdl},
        {SER_L2_RESET_ST, "SER_L2_RESET_ST", ser_l2_reset_st_hdl}
@@ -713,6 +751,9 @@ int rtw89_ser_notify(struct rtw89_dev *rtwdev, u32 err)
        rtw89_info(rtwdev, "SER catches error: 0x%x\n", err);
 
        switch (err) {
+       case MAC_AX_ERR_L1_PREERR_DMAC: /* pre-M0 */
+               event = SER_EV_L1_RESET_PREPARE;
+               break;
        case MAC_AX_ERR_L1_ERR_DMAC:
        case MAC_AX_ERR_L0_PROMOTE_TO_L1:
                event = SER_EV_L1_RESET; /* M1 */
index 98eb960..d880ecb 100644 (file)
        le32_get_bits(*((const __le32 *)ie), GENMASK(4, 0))
 #define RTW89_GET_PHY_STS_IE_LEN(ie) \
        le32_get_bits(*((const __le32 *)ie), GENMASK(11, 5))
-#define RTW89_GET_PHY_STS_IE01_CH_IDX(ie) \
-       le32_get_bits(*((const __le32 *)ie), GENMASK(23, 16))
-#define RTW89_GET_PHY_STS_IE01_FD_CFO(ie) \
-       le32_get_bits(*((const __le32 *)(ie) + 1), GENMASK(19, 8))
-#define RTW89_GET_PHY_STS_IE01_PREMB_CFO(ie) \
-       le32_get_bits(*((const __le32 *)(ie) + 1), GENMASK(31, 20))
+
+struct rtw89_phy_sts_ie0 {
+       __le32 w0;
+       __le32 w1;
+       __le32 w2;
+} __packed;
+
+#define RTW89_PHY_STS_IE01_W0_CH_IDX GENMASK(23, 16)
+#define RTW89_PHY_STS_IE01_W1_FD_CFO GENMASK(19, 8)
+#define RTW89_PHY_STS_IE01_W1_PREMB_CFO GENMASK(31, 20)
+#define RTW89_PHY_STS_IE01_W2_AVG_SNR GENMASK(5, 0)
+#define RTW89_PHY_STS_IE01_W2_EVM_MAX GENMASK(15, 8)
+#define RTW89_PHY_STS_IE01_W2_EVM_MIN GENMASK(23, 16)
 
 enum rtw89_tx_channel {
        RTW89_TXCH_ACH0 = 0,
index 2ca8abb..364e546 100644 (file)
@@ -91,7 +91,7 @@ static void rtw89_wow_show_wakeup_reason(struct rtw89_dev *rtwdev)
        u32 wow_reason_reg;
        u8 reason;
 
-       if (chip_id == RTL8852A || chip_id == RTL8852B)
+       if (chip_id == RTL8852A || chip_id == RTL8852B || chip_id == RTL8851B)
                wow_reason_reg = R_AX_C2HREG_DATA3 + 3;
        else
                wow_reason_reg = R_AX_C2HREG_DATA3_V1 + 3;
index e700dc8..5664ac5 100644 (file)
@@ -140,17 +140,6 @@ enum ipc_channel_state {
        IMEM_CHANNEL_CLOSING,
 };
 
-/* Time Unit */
-enum ipc_time_unit {
-       IPC_SEC = 0,
-       IPC_MILLI_SEC = 1,
-       IPC_MICRO_SEC = 2,
-       IPC_NANO_SEC = 3,
-       IPC_PICO_SEC = 4,
-       IPC_FEMTO_SEC = 5,
-       IPC_ATTO_SEC = 6,
-};
-
 /**
  * enum ipc_ctype - Enum defining supported channel type needed for control
  *                 /IP traffic.
@@ -204,7 +193,6 @@ enum ipc_hp_identifier {
  * @pipe_nr:                   Pipe identification number
  * @irq:                       Interrupt vector
  * @dir:                       Direction of data stream in pipe
- * @td_tag:                    Unique tag of the buffer queued
  * @buf_size:                  Buffer size (in bytes) for preallocated
  *                             buffers (for DL pipes)
  * @nr_of_queued_entries:      Aueued number of entries
@@ -224,7 +212,6 @@ struct ipc_pipe {
        u32 pipe_nr;
        u32 irq;
        enum ipc_mem_pipe_dir dir;
-       u32 td_tag;
        u32 buf_size;
        u16 nr_of_queued_entries;
        u8 is_open:1;
index 9968bb8..17ca8d1 100644 (file)
@@ -333,9 +333,7 @@ struct mux_acb {
  * @wwan_q_offset:     This will hold the offset of the given instance
  *                     Useful while passing or receiving packets from
  *                     wwan/imem layer.
- * @adb_finish_timer:  Timer for forcefully finishing the ADB
  * @acb_tx_sequence_nr: Sequence number for the ACB header.
- * @params:            user configurable parameters
  * @adb_tx_sequence_nr: Sequence number for ADB header
  * @acc_adb_size:       Statistic data for logging
  * @acc_payload_size:   Statistic data for logging
@@ -367,9 +365,7 @@ struct iosm_mux {
        long long ul_data_pend_bytes;
        struct mux_acb acb;
        int wwan_q_offset;
-       struct hrtimer adb_finish_timer;
        u16 acb_tx_sequence_nr;
-       struct ipc_params *params;
        u16 adb_tx_sequence_nr;
        unsigned long long acc_adb_size;
        unsigned long long acc_payload_size;
index 4c9022a..ff747fc 100644 (file)
@@ -18,8 +18,6 @@
 #define IOSM_IP_TYPE_IPV4 0x40
 #define IOSM_IP_TYPE_IPV6 0x60
 
-#define IOSM_IF_ID_PAYLOAD 2
-
 /**
  * struct iosm_netdev_priv - netdev WWAN driver specific private data
  * @ipc_wwan:  Pointer to iosm_wwan struct
index c1501f4..3d79b35 100644 (file)
@@ -1128,9 +1128,7 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
                        BUG();
 
                offset += len;
-               __skb_frag_set_page(&frags[i], page);
-               skb_frag_off_set(&frags[i], 0);
-               skb_frag_size_set(&frags[i], len);
+               skb_frag_fill_page_desc(&frags[i], page, 0, len);
        }
 
        /* Release all the original (foreign) frags. */
index 1e0f229..c1896a1 100644 (file)
@@ -359,7 +359,7 @@ static struct i2c_driver fdp_nci_i2c_driver = {
                   .name = FDP_I2C_DRIVER_NAME,
                   .acpi_match_table = fdp_nci_i2c_acpi_match,
                  },
-       .probe_new = fdp_nci_i2c_probe,
+       .probe = fdp_nci_i2c_probe,
        .remove = fdp_nci_i2c_remove,
 };
 module_i2c_driver(fdp_nci_i2c_driver);
index e72b358..642df4e 100644 (file)
@@ -286,7 +286,7 @@ static struct i2c_driver microread_i2c_driver = {
        .driver = {
                .name = MICROREAD_I2C_DRIVER_NAME,
        },
-       .probe_new      = microread_i2c_probe,
+       .probe          = microread_i2c_probe,
        .remove         = microread_i2c_remove,
        .id_table       = microread_i2c_id,
 };
index 164e2ab..7455313 100644 (file)
@@ -258,7 +258,7 @@ static const struct i2c_device_id nfcmrvl_i2c_id_table[] = {
 MODULE_DEVICE_TABLE(i2c, nfcmrvl_i2c_id_table);
 
 static struct i2c_driver nfcmrvl_i2c_driver = {
-       .probe_new = nfcmrvl_i2c_probe,
+       .probe = nfcmrvl_i2c_probe,
        .id_table = nfcmrvl_i2c_id_table,
        .remove = nfcmrvl_i2c_remove,
        .driver = {
index d4c299b..baddaf2 100644 (file)
@@ -348,7 +348,7 @@ static struct i2c_driver nxp_nci_i2c_driver = {
                   .acpi_match_table = ACPI_PTR(acpi_id),
                   .of_match_table = of_nxp_nci_i2c_match,
                  },
-       .probe_new = nxp_nci_i2c_probe,
+       .probe = nxp_nci_i2c_probe,
        .id_table = nxp_nci_i2c_id_table,
        .remove = nxp_nci_i2c_remove,
 };
index 1503a98..438ab95 100644 (file)
@@ -259,7 +259,7 @@ static struct i2c_driver pn533_i2c_driver = {
                   .name = PN533_I2C_DRIVER_NAME,
                   .of_match_table = of_match_ptr(of_pn533_i2c_match),
                  },
-       .probe_new = pn533_i2c_probe,
+       .probe = pn533_i2c_probe,
        .id_table = pn533_i2c_id_table,
        .remove = pn533_i2c_remove,
 };
index 8b0d910..3f6d748 100644 (file)
@@ -953,7 +953,7 @@ static struct i2c_driver pn544_hci_i2c_driver = {
                   .of_match_table = of_match_ptr(of_pn544_i2c_match),
                   .acpi_match_table = ACPI_PTR(pn544_hci_i2c_acpi_match),
                  },
-       .probe_new = pn544_hci_i2c_probe,
+       .probe = pn544_hci_i2c_probe,
        .id_table = pn544_hci_i2c_id_table,
        .remove = pn544_hci_i2c_remove,
 };
index 2517ae7..720d4a7 100644 (file)
@@ -261,7 +261,7 @@ static struct i2c_driver s3fwrn5_i2c_driver = {
                .name = S3FWRN5_I2C_DRIVER_NAME,
                .of_match_table = of_match_ptr(of_s3fwrn5_i2c_match),
        },
-       .probe_new = s3fwrn5_i2c_probe,
+       .probe = s3fwrn5_i2c_probe,
        .remove = s3fwrn5_i2c_remove,
        .id_table = s3fwrn5_i2c_id_table,
 };
index 6b5eed8..d20a337 100644 (file)
@@ -283,7 +283,7 @@ static struct i2c_driver st_nci_i2c_driver = {
                .of_match_table = of_match_ptr(of_st_nci_i2c_match),
                .acpi_match_table = ACPI_PTR(st_nci_i2c_acpi_match),
        },
-       .probe_new = st_nci_i2c_probe,
+       .probe = st_nci_i2c_probe,
        .id_table = st_nci_i2c_id_table,
        .remove = st_nci_i2c_remove,
 };
index 55f7a23..064a63d 100644 (file)
@@ -597,7 +597,7 @@ static struct i2c_driver st21nfca_hci_i2c_driver = {
                .of_match_table = of_match_ptr(of_st21nfca_i2c_match),
                .acpi_match_table = ACPI_PTR(st21nfca_hci_i2c_acpi_match),
        },
-       .probe_new = st21nfca_hci_i2c_probe,
+       .probe = st21nfca_hci_i2c_probe,
        .id_table = st21nfca_hci_i2c_id_table,
        .remove = st21nfca_hci_i2c_remove,
 };
index 8acb9eb..1399b5d 100644 (file)
@@ -660,7 +660,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (ret)
                goto err_disable;
 
-       ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+       ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (ret)
                goto err_resource;
 
index 8a4c866..2ff07ba 100644 (file)
@@ -101,269 +101,3 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
        return npages;
 }
 EXPORT_SYMBOL_GPL(netfs_extract_user_iter);
-
-/*
- * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
- * iterators, and add them to the scatterlist.
- */
-static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter,
-                                       ssize_t maxsize,
-                                       struct sg_table *sgtable,
-                                       unsigned int sg_max,
-                                       iov_iter_extraction_t extraction_flags)
-{
-       struct scatterlist *sg = sgtable->sgl + sgtable->nents;
-       struct page **pages;
-       unsigned int npages;
-       ssize_t ret = 0, res;
-       size_t len, off;
-
-       /* We decant the page list into the tail of the scatterlist */
-       pages = (void *)sgtable->sgl + array_size(sg_max, sizeof(struct scatterlist));
-       pages -= sg_max;
-
-       do {
-               res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
-                                            extraction_flags, &off);
-               if (res < 0)
-                       goto failed;
-
-               len = res;
-               maxsize -= len;
-               ret += len;
-               npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
-               sg_max -= npages;
-
-               for (; npages > 0; npages--) {
-                       struct page *page = *pages;
-                       size_t seg = min_t(size_t, PAGE_SIZE - off, len);
-
-                       *pages++ = NULL;
-                       sg_set_page(sg, page, seg, off);
-                       sgtable->nents++;
-                       sg++;
-                       len -= seg;
-                       off = 0;
-               }
-       } while (maxsize > 0 && sg_max > 0);
-
-       return ret;
-
-failed:
-       while (sgtable->nents > sgtable->orig_nents)
-               put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
-       return res;
-}
-
-/*
- * Extract up to sg_max pages from a BVEC-type iterator and add them to the
- * scatterlist.  The pages are not pinned.
- */
-static ssize_t netfs_extract_bvec_to_sg(struct iov_iter *iter,
-                                       ssize_t maxsize,
-                                       struct sg_table *sgtable,
-                                       unsigned int sg_max,
-                                       iov_iter_extraction_t extraction_flags)
-{
-       const struct bio_vec *bv = iter->bvec;
-       struct scatterlist *sg = sgtable->sgl + sgtable->nents;
-       unsigned long start = iter->iov_offset;
-       unsigned int i;
-       ssize_t ret = 0;
-
-       for (i = 0; i < iter->nr_segs; i++) {
-               size_t off, len;
-
-               len = bv[i].bv_len;
-               if (start >= len) {
-                       start -= len;
-                       continue;
-               }
-
-               len = min_t(size_t, maxsize, len - start);
-               off = bv[i].bv_offset + start;
-
-               sg_set_page(sg, bv[i].bv_page, len, off);
-               sgtable->nents++;
-               sg++;
-               sg_max--;
-
-               ret += len;
-               maxsize -= len;
-               if (maxsize <= 0 || sg_max == 0)
-                       break;
-               start = 0;
-       }
-
-       if (ret > 0)
-               iov_iter_advance(iter, ret);
-       return ret;
-}
-
-/*
- * Extract up to sg_max pages from a KVEC-type iterator and add them to the
- * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
- * static buffers.  The pages are not pinned.
- */
-static ssize_t netfs_extract_kvec_to_sg(struct iov_iter *iter,
-                                       ssize_t maxsize,
-                                       struct sg_table *sgtable,
-                                       unsigned int sg_max,
-                                       iov_iter_extraction_t extraction_flags)
-{
-       const struct kvec *kv = iter->kvec;
-       struct scatterlist *sg = sgtable->sgl + sgtable->nents;
-       unsigned long start = iter->iov_offset;
-       unsigned int i;
-       ssize_t ret = 0;
-
-       for (i = 0; i < iter->nr_segs; i++) {
-               struct page *page;
-               unsigned long kaddr;
-               size_t off, len, seg;
-
-               len = kv[i].iov_len;
-               if (start >= len) {
-                       start -= len;
-                       continue;
-               }
-
-               kaddr = (unsigned long)kv[i].iov_base + start;
-               off = kaddr & ~PAGE_MASK;
-               len = min_t(size_t, maxsize, len - start);
-               kaddr &= PAGE_MASK;
-
-               maxsize -= len;
-               ret += len;
-               do {
-                       seg = min_t(size_t, len, PAGE_SIZE - off);
-                       if (is_vmalloc_or_module_addr((void *)kaddr))
-                               page = vmalloc_to_page((void *)kaddr);
-                       else
-                               page = virt_to_page(kaddr);
-
-                       sg_set_page(sg, page, len, off);
-                       sgtable->nents++;
-                       sg++;
-                       sg_max--;
-
-                       len -= seg;
-                       kaddr += PAGE_SIZE;
-                       off = 0;
-               } while (len > 0 && sg_max > 0);
-
-               if (maxsize <= 0 || sg_max == 0)
-                       break;
-               start = 0;
-       }
-
-       if (ret > 0)
-               iov_iter_advance(iter, ret);
-       return ret;
-}
-
-/*
- * Extract up to sg_max folios from an XARRAY-type iterator and add them to
- * the scatterlist.  The pages are not pinned.
- */
-static ssize_t netfs_extract_xarray_to_sg(struct iov_iter *iter,
-                                         ssize_t maxsize,
-                                         struct sg_table *sgtable,
-                                         unsigned int sg_max,
-                                         iov_iter_extraction_t extraction_flags)
-{
-       struct scatterlist *sg = sgtable->sgl + sgtable->nents;
-       struct xarray *xa = iter->xarray;
-       struct folio *folio;
-       loff_t start = iter->xarray_start + iter->iov_offset;
-       pgoff_t index = start / PAGE_SIZE;
-       ssize_t ret = 0;
-       size_t offset, len;
-       XA_STATE(xas, xa, index);
-
-       rcu_read_lock();
-
-       xas_for_each(&xas, folio, ULONG_MAX) {
-               if (xas_retry(&xas, folio))
-                       continue;
-               if (WARN_ON(xa_is_value(folio)))
-                       break;
-               if (WARN_ON(folio_test_hugetlb(folio)))
-                       break;
-
-               offset = offset_in_folio(folio, start);
-               len = min_t(size_t, maxsize, folio_size(folio) - offset);
-
-               sg_set_page(sg, folio_page(folio, 0), len, offset);
-               sgtable->nents++;
-               sg++;
-               sg_max--;
-
-               maxsize -= len;
-               ret += len;
-               if (maxsize <= 0 || sg_max == 0)
-                       break;
-       }
-
-       rcu_read_unlock();
-       if (ret > 0)
-               iov_iter_advance(iter, ret);
-       return ret;
-}
-
-/**
- * netfs_extract_iter_to_sg - Extract pages from an iterator and add ot an sglist
- * @iter: The iterator to extract from
- * @maxsize: The amount of iterator to copy
- * @sgtable: The scatterlist table to fill in
- * @sg_max: Maximum number of elements in @sgtable that may be filled
- * @extraction_flags: Flags to qualify the request
- *
- * Extract the page fragments from the given amount of the source iterator and
- * add them to a scatterlist that refers to all of those bits, to a maximum
- * addition of @sg_max elements.
- *
- * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
- * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
- * and DISCARD-type are not supported.
- *
- * No end mark is placed on the scatterlist; that's left to the caller.
- *
- * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
- * be allowed on the pages extracted.
- *
- * If successul, @sgtable->nents is updated to include the number of elements
- * added and the number of bytes added is returned.  @sgtable->orig_nents is
- * left unaltered.
- *
- * The iov_iter_extract_mode() function should be used to query how cleanup
- * should be performed.
- */
-ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
-                                struct sg_table *sgtable, unsigned int sg_max,
-                                iov_iter_extraction_t extraction_flags)
-{
-       if (maxsize == 0)
-               return 0;
-
-       switch (iov_iter_type(iter)) {
-       case ITER_UBUF:
-       case ITER_IOVEC:
-               return netfs_extract_user_to_sg(iter, maxsize, sgtable, sg_max,
-                                               extraction_flags);
-       case ITER_BVEC:
-               return netfs_extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
-                                               extraction_flags);
-       case ITER_KVEC:
-               return netfs_extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
-                                               extraction_flags);
-       case ITER_XARRAY:
-               return netfs_extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
-                                                 extraction_flags);
-       default:
-               pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
-               WARN_ON_ONCE(1);
-               return -EIO;
-       }
-}
-EXPORT_SYMBOL_GPL(netfs_extract_iter_to_sg);
index 6e3be58..38d2265 100644 (file)
@@ -4333,8 +4333,8 @@ static void *smb2_get_aead_req(struct crypto_aead *tfm, struct smb_rqst *rqst,
                }
                sgtable.orig_nents = sgtable.nents;
 
-               rc = netfs_extract_iter_to_sg(iter, count, &sgtable,
-                                             num_sgs - sgtable.nents, 0);
+               rc = extract_iter_to_sg(iter, count, &sgtable,
+                                       num_sgs - sgtable.nents, 0);
                iov_iter_revert(iter, rc);
                sgtable.orig_nents = sgtable.nents;
        }
index 0362ebd..223e17c 100644 (file)
@@ -2227,7 +2227,7 @@ static int smbd_iter_to_mr(struct smbd_connection *info,
 
        memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist));
 
-       ret = netfs_extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
+       ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
        WARN_ON(ret < 0);
        if (sgt->nents > 0)
                sg_mark_end(&sgt->sgl[sgt->nents - 1]);
index 7e76623..34224e7 100644 (file)
@@ -56,9 +56,9 @@ struct af_alg_type {
 };
 
 struct af_alg_sgl {
-       struct scatterlist sg[ALG_MAX_PAGES + 1];
-       struct page *pages[ALG_MAX_PAGES];
-       unsigned int npages;
+       struct sg_table sgt;
+       struct scatterlist sgl[ALG_MAX_PAGES + 1];
+       bool need_unpin;
 };
 
 /* TX SGL entry */
@@ -163,7 +163,6 @@ int af_alg_release(struct socket *sock);
 void af_alg_release_parent(struct sock *sk);
 int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
 
-int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
 void af_alg_free_sg(struct af_alg_sgl *sgl);
 
 static inline struct alg_sock *alg_sk(struct sock *sk)
index e53ceee..f588958 100644 (file)
@@ -1125,7 +1125,6 @@ struct bpf_trampoline {
        int progs_cnt[BPF_TRAMP_MAX];
        /* Executable image of trampoline */
        struct bpf_tramp_image *cur_image;
-       u64 selector;
        struct module *mod;
 };
 
@@ -1197,7 +1196,7 @@ enum bpf_dynptr_type {
 };
 
 int bpf_dynptr_check_size(u32 size);
-u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
+u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
 
 #ifdef CONFIG_BPF_JIT
 int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
@@ -2078,8 +2077,8 @@ struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
 struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
 
-int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
-int bpf_obj_get_user(const char __user *pathname, int flags);
+int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname);
+int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags);
 
 #define BPF_ITER_FUNC_PREFIX "bpf_iter_"
 #define DEFINE_BPF_ITER_FUNC(target, args...)                  \
index 3dd29a5..5b11a3b 100644 (file)
  * that converting umax_value to int cannot overflow.
  */
 #define BPF_MAX_VAR_SIZ        (1 << 29)
-/* size of type_str_buf in bpf_verifier. */
-#define TYPE_STR_BUF_LEN 128
+/* size of tmp_str_buf in bpf_verifier.
+ * we need at least 306 bytes to fit full stack mask representation
+ * (in the "-8,-16,...,-512" form)
+ */
+#define TMP_STR_BUF_LEN 320
 
 /* Liveness marks, used for registers and spilled-regs (in stack slots).
  * Read marks propagate upwards until they find a write mark; they record that
@@ -238,6 +241,10 @@ enum bpf_stack_slot_type {
 
 #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
 
+#define BPF_REGMASK_ARGS ((1 << BPF_REG_1) | (1 << BPF_REG_2) | \
+                         (1 << BPF_REG_3) | (1 << BPF_REG_4) | \
+                         (1 << BPF_REG_5))
+
 #define BPF_DYNPTR_SIZE                sizeof(struct bpf_dynptr_kern)
 #define BPF_DYNPTR_NR_SLOTS            (BPF_DYNPTR_SIZE / BPF_REG_SIZE)
 
@@ -541,6 +548,15 @@ struct bpf_subprog_info {
        bool is_async_cb;
 };
 
+struct bpf_verifier_env;
+
+struct backtrack_state {
+       struct bpf_verifier_env *env;
+       u32 frame;
+       u32 reg_masks[MAX_CALL_FRAMES];
+       u64 stack_masks[MAX_CALL_FRAMES];
+};
+
 /* single container for all structs
  * one verifier_env per bpf_check() call
  */
@@ -578,6 +594,7 @@ struct bpf_verifier_env {
                int *insn_stack;
                int cur_stack;
        } cfg;
+       struct backtrack_state bt;
        u32 pass_cnt; /* number of times do_check() was called */
        u32 subprog_cnt;
        /* number of instructions analyzed by the verifier */
@@ -606,8 +623,10 @@ struct bpf_verifier_env {
        /* Same as scratched_regs but for stack slots */
        u64 scratched_stack_slots;
        u64 prev_log_pos, prev_insn_print_pos;
-       /* buffer used in reg_type_str() to generate reg_type string */
-       char type_str_buf[TYPE_STR_BUF_LEN];
+       /* buffer used to generate temporary string representations,
+        * e.g., in reg_type_str() to generate reg_type string
+        */
+       char tmp_str_buf[TMP_STR_BUF_LEN];
 };
 
 __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
index 9e77165..251833a 100644 (file)
@@ -89,6 +89,7 @@
 #define MII_BCM54XX_EXP_SEL    0x17    /* Expansion register select */
 #define MII_BCM54XX_EXP_SEL_TOP        0x0d00  /* TOP_MISC expansion register select */
 #define MII_BCM54XX_EXP_SEL_SSD        0x0e00  /* Secondary SerDes select */
+#define MII_BCM54XX_EXP_SEL_WOL        0x0e00  /* Wake-on-LAN expansion select register */
 #define MII_BCM54XX_EXP_SEL_ER 0x0f00  /* Expansion register select */
 #define MII_BCM54XX_EXP_SEL_ETC        0x0d00  /* Expansion register spare + 2k mem */
 
 #define BCM54XX_TOP_MISC_IDDQ_SD               (1 << 2)
 #define BCM54XX_TOP_MISC_IDDQ_SR               (1 << 3)
 
+#define BCM54XX_TOP_MISC_LED_CTL               (MII_BCM54XX_EXP_SEL_TOP + 0x0C)
+#define  BCM54XX_LED4_SEL_INTR                 BIT(1)
+
 /*
  * BCM5482: Secondary SerDes registers
  */
 #define BCM54612E_EXP_SPARE0           (MII_BCM54XX_EXP_SEL_ETC + 0x34)
 #define BCM54612E_LED4_CLK125OUT_EN    (1 << 1)
 
+
+/* Wake-on-LAN registers */
+#define BCM54XX_WOL_MAIN_CTL           (MII_BCM54XX_EXP_SEL_WOL + 0x80)
+#define  BCM54XX_WOL_EN                        BIT(0)
+#define  BCM54XX_WOL_MODE_SINGLE_MPD   0
+#define  BCM54XX_WOL_MODE_SINGLE_MPDSEC        1
+#define  BCM54XX_WOL_MODE_DUAL         2
+#define  BCM54XX_WOL_MODE_SHIFT                1
+#define  BCM54XX_WOL_MODE_MASK         0x3
+#define  BCM54XX_WOL_MP_MSB_FF_EN      BIT(3)
+#define  BCM54XX_WOL_SECKEY_OPT_4B     0
+#define  BCM54XX_WOL_SECKEY_OPT_6B     1
+#define  BCM54XX_WOL_SECKEY_OPT_8B     2
+#define  BCM54XX_WOL_SECKEY_OPT_SHIFT  4
+#define  BCM54XX_WOL_SECKEY_OPT_MASK   0x3
+#define  BCM54XX_WOL_L2_TYPE_CHK       BIT(6)
+#define  BCM54XX_WOL_L4IPV4UDP_CHK     BIT(7)
+#define  BCM54XX_WOL_L4IPV6UDP_CHK     BIT(8)
+#define  BCM54XX_WOL_UDPPORT_CHK       BIT(9)
+#define  BCM54XX_WOL_CRC_CHK           BIT(10)
+#define  BCM54XX_WOL_SECKEY_MODE       BIT(11)
+#define  BCM54XX_WOL_RST               BIT(12)
+#define  BCM54XX_WOL_DIR_PKT_EN                BIT(13)
+#define  BCM54XX_WOL_MASK_MODE_DA_FF   0
+#define  BCM54XX_WOL_MASK_MODE_DA_MPD  1
+#define  BCM54XX_WOL_MASK_MODE_DA_ONLY 2
+#define  BCM54XX_WOL_MASK_MODE_MPD     3
+#define  BCM54XX_WOL_MASK_MODE_SHIFT   14
+#define  BCM54XX_WOL_MASK_MODE_MASK    0x3
+
+#define BCM54XX_WOL_INNER_PROTO                (MII_BCM54XX_EXP_SEL_WOL + 0x81)
+#define BCM54XX_WOL_OUTER_PROTO                (MII_BCM54XX_EXP_SEL_WOL + 0x82)
+#define BCM54XX_WOL_OUTER_PROTO2       (MII_BCM54XX_EXP_SEL_WOL + 0x83)
+
+#define BCM54XX_WOL_MPD_DATA1(x)       (MII_BCM54XX_EXP_SEL_WOL + 0x84 + (x))
+#define BCM54XX_WOL_MPD_DATA2(x)       (MII_BCM54XX_EXP_SEL_WOL + 0x87 + (x))
+#define BCM54XX_WOL_SEC_KEY_8B         (MII_BCM54XX_EXP_SEL_WOL + 0x8A)
+#define BCM54XX_WOL_MASK(x)            (MII_BCM54XX_EXP_SEL_WOL + 0x8B + (x))
+#define BCM54XX_SEC_KEY_STORE(x)       (MII_BCM54XX_EXP_SEL_WOL + 0x8E)
+#define BCM54XX_WOL_SHARED_CNT         (MII_BCM54XX_EXP_SEL_WOL + 0x92)
+
+#define BCM54XX_WOL_INT_MASK           (MII_BCM54XX_EXP_SEL_WOL + 0x93)
+#define  BCM54XX_WOL_PKT1              BIT(0)
+#define  BCM54XX_WOL_PKT2              BIT(1)
+#define  BCM54XX_WOL_DIR               BIT(2)
+#define  BCM54XX_WOL_ALL_INTRS         (BCM54XX_WOL_PKT1 | \
+                                        BCM54XX_WOL_PKT2 | \
+                                        BCM54XX_WOL_DIR)
+
+#define BCM54XX_WOL_INT_STATUS         (MII_BCM54XX_EXP_SEL_WOL + 0x94)
+
 /*****************************************************************************/
 /* Fast Ethernet Transceiver definitions. */
 /*****************************************************************************/
 #define LPI_FEATURE_EN                 0x8000
 #define LPI_FEATURE_EN_DIG1000X                0x4000
 
+#define BRCM_CL45VEN_EEE_LPI_CNT       0x803f
+
 /* Core register definitions*/
 #define MII_BRCM_CORE_BASE12   0x12
 #define MII_BRCM_CORE_BASE13   0x13
index 508199e..cac9f30 100644 (file)
@@ -98,10 +98,14 @@ struct btf_type;
 union bpf_attr;
 struct btf_show;
 struct btf_id_set;
+struct bpf_prog;
+
+typedef int (*btf_kfunc_filter_t)(const struct bpf_prog *prog, u32 kfunc_id);
 
 struct btf_kfunc_id_set {
        struct module *owner;
        struct btf_id_set8 *set;
+       btf_kfunc_filter_t filter;
 };
 
 struct btf_id_dtor_kfunc {
@@ -479,7 +483,6 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
        return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
 }
 
-struct bpf_prog;
 struct bpf_verifier_log;
 
 #ifdef CONFIG_BPF_SYSCALL
@@ -487,10 +490,10 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
 struct btf *btf_parse_vmlinux(void);
 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
-u32 *btf_kfunc_id_set_contains(const struct btf *btf,
-                              enum bpf_prog_type prog_type,
-                              u32 kfunc_btf_id);
-u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id);
+u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id,
+                              const struct bpf_prog *prog);
+u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
+                               const struct bpf_prog *prog);
 int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
                              const struct btf_kfunc_id_set *s);
 int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset);
@@ -517,8 +520,9 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
        return NULL;
 }
 static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf,
-                                            enum bpf_prog_type prog_type,
-                                            u32 kfunc_btf_id)
+                                            u32 kfunc_btf_id,
+                                            struct bpf_prog *prog)
+
 {
        return NULL;
 }
index 6995092..6933654 100644 (file)
@@ -6,6 +6,9 @@
 #ifndef _CAN_LENGTH_H
 #define _CAN_LENGTH_H
 
+#include <linux/can.h>
+#include <linux/can/netlink.h>
+
 /*
  * Size of a Classical CAN Standard Frame
  *
index c39bbf1..8af62ff 100644 (file)
@@ -183,6 +183,49 @@ struct led_classdev {
 
        /* LEDs that have private triggers have this set */
        struct led_hw_trigger_type      *trigger_type;
+
+       /* Unique trigger name supported by LED set in hw control mode */
+       const char              *hw_control_trigger;
+       /*
+        * Check if the LED driver supports the requested mode provided by the
+        * defined supported trigger to setup the LED to hw control mode.
+        *
+        * Return 0 on success. Return -EOPNOTSUPP when the passed flags are not
+        * supported and software fallback needs to be used.
+        * Return a negative error number on any other case  for check fail due
+        * to various reason like device not ready or timeouts.
+        */
+       int                     (*hw_control_is_supported)(struct led_classdev *led_cdev,
+                                                          unsigned long flags);
+       /*
+        * Activate hardware control, LED driver will use the provided flags
+        * from the supported trigger and setup the LED to be driven by hardware
+        * following the requested mode from the trigger flags.
+        * Deactivate hardware blink control by setting brightness to LED_OFF via
+        * the brightness_set() callback.
+        *
+        * Return 0 on success, a negative error number on flags apply fail.
+        */
+       int                     (*hw_control_set)(struct led_classdev *led_cdev,
+                                                 unsigned long flags);
+       /*
+        * Get from the LED driver the current mode that the LED is set in hw
+        * control mode and put them in flags.
+        * Trigger can use this to get the initial state of a LED already set in
+        * hardware blink control.
+        *
+        * Return 0 on success, a negative error number on failing parsing the
+        * initial mode. Error from this function is NOT FATAL as the device
+        * may be in a not supported initial state by the attached LED trigger.
+        */
+       int                     (*hw_control_get)(struct led_classdev *led_cdev,
+                                                 unsigned long *flags);
+       /*
+        * Get the device this LED blinks in response to.
+        * e.g. for a PHY LED, it is the network device. If the LED is
+        * not yet associated to a device, return NULL.
+        */
+       struct device           *(*hw_control_get_device)(struct led_classdev *led_cdev);
 #endif
 
 #ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED
@@ -509,6 +552,16 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 
 #endif /* CONFIG_LEDS_TRIGGERS */
 
+/* Trigger specific enum */
+enum led_trigger_netdev_modes {
+       TRIGGER_NETDEV_LINK = 0,
+       TRIGGER_NETDEV_TX,
+       TRIGGER_NETDEV_RX,
+
+       /* Keep last */
+       __TRIGGER_NETDEV_MAX,
+};
+
 /* Trigger specific functions */
 #ifdef CONFIG_LEDS_TRIGGER_DISK
 void ledtrig_disk_activity(bool write);
index 27013d6..c1b7008 100644 (file)
@@ -106,6 +106,16 @@ int mdio_driver_register(struct mdio_driver *drv);
 void mdio_driver_unregister(struct mdio_driver *drv);
 int mdio_device_bus_match(struct device *dev, struct device_driver *drv);
 
+static inline void mdio_device_get(struct mdio_device *mdiodev)
+{
+       get_device(&mdiodev->dev);
+}
+
+static inline void mdio_device_put(struct mdio_device *mdiodev)
+{
+       mdio_device_free(mdiodev);
+}
+
 static inline bool mdio_phy_id_is_c45(int phy_id)
 {
        return (phy_id & MDIO_PHY_ID_C45) && !(phy_id & ~MDIO_PHY_ID_C45_MASK);
@@ -486,6 +496,45 @@ static inline u32 linkmode_adv_to_mii_10base_t1_t(unsigned long *adv)
        return result;
 }
 
+/**
+ * mii_c73_mod_linkmode - convert a Clause 73 advertisement to linkmodes
+ * @adv: linkmode advertisement setting
+ * @lpa: array of three u16s containing the advertisement
+ *
+ * Convert an IEEE 802.3 Clause 73 advertisement to ethtool link modes.
+ */
+static inline void mii_c73_mod_linkmode(unsigned long *adv, u16 *lpa)
+{
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+                        adv, lpa[0] & MDIO_AN_C73_0_PAUSE);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+                        adv, lpa[0] & MDIO_AN_C73_0_ASM_DIR);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_1000BASE_KX);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_10GBASE_KX4);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_40GBASE_KR4);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_40GBASE_CR4);
+       /* 100GBASE_CR10 and 100GBASE_KP4 not implemented */
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_100GBASE_KR4);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_100GBASE_CR4);
+       /* 25GBASE_R_S not implemented */
+       /* The 25GBASE_R bit can be used for 25Gbase KR or CR modes */
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_25GBASE_R);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_25GBASE_R);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+                        adv, lpa[1] & MDIO_AN_C73_1_10GBASE_KR);
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
+                        adv, lpa[2] & MDIO_AN_C73_2_2500BASE_KX);
+       /* 5GBASE_KR not implemented */
+}
+
 int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
 int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
 int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum,
diff --git a/include/linux/mdio/mdio-regmap.h b/include/linux/mdio/mdio-regmap.h
new file mode 100644 (file)
index 0000000..679d906
--- /dev/null
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Driver for MMIO-Mapped MDIO devices. Some IPs expose internal PHYs or PCS
+ * within the MMIO-mapped area
+ *
+ * Copyright (C) 2023 Maxime Chevallier <maxime.chevallier@bootlin.com>
+ */
+#ifndef MDIO_REGMAP_H
+#define MDIO_REGMAP_H
+
+#include <linux/phy.h>
+
+struct device;
+struct regmap;
+
+struct mdio_regmap_config {
+       struct device *parent;
+       struct regmap *regmap;
+       char name[MII_BUS_ID_SIZE];
+       u8 valid_addr;
+       bool autoscan;
+};
+
+struct mii_bus *devm_mdio_regmap_register(struct device *dev,
+                                         const struct mdio_regmap_config *config);
+
+#endif
index a1f3522..b11a84f 100644 (file)
@@ -300,10 +300,6 @@ void netfs_stats_show(struct seq_file *);
 ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
                                struct iov_iter *new,
                                iov_iter_extraction_t extraction_flags);
-struct sg_table;
-ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t len,
-                                struct sg_table *sgtable, unsigned int sg_max,
-                                iov_iter_extraction_t extraction_flags);
 
 /**
  * netfs_inode - Get the netfs inode context from the inode
diff --git a/include/linux/pcs-altera-tse.h b/include/linux/pcs-altera-tse.h
deleted file mode 100644 (file)
index 92ab9f0..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2022 Bootlin
- *
- * Maxime Chevallier <maxime.chevallier@bootlin.com>
- */
-
-#ifndef __LINUX_PCS_ALTERA_TSE_H
-#define __LINUX_PCS_ALTERA_TSE_H
-
-struct phylink_pcs;
-struct net_device;
-
-struct phylink_pcs *alt_tse_pcs_create(struct net_device *ndev,
-                                      void __iomem *pcs_base, int reg_width);
-
-#endif /* __LINUX_PCS_ALTERA_TSE_H */
index 5712cc2..885b59d 100644 (file)
@@ -12,6 +12,7 @@
 struct mdio_device *lynx_get_mdio_device(struct phylink_pcs *pcs);
 
 struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio);
+struct phylink_pcs *lynx_pcs_create_mdiodev(struct mii_bus *bus, int addr);
 
 void lynx_pcs_destroy(struct phylink_pcs *pcs);
 
index d2da1e0..ec8175b 100644 (file)
@@ -18,6 +18,7 @@
 #define DW_AN_C37_SGMII                        2
 #define DW_2500BASEX                   3
 #define DW_AN_C37_1000BASEX            4
+#define DW_10GBASER                    5
 
 struct xpcs_id;
 
@@ -35,8 +36,8 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
 void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
                    int enable);
-struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
-                           phy_interface_t interface);
+struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr,
+                                   phy_interface_t interface);
 void xpcs_destroy(struct dw_xpcs *xpcs);
 
 #endif /* __LINUX_PCS_XPCS_H */
index 6478838..11c1e91 100644 (file)
@@ -86,6 +86,7 @@ extern const int phy_10gbit_features_array[1];
 #define PHY_IS_INTERNAL                0x00000001
 #define PHY_RST_AFTER_CLK_EN   0x00000002
 #define PHY_POLL_CABLE_TEST    0x00000004
+#define PHY_ALWAYS_CALL_SUSPEND        0x00000008
 #define MDIO_DEVICE_IS_PHY     0x80000000
 
 /**
@@ -496,14 +497,17 @@ struct phy_device *mdiobus_scan_c22(struct mii_bus *bus, int addr);
  * Once complete, move to UP to restart the PHY.
  * - phy_stop aborts the running test and moves to @PHY_HALTED
  *
- * @PHY_HALTED: PHY is up, but no polling or interrupts are done. Or
- * PHY is in an error state.
+ * @PHY_HALTED: PHY is up, but no polling or interrupts are done.
  * - phy_start moves to @PHY_UP
+ *
+ * @PHY_ERROR: PHY is up, but is in an error state.
+ * - phy_stop moves to @PHY_HALTED
  */
 enum phy_state {
        PHY_DOWN = 0,
        PHY_READY,
        PHY_HALTED,
+       PHY_ERROR,
        PHY_UP,
        PHY_RUNNING,
        PHY_NOLINK,
@@ -548,6 +552,8 @@ struct macsec_ops;
  * @downshifted_rate: Set true if link speed has been downshifted.
  * @is_on_sfp_module: Set true if PHY is located on an SFP module.
  * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY
+ * @wol_enabled: Set to true if the PHY or the attached MAC have Wake-on-LAN
+ *              enabled.
  * @state: State of the PHY for management purposes
  * @dev_flags: Device-specific flags used by the PHY driver.
  *
@@ -644,6 +650,7 @@ struct phy_device {
        unsigned downshifted_rate:1;
        unsigned is_on_sfp_module:1;
        unsigned mac_managed_pm:1;
+       unsigned wol_enabled:1;
 
        unsigned autoneg:1;
        /* The most recently read link state */
@@ -1108,6 +1115,34 @@ struct phy_driver {
 #define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4)
 #define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10)
 
+/**
+ * phy_id_compare - compare @id1 with @id2 taking account of @mask
+ * @id1: first PHY ID
+ * @id2: second PHY ID
+ * @mask: the PHY ID mask, set bits are significant in matching
+ *
+ * Return true if the bits from @id1 and @id2 specified by @mask match.
+ * This uses an equivalent test to (@id & @mask) == (@phy_id & @mask).
+ */
+static inline bool phy_id_compare(u32 id1, u32 id2, u32 mask)
+{
+       return !((id1 ^ id2) & mask);
+}
+
+/**
+ * phydev_id_compare - compare @id with the PHY's Clause 22 ID
+ * @phydev: the PHY device
+ * @id: the PHY ID to be matched
+ *
+ * Compare the @phydev clause 22 ID with the provided @id and return true or
+ * false depending whether it matches, using the bound driver mask. The
+ * @phydev must be bound to a driver.
+ */
+static inline bool phydev_id_compare(struct phy_device *phydev, u32 id)
+{
+       return phy_id_compare(id, phydev->phy_id, phydev->drv->phy_id_mask);
+}
+
 /* A Structure for boards to register fixups with the PHY Lib */
 struct phy_fixup {
        struct list_head list;
@@ -1171,10 +1206,12 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum)
 #define phy_read_poll_timeout(phydev, regnum, val, cond, sleep_us, \
                                timeout_us, sleep_before_read) \
 ({ \
-       int __ret = read_poll_timeout(phy_read, val, val < 0 || (cond), \
+       int __ret, __val; \
+       __ret = read_poll_timeout(__val = phy_read, val, \
+                                 __val < 0 || (cond), \
                sleep_us, timeout_us, sleep_before_read, phydev, regnum); \
-       if (val < 0) \
-               __ret = val; \
+       if (__val < 0) \
+               __ret = __val; \
        if (__ret) \
                phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \
        __ret; \
@@ -1267,11 +1304,13 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
 #define phy_read_mmd_poll_timeout(phydev, devaddr, regnum, val, cond, \
                                  sleep_us, timeout_us, sleep_before_read) \
 ({ \
-       int __ret = read_poll_timeout(phy_read_mmd, val, (cond) || val < 0, \
+       int __ret, __val; \
+       __ret = read_poll_timeout(__val = phy_read_mmd, val, \
+                                 __val < 0 || (cond), \
                                  sleep_us, timeout_us, sleep_before_read, \
                                  phydev, devaddr, regnum); \
-       if (val <  0) \
-               __ret = val; \
+       if (__val < 0) \
+               __ret = __val; \
        if (__ret) \
                phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \
        __ret; \
index 71755c6..0cf07d7 100644 (file)
@@ -568,16 +568,17 @@ void phylink_generic_validate(struct phylink_config *config,
                              unsigned long *supported,
                              struct phylink_link_state *state);
 
-struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *,
-                              phy_interface_t iface,
-                              const struct phylink_mac_ops *mac_ops);
+struct phylink *phylink_create(struct phylink_config *,
+                              const struct fwnode_handle *,
+                              phy_interface_t,
+                              const struct phylink_mac_ops *);
 void phylink_destroy(struct phylink *);
 bool phylink_expects_phy(struct phylink *pl);
 
 int phylink_connect_phy(struct phylink *, struct phy_device *);
 int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags);
 int phylink_fwnode_phy_connect(struct phylink *pl,
-                              struct fwnode_handle *fwnode,
+                              const struct fwnode_handle *fwnode,
                               u32 flags);
 void phylink_disconnect_phy(struct phylink *);
 
@@ -655,6 +656,8 @@ int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode,
                               const unsigned long *advertising);
 void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs);
 
+void phylink_resolve_c73(struct phylink_link_state *state);
+
 void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs,
                                   struct phylink_link_state *state);
 
index 9ca353a..8eac4f3 100644 (file)
@@ -17,12 +17,15 @@ struct ref_tracker_dir {
        bool                    dead;
        struct list_head        list; /* List of active trackers */
        struct list_head        quarantine; /* List of dead trackers */
+       char                    name[32];
 #endif
 };
 
 #ifdef CONFIG_REF_TRACKER
+
 static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
-                                       unsigned int quarantine_count)
+                                       unsigned int quarantine_count,
+                                       const char *name)
 {
        INIT_LIST_HEAD(&dir->list);
        INIT_LIST_HEAD(&dir->quarantine);
@@ -31,14 +34,20 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
        dir->dead = false;
        refcount_set(&dir->untracked, 1);
        refcount_set(&dir->no_tracker, 1);
+       strscpy(dir->name, name, sizeof(dir->name));
        stack_depot_init();
 }
 
 void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
 
+void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir,
+                                 unsigned int display_limit);
+
 void ref_tracker_dir_print(struct ref_tracker_dir *dir,
                           unsigned int display_limit);
 
+int ref_tracker_dir_snprint(struct ref_tracker_dir *dir, char *buf, size_t size);
+
 int ref_tracker_alloc(struct ref_tracker_dir *dir,
                      struct ref_tracker **trackerp, gfp_t gfp);
 
@@ -48,7 +57,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
 #else /* CONFIG_REF_TRACKER */
 
 static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
-                                       unsigned int quarantine_count)
+                                       unsigned int quarantine_count,
+                                       const char *name)
 {
 }
 
@@ -56,11 +66,22 @@ static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
 {
 }
 
+static inline void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir,
+                                               unsigned int display_limit)
+{
+}
+
 static inline void ref_tracker_dir_print(struct ref_tracker_dir *dir,
                                         unsigned int display_limit)
 {
 }
 
+static inline int ref_tracker_dir_snprint(struct ref_tracker_dir *dir,
+                                         char *buf, size_t size)
+{
+       return 0;
+}
+
 static inline int ref_tracker_alloc(struct ref_tracker_dir *dir,
                                    struct ref_tracker **trackerp,
                                    gfp_t gfp)
index ef06a19..9346cd4 100644 (file)
@@ -342,6 +342,12 @@ enum {
        SFP_ENCODING                    = 11,
        SFP_BR_NOMINAL                  = 12,
        SFP_RATE_ID                     = 13,
+       SFF_RID_8079                    = 0x01,
+       SFF_RID_8431_RX_ONLY            = 0x02,
+       SFF_RID_8431_TX_ONLY            = 0x04,
+       SFF_RID_8431                    = 0x06,
+       SFF_RID_10G8G                   = 0x0e,
+
        SFP_LINK_LEN_SM_KM              = 14,
        SFP_LINK_LEN_SM_100M            = 15,
        SFP_LINK_LEN_50UM_OM2_10M       = 16,
@@ -465,6 +471,7 @@ enum {
        SFP_STATUS                      = 110,
        SFP_STATUS_TX_DISABLE           = BIT(7),
        SFP_STATUS_TX_DISABLE_FORCE     = BIT(6),
+       SFP_STATUS_RS0_SELECT           = BIT(3),
        SFP_STATUS_TX_FAULT             = BIT(2),
        SFP_STATUS_RX_LOS               = BIT(1),
        SFP_ALARM0                      = 112,
@@ -496,6 +503,7 @@ enum {
        SFP_WARN1_RXPWR_LOW             = BIT(6),
 
        SFP_EXT_STATUS                  = 118,
+       SFP_EXT_STATUS_RS1_SELECT       = BIT(3),
        SFP_EXT_STATUS_PWRLVL_SELECT    = BIT(0),
 
        SFP_VSL                         = 120,
@@ -556,6 +564,7 @@ int sfp_get_module_eeprom_by_page(struct sfp_bus *bus,
                                  struct netlink_ext_ack *extack);
 void sfp_upstream_start(struct sfp_bus *bus);
 void sfp_upstream_stop(struct sfp_bus *bus);
+void sfp_upstream_set_signal_rate(struct sfp_bus *bus, unsigned int rate_kbd);
 void sfp_bus_put(struct sfp_bus *bus);
 struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode);
 int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream,
@@ -615,6 +624,11 @@ static inline void sfp_upstream_stop(struct sfp_bus *bus)
 {
 }
 
+static inline void sfp_upstream_set_signal_rate(struct sfp_bus *bus,
+                                               unsigned int rate_kbd)
+{
+}
+
 static inline void sfp_bus_put(struct sfp_bus *bus)
 {
 }
index 0b40417..e2f48dd 100644 (file)
@@ -330,6 +330,7 @@ struct tc_skb_ext {
        u8 post_ct_snat:1;
        u8 post_ct_dnat:1;
        u8 act_miss:1; /* Set if act_miss_cookie is used */
+       u8 l2_miss:1; /* Set by bridge upon FDB or MDB miss */
 };
 #endif
 
@@ -1383,7 +1384,7 @@ static inline int skb_pad(struct sk_buff *skb, int pad)
 #define dev_kfree_skb(a)       consume_skb(a)
 
 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
-                        int offset, size_t size);
+                        int offset, size_t size, size_t max_frags);
 
 struct skb_seq_state {
        __u32           lower_offset;
@@ -2421,20 +2422,22 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
        return skb_headlen(skb) + __skb_pagelen(skb);
 }
 
+static inline void skb_frag_fill_page_desc(skb_frag_t *frag,
+                                          struct page *page,
+                                          int off, int size)
+{
+       frag->bv_page = page;
+       frag->bv_offset = off;
+       skb_frag_size_set(frag, size);
+}
+
 static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
                                              int i, struct page *page,
                                              int off, int size)
 {
        skb_frag_t *frag = &shinfo->frags[i];
 
-       /*
-        * Propagate page pfmemalloc to the skb if we can. The problem is
-        * that not all callers have unique ownership of the page but rely
-        * on page_is_pfmemalloc doing the right thing(tm).
-        */
-       frag->bv_page             = page;
-       frag->bv_offset           = off;
-       skb_frag_size_set(frag, size);
+       skb_frag_fill_page_desc(frag, page, off, size);
 }
 
 /**
@@ -2466,6 +2469,11 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
                                        struct page *page, int off, int size)
 {
        __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
+
+       /* Propagate page pfmemalloc to the skb if we can. The problem is
+        * that not all callers have unique ownership of the page but rely
+        * on page_is_pfmemalloc doing the right thing(tm).
+        */
        page = compound_head(page);
        if (page_is_pfmemalloc(page))
                skb->pfmemalloc = true;
@@ -3494,32 +3502,6 @@ static inline void skb_frag_page_copy(skb_frag_t *fragto,
        fragto->bv_page = fragfrom->bv_page;
 }
 
-/**
- * __skb_frag_set_page - sets the page contained in a paged fragment
- * @frag: the paged fragment
- * @page: the page to set
- *
- * Sets the fragment @frag to contain @page.
- */
-static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
-{
-       frag->bv_page = page;
-}
-
-/**
- * skb_frag_set_page - sets the page contained in a paged fragment of an skb
- * @skb: the buffer
- * @f: the fragment offset
- * @page: the page to set
- *
- * Sets the @f'th fragment of @skb to contain @page.
- */
-static inline void skb_frag_set_page(struct sk_buff *skb, int f,
-                                    struct page *page)
-{
-       __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page);
-}
-
 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
 
 /**
@@ -4043,7 +4025,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
        if (likely(hlen - offset >= len))
                return (void *)data + offset;
 
-       if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
+       if (!skb || !buffer || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
                return NULL;
 
        return buffer;
@@ -5126,5 +5108,8 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb)
 #endif
 }
 
+ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
+                            ssize_t maxsize, gfp_t gfp);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SKBUFF_H */
index 13c3a23..bd1cc32 100644 (file)
@@ -327,6 +327,7 @@ struct ucred {
                                          */
 
 #define MSG_ZEROCOPY   0x4000000       /* Use user data in kernel path */
+#define MSG_SPLICE_PAGES 0x8000000     /* Splice the pages from the iterator in sendmsg() */
 #define MSG_FASTOPEN   0x20000000      /* Send data in TCP SYN */
 #define MSG_CMSG_CLOEXEC 0x40000000    /* Set close_on_exec for file
                                           descriptor received through
@@ -337,6 +338,8 @@ struct ucred {
 #define MSG_CMSG_COMPAT        0               /* We never have 32 bit fixups */
 #endif
 
+/* Flags to be cleared on entry by sendmsg and sendmmsg syscalls */
+#define MSG_INTERNAL_SENDMSG_FLAGS (MSG_SPLICE_PAGES)
 
 /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
 #define SOL_IP         0
index 044c1d8..0ccb983 100644 (file)
@@ -433,4 +433,9 @@ static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter)
        return user_backed_iter(iter);
 }
 
+struct sg_table;
+ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len,
+                          struct sg_table *sgtable, unsigned int sg_max,
+                          iov_iter_extraction_t extraction_flags);
+
 #endif
index 59955ac..b57bec6 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-1.0+ */
 /*
  * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'.
  *
@@ -7,9 +8,6 @@
  * BUT, I'm the one who modified it for ethernet, so:
  * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov
  *
- *     This software may be used and distributed according to the terms
- *     of the GNU Public License, incorporated herein by reference.
- *
  */
 
 #ifndef _NET_BONDING_H
index 6a942e7..9a3c51a 100644 (file)
@@ -123,6 +123,7 @@ struct devlink_port {
        struct list_head list;
        struct list_head region_list;
        struct devlink *devlink;
+       const struct devlink_port_ops *ops;
        unsigned int index;
        spinlock_t type_lock; /* Protects type and type_eth/ib
                               * structures consistency.
@@ -1261,7 +1262,7 @@ struct devlink_ops {
        /**
         * @supported_flash_update_params:
         * mask of parameters supported by the driver's .flash_update
-        * implemementation.
+        * implementation.
         */
        u32 supported_flash_update_params;
        unsigned long reload_actions;
@@ -1273,12 +1274,6 @@ struct devlink_ops {
        int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action,
                         enum devlink_reload_limit limit, u32 *actions_performed,
                         struct netlink_ext_ack *extack);
-       int (*port_type_set)(struct devlink_port *devlink_port,
-                            enum devlink_port_type port_type);
-       int (*port_split)(struct devlink *devlink, struct devlink_port *port,
-                         unsigned int count, struct netlink_ext_ack *extack);
-       int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port,
-                           struct netlink_ext_ack *extack);
        int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index,
                           u16 pool_index,
                           struct devlink_sb_pool_info *pool_info);
@@ -1435,80 +1430,17 @@ struct devlink_ops {
                                        const struct devlink_trap_policer *policer,
                                        u64 *p_drops);
        /**
-        * @port_function_hw_addr_get: Port function's hardware address get function.
-        *
-        * Should be used by device drivers to report the hardware address of a function managed
-        * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
-        * function handling for a particular port.
-        *
-        * Note: @extack can be NULL when port notifier queries the port function.
-        */
-       int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
-                                        int *hw_addr_len,
-                                        struct netlink_ext_ack *extack);
-       /**
-        * @port_function_hw_addr_set: Port function's hardware address set function.
-        *
-        * Should be used by device drivers to set the hardware address of a function managed
-        * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
-        * function handling for a particular port.
-        */
-       int (*port_function_hw_addr_set)(struct devlink_port *port,
-                                        const u8 *hw_addr, int hw_addr_len,
-                                        struct netlink_ext_ack *extack);
-       /**
-        * @port_fn_roce_get: Port function's roce get function.
-        *
-        * Query RoCE state of a function managed by the devlink port.
-        * Return -EOPNOTSUPP if port function RoCE handling is not supported.
-        */
-       int (*port_fn_roce_get)(struct devlink_port *devlink_port,
-                               bool *is_enable,
-                               struct netlink_ext_ack *extack);
-       /**
-        * @port_fn_roce_set: Port function's roce set function.
-        *
-        * Enable/Disable the RoCE state of a function managed by the devlink
-        * port.
-        * Return -EOPNOTSUPP if port function RoCE handling is not supported.
-        */
-       int (*port_fn_roce_set)(struct devlink_port *devlink_port,
-                               bool enable, struct netlink_ext_ack *extack);
-       /**
-        * @port_fn_migratable_get: Port function's migratable get function.
-        *
-        * Query migratable state of a function managed by the devlink port.
-        * Return -EOPNOTSUPP if port function migratable handling is not
-        * supported.
-        */
-       int (*port_fn_migratable_get)(struct devlink_port *devlink_port,
-                                     bool *is_enable,
-                                     struct netlink_ext_ack *extack);
-       /**
-        * @port_fn_migratable_set: Port function's migratable set function.
-        *
-        * Enable/Disable migratable state of a function managed by the devlink
-        * port.
-        * Return -EOPNOTSUPP if port function migratable handling is not
-        * supported.
-        */
-       int (*port_fn_migratable_set)(struct devlink_port *devlink_port,
-                                     bool enable,
-                                     struct netlink_ext_ack *extack);
-       /**
         * port_new() - Add a new port function of a specified flavor
         * @devlink: Devlink instance
         * @attrs: attributes of the new port
         * @extack: extack for reporting error messages
-        * @new_port_index: index of the new port
+        * @devlink_port: pointer to store new devlink port pointer
         *
         * Devlink core will call this device driver function upon user request
         * to create a new port function of a specified flavor and optional
         * attributes
         *
         * Notes:
-        *      - Called without devlink instance lock being held. Drivers must
-        *        implement own means of synchronization
         *      - On success, drivers must register a port with devlink core
         *
         * Return: 0 on success, negative value otherwise.
@@ -1516,56 +1448,7 @@ struct devlink_ops {
        int (*port_new)(struct devlink *devlink,
                        const struct devlink_port_new_attrs *attrs,
                        struct netlink_ext_ack *extack,
-                       unsigned int *new_port_index);
-       /**
-        * port_del() - Delete a port function
-        * @devlink: Devlink instance
-        * @port_index: port function index to delete
-        * @extack: extack for reporting error messages
-        *
-        * Devlink core will call this device driver function upon user request
-        * to delete a previously created port function
-        *
-        * Notes:
-        *      - Called without devlink instance lock being held. Drivers must
-        *        implement own means of synchronization
-        *      - On success, drivers must unregister the corresponding devlink
-        *        port
-        *
-        * Return: 0 on success, negative value otherwise.
-        */
-       int (*port_del)(struct devlink *devlink, unsigned int port_index,
-                       struct netlink_ext_ack *extack);
-       /**
-        * port_fn_state_get() - Get the state of a port function
-        * @devlink: Devlink instance
-        * @port: The devlink port
-        * @state: Admin configured state
-        * @opstate: Current operational state
-        * @extack: extack for reporting error messages
-        *
-        * Reports the admin and operational state of a devlink port function
-        *
-        * Return: 0 on success, negative value otherwise.
-        */
-       int (*port_fn_state_get)(struct devlink_port *port,
-                                enum devlink_port_fn_state *state,
-                                enum devlink_port_fn_opstate *opstate,
-                                struct netlink_ext_ack *extack);
-       /**
-        * port_fn_state_set() - Set the admin state of a port function
-        * @devlink: Devlink instance
-        * @port: The devlink port
-        * @state: Admin state
-        * @extack: extack for reporting error messages
-        *
-        * Set the admin state of a devlink port function
-        *
-        * Return: 0 on success, negative value otherwise.
-        */
-       int (*port_fn_state_set)(struct devlink_port *port,
-                                enum devlink_port_fn_state state,
-                                struct netlink_ext_ack *extack);
+                       struct devlink_port **devlink_port);
 
        /**
         * Rate control callbacks.
@@ -1655,15 +1538,116 @@ void devl_unregister(struct devlink *devlink);
 void devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_free(struct devlink *devlink);
+
+/**
+ * struct devlink_port_ops - Port operations
+ * @port_split: Callback used to split the port into multiple ones.
+ * @port_unsplit: Callback used to unsplit the port group back into
+ *               a single port.
+ * @port_type_set: Callback used to set a type of a port.
+ * @port_del: Callback used to delete selected port along with related function.
+ *           Devlink core calls this upon user request to delete
+ *           a port previously created by devlink_ops->port_new().
+ * @port_fn_hw_addr_get: Callback used to set port function's hardware address.
+ *                      Should be used by device drivers to report
+ *                      the hardware address of a function managed
+ *                      by the devlink port.
+ * @port_fn_hw_addr_set: Callback used to set port function's hardware address.
+ *                      Should be used by device drivers to set the hardware
+ *                      address of a function managed by the devlink port.
+ * @port_fn_roce_get: Callback used to get port function's RoCE capability.
+ *                   Should be used by device drivers to report
+ *                   the current state of RoCE capability of a function
+ *                   managed by the devlink port.
+ * @port_fn_roce_set: Callback used to set port function's RoCE capability.
+ *                   Should be used by device drivers to enable/disable
+ *                   RoCE capability of a function managed
+ *                   by the devlink port.
+ * @port_fn_migratable_get: Callback used to get port function's migratable
+ *                         capability. Should be used by device drivers
+ *                         to report the current state of migratable capability
+ *                         of a function managed by the devlink port.
+ * @port_fn_migratable_set: Callback used to set port function's migratable
+ *                         capability. Should be used by device drivers
+ *                         to enable/disable migratable capability of
+ *                         a function managed by the devlink port.
+ * @port_fn_state_get: Callback used to get port function's state.
+ *                    Should be used by device drivers to report
+ *                    the current admin and operational state of a
+ *                    function managed by the devlink port.
+ * @port_fn_state_set: Callback used to get port function's state.
+ *                    Should be used by device drivers set
+ *                    the admin state of a function managed
+ *                    by the devlink port.
+ *
+ * Note: Driver should return -EOPNOTSUPP if it doesn't support
+ * port function (@port_fn_*) handling for a particular port.
+ */
+struct devlink_port_ops {
+       int (*port_split)(struct devlink *devlink, struct devlink_port *port,
+                         unsigned int count, struct netlink_ext_ack *extack);
+       int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port,
+                           struct netlink_ext_ack *extack);
+       int (*port_type_set)(struct devlink_port *devlink_port,
+                            enum devlink_port_type port_type);
+       int (*port_del)(struct devlink *devlink, struct devlink_port *port,
+                       struct netlink_ext_ack *extack);
+       int (*port_fn_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
+                                  int *hw_addr_len,
+                                  struct netlink_ext_ack *extack);
+       int (*port_fn_hw_addr_set)(struct devlink_port *port,
+                                  const u8 *hw_addr, int hw_addr_len,
+                                  struct netlink_ext_ack *extack);
+       int (*port_fn_roce_get)(struct devlink_port *devlink_port,
+                               bool *is_enable,
+                               struct netlink_ext_ack *extack);
+       int (*port_fn_roce_set)(struct devlink_port *devlink_port,
+                               bool enable, struct netlink_ext_ack *extack);
+       int (*port_fn_migratable_get)(struct devlink_port *devlink_port,
+                                     bool *is_enable,
+                                     struct netlink_ext_ack *extack);
+       int (*port_fn_migratable_set)(struct devlink_port *devlink_port,
+                                     bool enable,
+                                     struct netlink_ext_ack *extack);
+       int (*port_fn_state_get)(struct devlink_port *port,
+                                enum devlink_port_fn_state *state,
+                                enum devlink_port_fn_opstate *opstate,
+                                struct netlink_ext_ack *extack);
+       int (*port_fn_state_set)(struct devlink_port *port,
+                                enum devlink_port_fn_state state,
+                                struct netlink_ext_ack *extack);
+};
+
 void devlink_port_init(struct devlink *devlink,
                       struct devlink_port *devlink_port);
 void devlink_port_fini(struct devlink_port *devlink_port);
-int devl_port_register(struct devlink *devlink,
-                      struct devlink_port *devlink_port,
-                      unsigned int port_index);
-int devlink_port_register(struct devlink *devlink,
-                         struct devlink_port *devlink_port,
-                         unsigned int port_index);
+
+int devl_port_register_with_ops(struct devlink *devlink,
+                               struct devlink_port *devlink_port,
+                               unsigned int port_index,
+                               const struct devlink_port_ops *ops);
+
+static inline int devl_port_register(struct devlink *devlink,
+                                    struct devlink_port *devlink_port,
+                                    unsigned int port_index)
+{
+       return devl_port_register_with_ops(devlink, devlink_port,
+                                          port_index, NULL);
+}
+
+int devlink_port_register_with_ops(struct devlink *devlink,
+                                  struct devlink_port *devlink_port,
+                                  unsigned int port_index,
+                                  const struct devlink_port_ops *ops);
+
+static inline int devlink_port_register(struct devlink *devlink,
+                                       struct devlink_port *devlink_port,
+                                       unsigned int port_index)
+{
+       return devlink_port_register_with_ops(devlink, devlink_port,
+                                             port_index, NULL);
+}
+
 void devl_port_unregister(struct devlink_port *devlink_port);
 void devlink_port_unregister(struct devlink_port *devlink_port);
 void devlink_port_type_eth_set(struct devlink_port *devlink_port);
index 8903053..75022cf 100644 (file)
@@ -867,9 +867,15 @@ struct dsa_switch_ops {
                                                      phy_interface_t iface);
        int     (*phylink_mac_link_state)(struct dsa_switch *ds, int port,
                                          struct phylink_link_state *state);
+       int     (*phylink_mac_prepare)(struct dsa_switch *ds, int port,
+                                      unsigned int mode,
+                                      phy_interface_t interface);
        void    (*phylink_mac_config)(struct dsa_switch *ds, int port,
                                      unsigned int mode,
                                      const struct phylink_link_state *state);
+       int     (*phylink_mac_finish)(struct dsa_switch *ds, int port,
+                                     unsigned int mode,
+                                     phy_interface_t interface);
        void    (*phylink_mac_an_restart)(struct dsa_switch *ds, int port);
        void    (*phylink_mac_link_down)(struct dsa_switch *ds, int port,
                                         unsigned int mode,
index bb8651a..7f0adda 100644 (file)
@@ -116,11 +116,10 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 }
 
 /* Reset some input parameters after previous lookup */
-static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
+static inline void flowi4_update_output(struct flowi4 *fl4, int oif,
                                        __be32 daddr, __be32 saddr)
 {
        fl4->flowi4_oif = oif;
-       fl4->flowi4_tos = tos;
        fl4->daddr = daddr;
        fl4->saddr = saddr;
 }
index 85b2281..8b41668 100644 (file)
@@ -243,10 +243,12 @@ struct flow_dissector_key_ip {
  * struct flow_dissector_key_meta:
  * @ingress_ifindex: ingress ifindex
  * @ingress_iftype: ingress interface type
+ * @l2_miss: packet did not match an L2 entry during forwarding
  */
 struct flow_dissector_key_meta {
        int ingress_ifindex;
        u16 ingress_iftype;
+       u8 l2_miss;
 };
 
 /**
index a4fab70..7b47dd6 100644 (file)
 #include <net/udp.h>
 
 struct napi_gro_cb {
-       /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
-       void    *frag0;
+       union {
+               struct {
+                       /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
+                       void    *frag0;
 
-       /* Length of frag0. */
-       unsigned int frag0_len;
+                       /* Length of frag0. */
+                       unsigned int frag0_len;
+               };
+
+               struct {
+                       /* used in skb_gro_receive() slow path */
+                       struct sk_buff *last;
+
+                       /* jiffies when first packet was created/queued */
+                       unsigned long age;
+               };
+       };
 
        /* This indicates where we are processing relative to skb->data. */
        int     data_offset;
@@ -32,9 +44,6 @@ struct napi_gro_cb {
        /* Used in ipv6_gro_receive() and foo-over-udp */
        u16     proto;
 
-       /* jiffies when first packet was created/queued */
-       unsigned long age;
-
 /* Used in napi_gro_cb::free */
 #define NAPI_GRO_FREE             1
 #define NAPI_GRO_FREE_STOLEN_HEAD 2
@@ -77,9 +86,6 @@ struct napi_gro_cb {
 
        /* used to support CHECKSUM_COMPLETE for tunneling protocols */
        __wsum  csum;
-
-       /* used in skb_gro_receive() slow path */
-       struct sk_buff *last;
 };
 
 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
index cec453c..77f4b0e 100644 (file)
@@ -31,6 +31,8 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
                       int addr_len, int flags);
 int inet_accept(struct socket *sock, struct socket *newsock, int flags,
                bool kern);
+void __inet_accept(struct socket *sock, struct socket *newsock,
+                  struct sock *newsk);
 int inet_send_prepare(struct sock *sk);
 int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
index acec504..50d4358 100644 (file)
@@ -222,8 +222,6 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4,
                   unsigned int flags);
 int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd,
                       struct sk_buff *skb);
-ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
-                      int offset, size_t size, int flags);
 struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4,
                              struct sk_buff_head *queue,
                              struct inet_cork *cork);
@@ -244,14 +242,22 @@ static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
        return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
 }
 
-static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
+/* Get the route scope that should be used when sending a packet. */
+static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
+                                 const struct ipcm_cookie *ipc,
+                                 const struct msghdr *msg)
 {
-       return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
+       if (sock_flag(&inet->sk, SOCK_LOCALROUTE) ||
+           msg->msg_flags & MSG_DONTROUTE ||
+           (ipc->opt && ipc->opt->opt.is_strictroute))
+               return RT_SCOPE_LINK;
+
+       return RT_SCOPE_UNIVERSE;
 }
 
-static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk)
+static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
 {
-       return (ipc->tos != -1) ? RT_CONN_FLAGS_TOS(sk, ipc->tos) : RT_CONN_FLAGS(sk);
+       return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
 }
 
 /* datagram.c */
@@ -282,7 +288,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
                           const struct ip_options *sopt,
                           __be32 daddr, __be32 saddr,
                           const struct ip_reply_arg *arg,
-                          unsigned int len, u64 transmit_time);
+                          unsigned int len, u64 transmit_time, u32 txhash);
 
 #define IP_INC_STATS(net, field)       SNMP_INC_STATS64((net)->mib.ip_statistics, field)
 #define __IP_INC_STATS(net, field)     __SNMP_INC_STATS64((net)->mib.ip_statistics, field)
index 5b9c61c..441ed8f 100644 (file)
@@ -8,6 +8,7 @@
 #define _NET_MACSEC_H_
 
 #include <linux/u64_stats_sync.h>
+#include <linux/if_vlan.h>
 #include <uapi/linux/if_link.h>
 #include <uapi/linux/if_macsec.h>
 
@@ -312,4 +313,13 @@ static inline bool macsec_send_sci(const struct macsec_secy *secy)
                (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb);
 }
 
+static inline void *macsec_netdev_priv(const struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+       if (is_vlan_dev(dev))
+               return netdev_priv(vlan_dev_priv(dev)->real_dev);
+#endif
+       return netdev_priv(dev);
+}
+
 #endif /* _NET_MACSEC_H_ */
index 0855b60..cf0d81b 100644 (file)
@@ -26,6 +26,15 @@ struct nf_conntrack_expect {
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_mask mask;
 
+       /* Usage count. */
+       refcount_t use;
+
+       /* Flags */
+       unsigned int flags;
+
+       /* Expectation class */
+       unsigned int class;
+
        /* Function to call after setup and insertion */
        void (*expectfn)(struct nf_conn *new,
                         struct nf_conntrack_expect *this);
@@ -39,15 +48,6 @@ struct nf_conntrack_expect {
        /* Timer function; deletes the expectation. */
        struct timer_list timeout;
 
-       /* Usage count. */
-       refcount_t use;
-
-       /* Flags */
-       unsigned int flags;
-
-       /* Expectation class */
-       unsigned int class;
-
 #if IS_ENABLED(CONFIG_NF_NAT)
        union nf_inet_addr saved_addr;
        /* This is the original per-proto part, used to map the
index ebb28ec..546fc4a 100644 (file)
@@ -263,8 +263,8 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
        up_write(&flow_table->flow_block_lock);
 }
 
-int flow_offload_route_init(struct flow_offload *flow,
-                           const struct nf_flow_route *route);
+void flow_offload_route_init(struct flow_offload *flow,
+                            const struct nf_flow_route *route);
 
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
 void flow_offload_refresh(struct nf_flowtable *flow_table,
index db762e3..a4efb7a 100644 (file)
@@ -194,6 +194,7 @@ struct netns_ipv4 {
        int sysctl_udp_rmem_min;
 
        u8 sysctl_fib_notify_on_flag_change;
+       u8 sysctl_tcp_syn_linear_timeouts;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
        u8 sysctl_udp_l3mdev_accept;
index b3b5b0b..a2ea45c 100644 (file)
@@ -868,6 +868,7 @@ struct tc_htb_qopt_offload {
        u16 qid;
        u64 rate;
        u64 ceil;
+       u8 prio;
 };
 
 #define TC_HTB_CLASSID_ROOT U32_MAX
index 5722931..518febb 100644 (file)
@@ -187,6 +187,32 @@ struct tc_taprio_caps {
        bool broken_mqprio:1;
 };
 
+enum tc_taprio_qopt_cmd {
+       TAPRIO_CMD_REPLACE,
+       TAPRIO_CMD_DESTROY,
+       TAPRIO_CMD_STATS,
+       TAPRIO_CMD_TC_STATS,
+};
+
+/**
+ * struct tc_taprio_qopt_stats - IEEE 802.1Qbv statistics
+ * @window_drops: Frames that were dropped because they were too large to be
+ *     transmitted in any of the allotted time windows (open gates) for their
+ *     traffic class.
+ * @tx_overruns: Frames still being transmitted by the MAC after the
+ *     transmission gate associated with their traffic class has closed.
+ *     Equivalent to `12.29.1.1.2 TransmissionOverrun` from 802.1Q-2018.
+ */
+struct tc_taprio_qopt_stats {
+       u64 window_drops;
+       u64 tx_overruns;
+};
+
+struct tc_taprio_qopt_tc_stats {
+       int tc;
+       struct tc_taprio_qopt_stats stats;
+};
+
 struct tc_taprio_sched_entry {
        u8 command; /* TC_TAPRIO_CMD_* */
 
@@ -196,16 +222,26 @@ struct tc_taprio_sched_entry {
 };
 
 struct tc_taprio_qopt_offload {
-       struct tc_mqprio_qopt_offload mqprio;
-       struct netlink_ext_ack *extack;
-       u8 enable;
-       ktime_t base_time;
-       u64 cycle_time;
-       u64 cycle_time_extension;
-       u32 max_sdu[TC_MAX_QUEUE];
-
-       size_t num_entries;
-       struct tc_taprio_sched_entry entries[];
+       enum tc_taprio_qopt_cmd cmd;
+
+       union {
+               /* TAPRIO_CMD_STATS */
+               struct tc_taprio_qopt_stats stats;
+               /* TAPRIO_CMD_TC_STATS */
+               struct tc_taprio_qopt_tc_stats tc_stats;
+               /* TAPRIO_CMD_REPLACE */
+               struct {
+                       struct tc_mqprio_qopt_offload mqprio;
+                       struct netlink_ext_ack *extack;
+                       ktime_t base_time;
+                       u64 cycle_time;
+                       u64 cycle_time_extension;
+                       u32 max_sdu[TC_MAX_QUEUE];
+
+                       size_t num_entries;
+                       struct tc_taprio_sched_entry entries[];
+               };
+       };
 };
 
 #if IS_ENABLED(CONFIG_NET_SCH_TAPRIO)
index bcc367c..5a5c726 100644 (file)
@@ -321,8 +321,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
                if (IS_ERR(rt))
                        return rt;
                ip_rt_put(rt);
-               flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr,
-                                    fl4->saddr);
+               flowi4_update_output(fl4, oif, fl4->daddr, fl4->saddr);
        }
        security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
        return ip_route_output_flow(net, fl4, sk);
@@ -337,8 +336,7 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable
                fl4->fl4_dport = dport;
                fl4->fl4_sport = sport;
                ip_rt_put(rt);
-               flowi4_update_output(fl4, sk->sk_bound_dev_if,
-                                    RT_CONN_FLAGS(sk), fl4->daddr,
+               flowi4_update_output(fl4, sk->sk_bound_dev_if, fl4->daddr,
                                     fl4->saddr);
                security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
                return ip_route_output_flow(sock_net(sk), fl4, sk);
index 5066e45..68990a8 100644 (file)
@@ -161,8 +161,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define MAX_TCP_KEEPCNT                127
 #define MAX_TCP_SYNCNT         127
 
-#define TCP_SYNQ_INTERVAL      (HZ/5)  /* Period of SYNACK timer */
-
 #define TCP_PAWS_24DAYS        (60 * 60 * 24 * 24)
 #define TCP_PAWS_MSL   60              /* Per-host timestamps are invalidated
                                         * after this time. It should be equal
@@ -333,8 +331,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
                 int flags);
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
                        size_t size, int flags);
-ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
-                size_t size, int flags);
 int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
 void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
              int size_goal);
@@ -2046,7 +2042,7 @@ INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff))
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
 INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
-int tcp_gro_complete(struct sk_buff *skb);
+void tcp_gro_complete(struct sk_buff *skb);
 
 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
 
index 596595c..b7d0f1e 100644 (file)
@@ -259,7 +259,7 @@ struct tls_context {
        struct scatterlist *partially_sent_record;
        u16 partially_sent_offset;
 
-       bool in_tcp_sendpages;
+       bool splicing_pages;
        bool pending_open_record_frags;
 
        struct mutex tx_lock; /* protects partially_sent_* fields and
index de4b528..5cad443 100644 (file)
@@ -437,7 +437,6 @@ struct udp_seq_afinfo {
 struct udp_iter_state {
        struct seq_net_private  p;
        int                     bucket;
-       struct udp_seq_afinfo   *bpf_seq_afinfo;
 };
 
 void *udp_seq_start(struct seq_file *seq, loff_t *pos);
index 20bd7d8..0be91ca 100644 (file)
@@ -328,6 +328,7 @@ struct vxlan_dev {
 #define VXLAN_F_TTL_INHERIT            0x10000
 #define VXLAN_F_VNIFILTER               0x20000
 #define VXLAN_F_MDB                    0x40000
+#define VXLAN_F_LOCALBYPASS            0x80000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -348,7 +349,8 @@ struct vxlan_dev {
                                         VXLAN_F_UDP_ZERO_CSUM6_TX |    \
                                         VXLAN_F_UDP_ZERO_CSUM6_RX |    \
                                         VXLAN_F_COLLECT_METADATA  |    \
-                                        VXLAN_F_VNIFILTER)
+                                        VXLAN_F_VNIFILTER         |    \
+                                        VXLAN_F_LOCALBYPASS)
 
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
                                    u8 name_assign_type, struct vxlan_config *conf);
index d318c76..a8d7b8a 100644 (file)
@@ -180,7 +180,7 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
        if (likely(!cross_pg))
                return false;
 
-       return pool->dma_pages_cnt &&
+       return pool->dma_pages &&
               !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
 }
 
index c994ff5..6961a7b 100644 (file)
@@ -1273,6 +1273,9 @@ enum {
 
 /* Create a map that will be registered/unregesitered by the backed bpf_link */
        BPF_F_LINK              = (1U << 13),
+
+/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
+       BPF_F_PATH_FD           = (1U << 14),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -1421,6 +1424,13 @@ union bpf_attr {
                __aligned_u64   pathname;
                __u32           bpf_fd;
                __u32           file_flags;
+               /* Same as dirfd in openat() syscall; see openat(2)
+                * manpage for details of path FD and pathname semantics;
+                * path_fd should accompanied by BPF_F_PATH_FD flag set in
+                * file_flags field, otherwise it should be set to zero;
+                * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed.
+                */
+               __s32           path_fd;
        };
 
        struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
index 4ac1000..0f6a0fe 100644 (file)
@@ -828,6 +828,7 @@ enum {
        IFLA_VXLAN_TTL_INHERIT,
        IFLA_VXLAN_DF,
        IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
+       IFLA_VXLAN_LOCALBYPASS,
        __IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
index 256b463..b826598 100644 (file)
 #define MDIO_PMA_EXTABLE_BT1           0x0800  /* BASE-T1 ability */
 #define MDIO_PMA_EXTABLE_NBT           0x4000  /* 2.5/5GBASE-T ability */
 
+/* AN Clause 73 linkword */
+#define MDIO_AN_C73_0_S_MASK           GENMASK(4, 0)
+#define MDIO_AN_C73_0_E_MASK           GENMASK(9, 5)
+#define MDIO_AN_C73_0_PAUSE            BIT(10)
+#define MDIO_AN_C73_0_ASM_DIR          BIT(11)
+#define MDIO_AN_C73_0_C2               BIT(12)
+#define MDIO_AN_C73_0_RF               BIT(13)
+#define MDIO_AN_C73_0_ACK              BIT(14)
+#define MDIO_AN_C73_0_NP               BIT(15)
+#define MDIO_AN_C73_1_T_MASK           GENMASK(4, 0)
+#define MDIO_AN_C73_1_1000BASE_KX      BIT(5)
+#define MDIO_AN_C73_1_10GBASE_KX4      BIT(6)
+#define MDIO_AN_C73_1_10GBASE_KR       BIT(7)
+#define MDIO_AN_C73_1_40GBASE_KR4      BIT(8)
+#define MDIO_AN_C73_1_40GBASE_CR4      BIT(9)
+#define MDIO_AN_C73_1_100GBASE_CR10    BIT(10)
+#define MDIO_AN_C73_1_100GBASE_KP4     BIT(11)
+#define MDIO_AN_C73_1_100GBASE_KR4     BIT(12)
+#define MDIO_AN_C73_1_100GBASE_CR4     BIT(13)
+#define MDIO_AN_C73_1_25GBASE_R_S      BIT(14)
+#define MDIO_AN_C73_1_25GBASE_R                BIT(15)
+#define MDIO_AN_C73_2_2500BASE_KX      BIT(0)
+#define MDIO_AN_C73_2_5GBASE_KR                BIT(1)
+
 /* PHY XGXS lane state register. */
 #define MDIO_PHYXS_LNSTAT_SYNC0                0x0001
 #define MDIO_PHYXS_LNSTAT_SYNC1                0x0002
index c4d4d8e..e059dc2 100644 (file)
@@ -859,12 +859,14 @@ enum nft_exthdr_flags {
  * @NFT_EXTHDR_OP_TCP: match against tcp options
  * @NFT_EXTHDR_OP_IPV4: match against ipv4 options
  * @NFT_EXTHDR_OP_SCTP: match against sctp chunks
+ * @NFT_EXTHDR_OP_DCCP: match against dccp otions
  */
 enum nft_exthdr_op {
        NFT_EXTHDR_OP_IPV6,
        NFT_EXTHDR_OP_TCPOPT,
        NFT_EXTHDR_OP_IPV4,
        NFT_EXTHDR_OP_SCTP,
+       NFT_EXTHDR_OP_DCCP,
        __NFT_EXTHDR_OP_MAX
 };
 #define NFT_EXTHDR_OP_MAX      (__NFT_EXTHDR_OP_MAX - 1)
index 648a82f..00933dd 100644 (file)
@@ -594,6 +594,8 @@ enum {
 
        TCA_FLOWER_KEY_L2TPV3_SID,      /* be32 */
 
+       TCA_FLOWER_L2_MISS,             /* u8 */
+
        __TCA_FLOWER_MAX,
 };
 
index 51a7add..00f6ff0 100644 (file)
@@ -1260,6 +1260,16 @@ enum {
 };
 
 enum {
+       TCA_TAPRIO_OFFLOAD_STATS_PAD = 1,       /* u64 */
+       TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS,  /* u64 */
+       TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS,   /* u64 */
+
+       /* add new constants above here */
+       __TCA_TAPRIO_OFFLOAD_STATS_CNT,
+       TCA_TAPRIO_OFFLOAD_STATS_MAX = (__TCA_TAPRIO_OFFLOAD_STATS_CNT - 1)
+};
+
+enum {
        TCA_TAPRIO_ATTR_UNSPEC,
        TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
        TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */
index 89e8390..f7cbb3c 100644 (file)
@@ -389,6 +389,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
        if (flags & MSG_WAITALL)
                min_ret = iov_iter_count(&msg.msg_iter);
 
+       flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
        msg.msg_flags = flags;
        ret = sock_sendmsg(sock, &msg);
        if (ret < min_ret) {
@@ -1136,6 +1137,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
                msg_flags |= MSG_DONTWAIT;
        if (msg_flags & MSG_WAITALL)
                min_ret = iov_iter_count(&msg.msg_iter);
+       msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
 
        msg.msg_flags = msg_flags;
        msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
index d99e89f..3dabdd1 100644 (file)
@@ -41,7 +41,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
 /* bpf_lru_node helpers */
 static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
 {
-       return node->ref;
+       return READ_ONCE(node->ref);
+}
+
+static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
+{
+       WRITE_ONCE(node->ref, 0);
 }
 
 static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
@@ -89,7 +94,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
 
        bpf_lru_list_count_inc(l, tgt_type);
        node->type = tgt_type;
-       node->ref = 0;
+       bpf_lru_node_clear_ref(node);
        list_move(&node->list, &l->lists[tgt_type]);
 }
 
@@ -110,7 +115,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l,
                bpf_lru_list_count_inc(l, tgt_type);
                node->type = tgt_type;
        }
-       node->ref = 0;
+       bpf_lru_node_clear_ref(node);
 
        /* If the moving node is the next_inactive_rotation candidate,
         * move the next_inactive_rotation pointer also.
@@ -353,7 +358,7 @@ static void __local_list_add_pending(struct bpf_lru *lru,
        *(u32 *)((void *)node + lru->hash_offset) = hash;
        node->cpu = cpu;
        node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
-       node->ref = 0;
+       bpf_lru_node_clear_ref(node);
        list_add(&node->list, local_pending_list(loc_l));
 }
 
@@ -419,7 +424,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
        if (!list_empty(free_list)) {
                node = list_first_entry(free_list, struct bpf_lru_node, list);
                *(u32 *)((void *)node + lru->hash_offset) = hash;
-               node->ref = 0;
+               bpf_lru_node_clear_ref(node);
                __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
        }
 
@@ -522,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
                }
 
                node->type = BPF_LRU_LOCAL_LIST_T_FREE;
-               node->ref = 0;
+               bpf_lru_node_clear_ref(node);
                list_move(&node->list, local_free_list(loc_l));
 
                raw_spin_unlock_irqrestore(&loc_l->lock, flags);
@@ -568,7 +573,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
 
                node = (struct bpf_lru_node *)(buf + node_offset);
                node->type = BPF_LRU_LIST_T_FREE;
-               node->ref = 0;
+               bpf_lru_node_clear_ref(node);
                list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
                buf += elem_size;
        }
@@ -594,7 +599,7 @@ again:
                node = (struct bpf_lru_node *)(buf + node_offset);
                node->cpu = cpu;
                node->type = BPF_LRU_LIST_T_FREE;
-               node->ref = 0;
+               bpf_lru_node_clear_ref(node);
                list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
                i++;
                buf += elem_size;
index 4ea227c..8f3c8b2 100644 (file)
@@ -64,11 +64,8 @@ struct bpf_lru {
 
 static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
 {
-       /* ref is an approximation on access frequency.  It does not
-        * have to be very accurate.  Hence, no protection is used.
-        */
-       if (!node->ref)
-               node->ref = 1;
+       if (!READ_ONCE(node->ref))
+               WRITE_ONCE(node->ref, 1);
 }
 
 int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
index 6b682b8..947f0b8 100644 (file)
@@ -222,10 +222,17 @@ enum btf_kfunc_hook {
 enum {
        BTF_KFUNC_SET_MAX_CNT = 256,
        BTF_DTOR_KFUNC_MAX_CNT = 256,
+       BTF_KFUNC_FILTER_MAX_CNT = 16,
+};
+
+struct btf_kfunc_hook_filter {
+       btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT];
+       u32 nr_filters;
 };
 
 struct btf_kfunc_set_tab {
        struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX];
+       struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX];
 };
 
 struct btf_id_dtor_kfunc_tab {
@@ -7669,9 +7676,12 @@ static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags)
 /* Kernel Function (kfunc) BTF ID set registration API */
 
 static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
-                                 struct btf_id_set8 *add_set)
+                                 const struct btf_kfunc_id_set *kset)
 {
+       struct btf_kfunc_hook_filter *hook_filter;
+       struct btf_id_set8 *add_set = kset->set;
        bool vmlinux_set = !btf_is_module(btf);
+       bool add_filter = !!kset->filter;
        struct btf_kfunc_set_tab *tab;
        struct btf_id_set8 *set;
        u32 set_cnt;
@@ -7686,6 +7696,24 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
                return 0;
 
        tab = btf->kfunc_set_tab;
+
+       if (tab && add_filter) {
+               u32 i;
+
+               hook_filter = &tab->hook_filters[hook];
+               for (i = 0; i < hook_filter->nr_filters; i++) {
+                       if (hook_filter->filters[i] == kset->filter) {
+                               add_filter = false;
+                               break;
+                       }
+               }
+
+               if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) {
+                       ret = -E2BIG;
+                       goto end;
+               }
+       }
+
        if (!tab) {
                tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
                if (!tab)
@@ -7708,7 +7736,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
         */
        if (!vmlinux_set) {
                tab->sets[hook] = add_set;
-               return 0;
+               goto do_add_filter;
        }
 
        /* In case of vmlinux sets, there may be more than one set being
@@ -7750,6 +7778,11 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
 
        sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL);
 
+do_add_filter:
+       if (add_filter) {
+               hook_filter = &tab->hook_filters[hook];
+               hook_filter->filters[hook_filter->nr_filters++] = kset->filter;
+       }
        return 0;
 end:
        btf_free_kfunc_set_tab(btf);
@@ -7758,15 +7791,22 @@ end:
 
 static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
                                        enum btf_kfunc_hook hook,
-                                       u32 kfunc_btf_id)
+                                       u32 kfunc_btf_id,
+                                       const struct bpf_prog *prog)
 {
+       struct btf_kfunc_hook_filter *hook_filter;
        struct btf_id_set8 *set;
-       u32 *id;
+       u32 *id, i;
 
        if (hook >= BTF_KFUNC_HOOK_MAX)
                return NULL;
        if (!btf->kfunc_set_tab)
                return NULL;
+       hook_filter = &btf->kfunc_set_tab->hook_filters[hook];
+       for (i = 0; i < hook_filter->nr_filters; i++) {
+               if (hook_filter->filters[i](prog, kfunc_btf_id))
+                       return NULL;
+       }
        set = btf->kfunc_set_tab->sets[hook];
        if (!set)
                return NULL;
@@ -7821,23 +7861,25 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
  * protection for looking up a well-formed btf->kfunc_set_tab.
  */
 u32 *btf_kfunc_id_set_contains(const struct btf *btf,
-                              enum bpf_prog_type prog_type,
-                              u32 kfunc_btf_id)
+                              u32 kfunc_btf_id,
+                              const struct bpf_prog *prog)
 {
+       enum bpf_prog_type prog_type = resolve_prog_type(prog);
        enum btf_kfunc_hook hook;
        u32 *kfunc_flags;
 
-       kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id);
+       kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog);
        if (kfunc_flags)
                return kfunc_flags;
 
        hook = bpf_prog_type_to_kfunc_hook(prog_type);
-       return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
+       return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog);
 }
 
-u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id)
+u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
+                               const struct bpf_prog *prog)
 {
-       return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id);
+       return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog);
 }
 
 static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
@@ -7868,7 +7910,8 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
                        goto err_out;
        }
 
-       ret = btf_populate_kfunc_set(btf, hook, kset->set);
+       ret = btf_populate_kfunc_set(btf, hook, kset);
+
 err_out:
        btf_put(btf);
        return ret;
index 517b6a5..5b2741a 100644 (file)
@@ -1826,6 +1826,12 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
                ret = 1;
        } else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
                /* optlen is out of bounds */
+               if (*optlen > PAGE_SIZE && ctx.optlen >= 0) {
+                       pr_info_once("bpf setsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
+                                    ctx.optlen, max_optlen);
+                       ret = 0;
+                       goto out;
+               }
                ret = -EFAULT;
        } else {
                /* optlen within bounds, run kernel handler */
@@ -1881,8 +1887,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                .optname = optname,
                .current_task = current,
        };
+       int orig_optlen;
        int ret;
 
+       orig_optlen = max_optlen;
        ctx.optlen = max_optlen;
        max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
        if (max_optlen < 0)
@@ -1905,6 +1913,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                        ret = -EFAULT;
                        goto out;
                }
+               orig_optlen = ctx.optlen;
 
                if (copy_from_user(ctx.optval, optval,
                                   min(ctx.optlen, max_optlen)) != 0) {
@@ -1922,6 +1931,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                goto out;
 
        if (optval && (ctx.optlen > max_optlen || ctx.optlen < 0)) {
+               if (orig_optlen > PAGE_SIZE && ctx.optlen >= 0) {
+                       pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
+                                    ctx.optlen, max_optlen);
+                       ret = retval;
+                       goto out;
+               }
                ret = -EFAULT;
                goto out;
        }
index 8d368fa..4ef4c4f 100644 (file)
@@ -1423,7 +1423,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
 #define DYNPTR_SIZE_MASK       0xFFFFFF
 #define DYNPTR_RDONLY_BIT      BIT(31)
 
-static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
+static bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
 {
        return ptr->size & DYNPTR_RDONLY_BIT;
 }
@@ -1443,11 +1443,18 @@ static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *pt
        return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
 }
 
-u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
+u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
 {
        return ptr->size & DYNPTR_SIZE_MASK;
 }
 
+static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
+{
+       u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
+
+       ptr->size = new_size | metadata;
+}
+
 int bpf_dynptr_check_size(u32 size)
 {
        return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
@@ -1469,7 +1476,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
 
 static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
 {
-       u32 size = bpf_dynptr_get_size(ptr);
+       u32 size = __bpf_dynptr_size(ptr);
 
        if (len > size || offset > size - len)
                return -E2BIG;
@@ -1563,7 +1570,7 @@ BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, v
        enum bpf_dynptr_type type;
        int err;
 
-       if (!dst->data || bpf_dynptr_is_rdonly(dst))
+       if (!dst->data || __bpf_dynptr_is_rdonly(dst))
                return -EINVAL;
 
        err = bpf_dynptr_check_off_len(dst, offset, len);
@@ -1619,7 +1626,7 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
        if (err)
                return 0;
 
-       if (bpf_dynptr_is_rdonly(ptr))
+       if (__bpf_dynptr_is_rdonly(ptr))
                return 0;
 
        type = bpf_dynptr_get_type(ptr);
@@ -2142,6 +2149,22 @@ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
                return NULL;
        return cgrp;
 }
+
+/**
+ * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc, test
+ * task's membership of cgroup ancestry.
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+__bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
+                                      struct cgroup *ancestor)
+{
+       return task_under_cgroup_hierarchy(task, ancestor);
+}
 #endif /* CONFIG_CGROUPS */
 
 /**
@@ -2167,13 +2190,15 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
  * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
  * @ptr: The dynptr whose data slice to retrieve
  * @offset: Offset into the dynptr
- * @buffer: User-provided buffer to copy contents into
- * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
- *              requested slice. This must be a constant.
+ * @buffer__opt: User-provided buffer to copy contents into.  May be NULL
+ * @buffer__szk: Size (in bytes) of the buffer if present. This is the
+ *               length of the requested slice. This must be a constant.
  *
  * For non-skb and non-xdp type dynptrs, there is no difference between
  * bpf_dynptr_slice and bpf_dynptr_data.
  *
+ *  If buffer__opt is NULL, the call will fail if buffer_opt was needed.
+ *
  * If the intention is to write to the data slice, please use
  * bpf_dynptr_slice_rdwr.
  *
@@ -2190,7 +2215,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
  * direct pointer)
  */
 __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
-                                  void *buffer, u32 buffer__szk)
+                                  void *buffer__opt, u32 buffer__szk)
 {
        enum bpf_dynptr_type type;
        u32 len = buffer__szk;
@@ -2210,15 +2235,17 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
        case BPF_DYNPTR_TYPE_RINGBUF:
                return ptr->data + ptr->offset + offset;
        case BPF_DYNPTR_TYPE_SKB:
-               return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer);
+               return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
        case BPF_DYNPTR_TYPE_XDP:
        {
                void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
                if (xdp_ptr)
                        return xdp_ptr;
 
-               bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false);
-               return buffer;
+               if (!buffer__opt)
+                       return NULL;
+               bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
+               return buffer__opt;
        }
        default:
                WARN_ONCE(true, "unknown dynptr type %d\n", type);
@@ -2230,13 +2257,15 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
  * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
  * @ptr: The dynptr whose data slice to retrieve
  * @offset: Offset into the dynptr
- * @buffer: User-provided buffer to copy contents into
- * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
- *              requested slice. This must be a constant.
+ * @buffer__opt: User-provided buffer to copy contents into. May be NULL
+ * @buffer__szk: Size (in bytes) of the buffer if present. This is the
+ *               length of the requested slice. This must be a constant.
  *
  * For non-skb and non-xdp type dynptrs, there is no difference between
  * bpf_dynptr_slice and bpf_dynptr_data.
  *
+ * If buffer__opt is NULL, the call will fail if buffer_opt was needed.
+ *
  * The returned pointer is writable and may point to either directly the dynptr
  * data at the requested offset or to the buffer if unable to obtain a direct
  * data pointer to (example: the requested slice is to the paged area of an skb
@@ -2267,9 +2296,9 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
  * direct pointer)
  */
 __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
-                                       void *buffer, u32 buffer__szk)
+                                       void *buffer__opt, u32 buffer__szk)
 {
-       if (!ptr->data || bpf_dynptr_is_rdonly(ptr))
+       if (!ptr->data || __bpf_dynptr_is_rdonly(ptr))
                return NULL;
 
        /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
@@ -2294,7 +2323,59 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
         * will be copied out into the buffer and the user will need to call
         * bpf_dynptr_write() to commit changes.
         */
-       return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
+       return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk);
+}
+
+__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end)
+{
+       u32 size;
+
+       if (!ptr->data || start > end)
+               return -EINVAL;
+
+       size = __bpf_dynptr_size(ptr);
+
+       if (start > size || end > size)
+               return -ERANGE;
+
+       ptr->offset += start;
+       bpf_dynptr_set_size(ptr, end - start);
+
+       return 0;
+}
+
+__bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr)
+{
+       return !ptr->data;
+}
+
+__bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
+{
+       if (!ptr->data)
+               return false;
+
+       return __bpf_dynptr_is_rdonly(ptr);
+}
+
+__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
+{
+       if (!ptr->data)
+               return -EINVAL;
+
+       return __bpf_dynptr_size(ptr);
+}
+
+__bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr,
+                                struct bpf_dynptr_kern *clone__uninit)
+{
+       if (!ptr->data) {
+               bpf_dynptr_set_null(clone__uninit);
+               return -EINVAL;
+       }
+
+       *clone__uninit = *ptr;
+
+       return 0;
 }
 
 __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
@@ -2341,6 +2422,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
 BTF_SET8_END(generic_btf_ids)
@@ -2369,6 +2451,11 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
 BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_dynptr_adjust)
+BTF_ID_FLAGS(func, bpf_dynptr_is_null)
+BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
+BTF_ID_FLAGS(func, bpf_dynptr_size)
+BTF_ID_FLAGS(func, bpf_dynptr_clone)
 BTF_SET8_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
index 9948b54..4174f76 100644 (file)
@@ -435,7 +435,7 @@ static int bpf_iter_link_pin_kernel(struct dentry *parent,
        return ret;
 }
 
-static int bpf_obj_do_pin(const char __user *pathname, void *raw,
+static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw,
                          enum bpf_type type)
 {
        struct dentry *dentry;
@@ -444,22 +444,21 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
        umode_t mode;
        int ret;
 
-       dentry = user_path_create(AT_FDCWD, pathname, &path, 0);
+       dentry = user_path_create(path_fd, pathname, &path, 0);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
-
-       ret = security_path_mknod(&path, dentry, mode, 0);
-       if (ret)
-               goto out;
-
        dir = d_inode(path.dentry);
        if (dir->i_op != &bpf_dir_iops) {
                ret = -EPERM;
                goto out;
        }
 
+       mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
+       ret = security_path_mknod(&path, dentry, mode, 0);
+       if (ret)
+               goto out;
+
        switch (type) {
        case BPF_TYPE_PROG:
                ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw);
@@ -478,7 +477,7 @@ out:
        return ret;
 }
 
-int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
+int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname)
 {
        enum bpf_type type;
        void *raw;
@@ -488,14 +487,14 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
        if (IS_ERR(raw))
                return PTR_ERR(raw);
 
-       ret = bpf_obj_do_pin(pathname, raw, type);
+       ret = bpf_obj_do_pin(path_fd, pathname, raw, type);
        if (ret != 0)
                bpf_any_put(raw, type);
 
        return ret;
 }
 
-static void *bpf_obj_do_get(const char __user *pathname,
+static void *bpf_obj_do_get(int path_fd, const char __user *pathname,
                            enum bpf_type *type, int flags)
 {
        struct inode *inode;
@@ -503,7 +502,7 @@ static void *bpf_obj_do_get(const char __user *pathname,
        void *raw;
        int ret;
 
-       ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path);
+       ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path);
        if (ret)
                return ERR_PTR(ret);
 
@@ -527,7 +526,7 @@ out:
        return ERR_PTR(ret);
 }
 
-int bpf_obj_get_user(const char __user *pathname, int flags)
+int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags)
 {
        enum bpf_type type = BPF_TYPE_UNSPEC;
        int f_flags;
@@ -538,7 +537,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
        if (f_flags < 0)
                return f_flags;
 
-       raw = bpf_obj_do_get(pathname, &type, f_flags);
+       raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags);
        if (IS_ERR(raw))
                return PTR_ERR(raw);
 
index 046ddff..8504944 100644 (file)
@@ -62,9 +62,6 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 
        n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 
-       WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
-                 "verifier log line truncated - local buffer too short\n");
-
        if (log->level == BPF_LOG_KERNEL) {
                bool newline = n > 0 && log->kbuf[n - 1] == '\n';
 
index 0c21d0d..476ec95 100644 (file)
@@ -1931,6 +1931,11 @@ static int map_freeze(const union bpf_attr *attr)
                return -ENOTSUPP;
        }
 
+       if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+               fdput(f);
+               return -EPERM;
+       }
+
        mutex_lock(&map->freeze_mutex);
        if (bpf_map_write_active(map)) {
                err = -EBUSY;
@@ -1940,10 +1945,6 @@ static int map_freeze(const union bpf_attr *attr)
                err = -EBUSY;
                goto err_put;
        }
-       if (!bpf_capable()) {
-               err = -EPERM;
-               goto err_put;
-       }
 
        WRITE_ONCE(map->frozen, true);
 err_put:
@@ -2701,23 +2702,38 @@ free_prog:
        return err;
 }
 
-#define BPF_OBJ_LAST_FIELD file_flags
+#define BPF_OBJ_LAST_FIELD path_fd
 
 static int bpf_obj_pin(const union bpf_attr *attr)
 {
-       if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
+       int path_fd;
+
+       if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD)
+               return -EINVAL;
+
+       /* path_fd has to be accompanied by BPF_F_PATH_FD flag */
+       if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd)
                return -EINVAL;
 
-       return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
+       path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD;
+       return bpf_obj_pin_user(attr->bpf_fd, path_fd,
+                               u64_to_user_ptr(attr->pathname));
 }
 
 static int bpf_obj_get(const union bpf_attr *attr)
 {
+       int path_fd;
+
        if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
-           attr->file_flags & ~BPF_OBJ_FLAG_MASK)
+           attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD))
+               return -EINVAL;
+
+       /* path_fd has to be accompanied by BPF_F_PATH_FD flag */
+       if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd)
                return -EINVAL;
 
-       return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
+       path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD;
+       return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname),
                                attr->file_flags);
 }
 
@@ -2972,10 +2988,17 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
 {
        struct bpf_tracing_link *tr_link =
                container_of(link, struct bpf_tracing_link, link.link);
+       u32 target_btf_id, target_obj_id;
 
+       bpf_trampoline_unpack_key(tr_link->trampoline->key,
+                                 &target_obj_id, &target_btf_id);
        seq_printf(seq,
-                  "attach_type:\t%d\n",
-                  tr_link->attach_type);
+                  "attach_type:\t%d\n"
+                  "target_obj_id:\t%u\n"
+                  "target_btf_id:\t%u\n",
+                  tr_link->attach_type,
+                  target_obj_id,
+                  target_btf_id);
 }
 
 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
@@ -5389,7 +5412,8 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write,
                *(int *)table->data = unpriv_enable;
        }
 
-       unpriv_ebpf_notify(unpriv_enable);
+       if (write)
+               unpriv_ebpf_notify(unpriv_enable);
 
        return ret;
 }
index ac021bc..78acf28 100644 (file)
@@ -251,11 +251,8 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_a
        return tlinks;
 }
 
-static void __bpf_tramp_image_put_deferred(struct work_struct *work)
+static void bpf_tramp_image_free(struct bpf_tramp_image *im)
 {
-       struct bpf_tramp_image *im;
-
-       im = container_of(work, struct bpf_tramp_image, work);
        bpf_image_ksym_del(&im->ksym);
        bpf_jit_free_exec(im->image);
        bpf_jit_uncharge_modmem(PAGE_SIZE);
@@ -263,6 +260,14 @@ static void __bpf_tramp_image_put_deferred(struct work_struct *work)
        kfree_rcu(im, rcu);
 }
 
+static void __bpf_tramp_image_put_deferred(struct work_struct *work)
+{
+       struct bpf_tramp_image *im;
+
+       im = container_of(work, struct bpf_tramp_image, work);
+       bpf_tramp_image_free(im);
+}
+
 /* callback, fexit step 3 or fentry step 2 */
 static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
 {
@@ -344,7 +349,7 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im)
        call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
 }
 
-static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
+static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key)
 {
        struct bpf_tramp_image *im;
        struct bpf_ksym *ksym;
@@ -371,7 +376,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
 
        ksym = &im->ksym;
        INIT_LIST_HEAD_RCU(&ksym->lnode);
-       snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx);
+       snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
        bpf_image_ksym_add(image, ksym);
        return im;
 
@@ -401,11 +406,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
                err = unregister_fentry(tr, tr->cur_image->image);
                bpf_tramp_image_put(tr->cur_image);
                tr->cur_image = NULL;
-               tr->selector = 0;
                goto out;
        }
 
-       im = bpf_tramp_image_alloc(tr->key, tr->selector);
+       im = bpf_tramp_image_alloc(tr->key);
        if (IS_ERR(im)) {
                err = PTR_ERR(im);
                goto out;
@@ -438,12 +442,11 @@ again:
                                          &tr->func.model, tr->flags, tlinks,
                                          tr->func.addr);
        if (err < 0)
-               goto out;
+               goto out_free;
 
        set_memory_rox((long)im->image, 1);
 
-       WARN_ON(tr->cur_image && tr->selector == 0);
-       WARN_ON(!tr->cur_image && tr->selector);
+       WARN_ON(tr->cur_image && total == 0);
        if (tr->cur_image)
                /* progs already running at this address */
                err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
@@ -468,18 +471,21 @@ again:
        }
 #endif
        if (err)
-               goto out;
+               goto out_free;
 
        if (tr->cur_image)
                bpf_tramp_image_put(tr->cur_image);
        tr->cur_image = im;
-       tr->selector++;
 out:
        /* If any error happens, restore previous flags */
        if (err)
                tr->flags = orig_flags;
        kfree(tlinks);
        return err;
+
+out_free:
+       bpf_tramp_image_free(im);
+       goto out;
 }
 
 static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
index 5871aa7..086b2a1 100644 (file)
@@ -240,6 +240,12 @@ static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
                             (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 }
 
+static bool bpf_helper_call(const struct bpf_insn *insn)
+{
+       return insn->code == (BPF_JMP | BPF_CALL) &&
+              insn->src_reg == 0;
+}
+
 static bool bpf_pseudo_call(const struct bpf_insn *insn)
 {
        return insn->code == (BPF_JMP | BPF_CALL) &&
@@ -273,11 +279,6 @@ struct bpf_call_arg_meta {
        struct btf_field *kptr_field;
 };
 
-struct btf_and_id {
-       struct btf *btf;
-       u32 btf_id;
-};
-
 struct bpf_kfunc_call_arg_meta {
        /* In parameters */
        struct btf *btf;
@@ -296,10 +297,18 @@ struct bpf_kfunc_call_arg_meta {
                u64 value;
                bool found;
        } arg_constant;
-       union {
-               struct btf_and_id arg_obj_drop;
-               struct btf_and_id arg_refcount_acquire;
-       };
+
+       /* arg_btf and arg_btf_id are used by kfunc-specific handling,
+        * generally to pass info about user-defined local kptr types to later
+        * verification logic
+        *   bpf_obj_drop
+        *     Record the local kptr type to be drop'd
+        *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
+        *     Record the local kptr type to be refcount_incr'd
+        */
+       struct btf *arg_btf;
+       u32 arg_btf_id;
+
        struct {
                struct btf_field *field;
        } arg_list_head;
@@ -309,6 +318,7 @@ struct bpf_kfunc_call_arg_meta {
        struct {
                enum bpf_dynptr_type type;
                u32 id;
+               u32 ref_obj_id;
        } initialized_dynptr;
        struct {
                u8 spi;
@@ -468,6 +478,13 @@ static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
        return rec;
 }
 
+static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
+{
+       struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
+
+       return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
+}
+
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 {
        return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
@@ -515,6 +532,8 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
        return func_id == BPF_FUNC_dynptr_data;
 }
 
+static bool is_callback_calling_kfunc(u32 btf_id);
+
 static bool is_callback_calling_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_for_each_map_elem ||
@@ -524,6 +543,11 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
               func_id == BPF_FUNC_user_ringbuf_drain;
 }
 
+static bool is_async_callback_calling_function(enum bpf_func_id func_id)
+{
+       return func_id == BPF_FUNC_timer_set_callback;
+}
+
 static bool is_storage_get_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_sk_storage_get ||
@@ -604,9 +628,9 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
                 type & PTR_TRUSTED ? "trusted_" : ""
        );
 
-       snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
+       snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s",
                 prefix, str[base_type(type)], postfix);
-       return env->type_str_buf;
+       return env->tmp_str_buf;
 }
 
 static char slot_type_char[] = {
@@ -847,11 +871,11 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
                                        struct bpf_func_state *state, int spi);
 
 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
-                                  enum bpf_arg_type arg_type, int insn_idx)
+                                  enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
 {
        struct bpf_func_state *state = func(env, reg);
        enum bpf_dynptr_type type;
-       int spi, i, id, err;
+       int spi, i, err;
 
        spi = dynptr_get_spi(env, reg);
        if (spi < 0)
@@ -887,7 +911,13 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 
        if (dynptr_type_refcounted(type)) {
                /* The id is used to track proper releasing */
-               id = acquire_reference_state(env, insn_idx);
+               int id;
+
+               if (clone_ref_obj_id)
+                       id = clone_ref_obj_id;
+               else
+                       id = acquire_reference_state(env, insn_idx);
+
                if (id < 0)
                        return id;
 
@@ -901,24 +931,15 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
        return 0;
 }
 
-static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
 {
-       struct bpf_func_state *state = func(env, reg);
-       int spi, i;
-
-       spi = dynptr_get_spi(env, reg);
-       if (spi < 0)
-               return spi;
+       int i;
 
        for (i = 0; i < BPF_REG_SIZE; i++) {
                state->stack[spi].slot_type[i] = STACK_INVALID;
                state->stack[spi - 1].slot_type[i] = STACK_INVALID;
        }
 
-       /* Invalidate any slices associated with this dynptr */
-       if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type))
-               WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id));
-
        __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
        __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 
@@ -945,6 +966,50 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
         */
        state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
        state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+}
+
+static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+       struct bpf_func_state *state = func(env, reg);
+       int spi, ref_obj_id, i;
+
+       spi = dynptr_get_spi(env, reg);
+       if (spi < 0)
+               return spi;
+
+       if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
+               invalidate_dynptr(env, state, spi);
+               return 0;
+       }
+
+       ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
+
+       /* If the dynptr has a ref_obj_id, then we need to invalidate
+        * two things:
+        *
+        * 1) Any dynptrs with a matching ref_obj_id (clones)
+        * 2) Any slices derived from this dynptr.
+        */
+
+       /* Invalidate any slices associated with this dynptr */
+       WARN_ON_ONCE(release_reference(env, ref_obj_id));
+
+       /* Invalidate any dynptr clones */
+       for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+               if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
+                       continue;
+
+               /* it should always be the case that if the ref obj id
+                * matches then the stack slot also belongs to a
+                * dynptr
+                */
+               if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
+                       verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
+                       return -EFAULT;
+               }
+               if (state->stack[i].spilled_ptr.dynptr.first_slot)
+                       invalidate_dynptr(env, state, i);
+       }
 
        return 0;
 }
@@ -1254,6 +1319,12 @@ static bool is_spilled_reg(const struct bpf_stack_state *stack)
        return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
 }
 
+static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
+{
+       return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
+              stack->spilled_ptr.type == SCALAR_VALUE;
+}
+
 static void scrub_spilled_slot(u8 *stype)
 {
        if (*stype != STACK_INVALID)
@@ -3144,12 +3215,172 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
        return btf_name_by_offset(desc_btf, func->name_off);
 }
 
+static inline void bt_init(struct backtrack_state *bt, u32 frame)
+{
+       bt->frame = frame;
+}
+
+static inline void bt_reset(struct backtrack_state *bt)
+{
+       struct bpf_verifier_env *env = bt->env;
+
+       memset(bt, 0, sizeof(*bt));
+       bt->env = env;
+}
+
+static inline u32 bt_empty(struct backtrack_state *bt)
+{
+       u64 mask = 0;
+       int i;
+
+       for (i = 0; i <= bt->frame; i++)
+               mask |= bt->reg_masks[i] | bt->stack_masks[i];
+
+       return mask == 0;
+}
+
+static inline int bt_subprog_enter(struct backtrack_state *bt)
+{
+       if (bt->frame == MAX_CALL_FRAMES - 1) {
+               verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
+               WARN_ONCE(1, "verifier backtracking bug");
+               return -EFAULT;
+       }
+       bt->frame++;
+       return 0;
+}
+
+static inline int bt_subprog_exit(struct backtrack_state *bt)
+{
+       if (bt->frame == 0) {
+               verbose(bt->env, "BUG subprog exit from frame 0\n");
+               WARN_ONCE(1, "verifier backtracking bug");
+               return -EFAULT;
+       }
+       bt->frame--;
+       return 0;
+}
+
+static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+       bt->reg_masks[frame] |= 1 << reg;
+}
+
+static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+       bt->reg_masks[frame] &= ~(1 << reg);
+}
+
+static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
+{
+       bt_set_frame_reg(bt, bt->frame, reg);
+}
+
+static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
+{
+       bt_clear_frame_reg(bt, bt->frame, reg);
+}
+
+static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
+{
+       bt->stack_masks[frame] |= 1ull << slot;
+}
+
+static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
+{
+       bt->stack_masks[frame] &= ~(1ull << slot);
+}
+
+static inline void bt_set_slot(struct backtrack_state *bt, u32 slot)
+{
+       bt_set_frame_slot(bt, bt->frame, slot);
+}
+
+static inline void bt_clear_slot(struct backtrack_state *bt, u32 slot)
+{
+       bt_clear_frame_slot(bt, bt->frame, slot);
+}
+
+static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
+{
+       return bt->reg_masks[frame];
+}
+
+static inline u32 bt_reg_mask(struct backtrack_state *bt)
+{
+       return bt->reg_masks[bt->frame];
+}
+
+static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
+{
+       return bt->stack_masks[frame];
+}
+
+static inline u64 bt_stack_mask(struct backtrack_state *bt)
+{
+       return bt->stack_masks[bt->frame];
+}
+
+static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
+{
+       return bt->reg_masks[bt->frame] & (1 << reg);
+}
+
+static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot)
+{
+       return bt->stack_masks[bt->frame] & (1ull << slot);
+}
+
+/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
+static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
+{
+       DECLARE_BITMAP(mask, 64);
+       bool first = true;
+       int i, n;
+
+       buf[0] = '\0';
+
+       bitmap_from_u64(mask, reg_mask);
+       for_each_set_bit(i, mask, 32) {
+               n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
+               first = false;
+               buf += n;
+               buf_sz -= n;
+               if (buf_sz < 0)
+                       break;
+       }
+}
+/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
+static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
+{
+       DECLARE_BITMAP(mask, 64);
+       bool first = true;
+       int i, n;
+
+       buf[0] = '\0';
+
+       bitmap_from_u64(mask, stack_mask);
+       for_each_set_bit(i, mask, 64) {
+               n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
+               first = false;
+               buf += n;
+               buf_sz -= n;
+               if (buf_sz < 0)
+                       break;
+       }
+}
+
 /* For given verifier state backtrack_insn() is called from the last insn to
  * the first insn. Its purpose is to compute a bitmask of registers and
  * stack slots that needs precision in the parent verifier state.
+ *
+ * @idx is an index of the instruction we are currently processing;
+ * @subseq_idx is an index of the subsequent instruction that:
+ *   - *would be* executed next, if jump history is viewed in forward order;
+ *   - *was* processed previously during backtracking.
  */
-static int backtrack_insn(struct bpf_verifier_env *env, int idx,
-                         u32 *reg_mask, u64 *stack_mask)
+static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
+                         struct backtrack_state *bt)
 {
        const struct bpf_insn_cbs cbs = {
                .cb_call        = disasm_kfunc_name,
@@ -3160,20 +3391,24 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
        u8 class = BPF_CLASS(insn->code);
        u8 opcode = BPF_OP(insn->code);
        u8 mode = BPF_MODE(insn->code);
-       u32 dreg = 1u << insn->dst_reg;
-       u32 sreg = 1u << insn->src_reg;
-       u32 spi;
+       u32 dreg = insn->dst_reg;
+       u32 sreg = insn->src_reg;
+       u32 spi, i;
 
        if (insn->code == 0)
                return 0;
        if (env->log.level & BPF_LOG_LEVEL2) {
-               verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
+               fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
+               verbose(env, "mark_precise: frame%d: regs=%s ",
+                       bt->frame, env->tmp_str_buf);
+               fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
+               verbose(env, "stack=%s before ", env->tmp_str_buf);
                verbose(env, "%d: ", idx);
                print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
        }
 
        if (class == BPF_ALU || class == BPF_ALU64) {
-               if (!(*reg_mask & dreg))
+               if (!bt_is_reg_set(bt, dreg))
                        return 0;
                if (opcode == BPF_MOV) {
                        if (BPF_SRC(insn->code) == BPF_X) {
@@ -3181,8 +3416,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
                                 * dreg needs precision after this insn
                                 * sreg needs precision before this insn
                                 */
-                               *reg_mask &= ~dreg;
-                               *reg_mask |= sreg;
+                               bt_clear_reg(bt, dreg);
+                               bt_set_reg(bt, sreg);
                        } else {
                                /* dreg = K
                                 * dreg needs precision after this insn.
@@ -3190,7 +3425,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
                                 * as precise=true in this verifier state.
                                 * No further markings in parent are necessary
                                 */
-                               *reg_mask &= ~dreg;
+                               bt_clear_reg(bt, dreg);
                        }
                } else {
                        if (BPF_SRC(insn->code) == BPF_X) {
@@ -3198,15 +3433,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
                                 * both dreg and sreg need precision
                                 * before this insn
                                 */
-                               *reg_mask |= sreg;
+                               bt_set_reg(bt, sreg);
                        } /* else dreg += K
                           * dreg still needs precision before this insn
                           */
                }
        } else if (class == BPF_LDX) {
-               if (!(*reg_mask & dreg))
+               if (!bt_is_reg_set(bt, dreg))
                        return 0;
-               *reg_mask &= ~dreg;
+               bt_clear_reg(bt, dreg);
 
                /* scalars can only be spilled into stack w/o losing precision.
                 * Load from any other memory can be zero extended.
@@ -3227,9 +3462,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
                        WARN_ONCE(1, "verifier backtracking bug");
                        return -EFAULT;
                }
-               *stack_mask |= 1ull << spi;
+               bt_set_slot(bt, spi);
        } else if (class == BPF_STX || class == BPF_ST) {
-               if (*reg_mask & dreg)
+               if (bt_is_reg_set(bt, dreg))
                        /* stx & st shouldn't be using _scalar_ dst_reg
                         * to access memory. It means backtracking
                         * encountered a case of pointer subtraction.
@@ -3244,20 +3479,92 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
                        WARN_ONCE(1, "verifier backtracking bug");
                        return -EFAULT;
                }
-               if (!(*stack_mask & (1ull << spi)))
+               if (!bt_is_slot_set(bt, spi))
                        return 0;
-               *stack_mask &= ~(1ull << spi);
+               bt_clear_slot(bt, spi);
                if (class == BPF_STX)
-                       *reg_mask |= sreg;
+                       bt_set_reg(bt, sreg);
        } else if (class == BPF_JMP || class == BPF_JMP32) {
-               if (opcode == BPF_CALL) {
-                       if (insn->src_reg == BPF_PSEUDO_CALL)
-                               return -ENOTSUPP;
-                       /* BPF helpers that invoke callback subprogs are
-                        * equivalent to BPF_PSEUDO_CALL above
+               if (bpf_pseudo_call(insn)) {
+                       int subprog_insn_idx, subprog;
+
+                       subprog_insn_idx = idx + insn->imm + 1;
+                       subprog = find_subprog(env, subprog_insn_idx);
+                       if (subprog < 0)
+                               return -EFAULT;
+
+                       if (subprog_is_global(env, subprog)) {
+                               /* check that jump history doesn't have any
+                                * extra instructions from subprog; the next
+                                * instruction after call to global subprog
+                                * should be literally next instruction in
+                                * caller program
+                                */
+                               WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
+                               /* r1-r5 are invalidated after subprog call,
+                                * so for global func call it shouldn't be set
+                                * anymore
+                                */
+                               if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+                                       verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
+                                       WARN_ONCE(1, "verifier backtracking bug");
+                                       return -EFAULT;
+                               }
+                               /* global subprog always sets R0 */
+                               bt_clear_reg(bt, BPF_REG_0);
+                               return 0;
+                       } else {
+                               /* static subprog call instruction, which
+                                * means that we are exiting current subprog,
+                                * so only r1-r5 could be still requested as
+                                * precise, r0 and r6-r10 or any stack slot in
+                                * the current frame should be zero by now
+                                */
+                               if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
+                                       verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
+                                       WARN_ONCE(1, "verifier backtracking bug");
+                                       return -EFAULT;
+                               }
+                               /* we don't track register spills perfectly,
+                                * so fallback to force-precise instead of failing */
+                               if (bt_stack_mask(bt) != 0)
+                                       return -ENOTSUPP;
+                               /* propagate r1-r5 to the caller */
+                               for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
+                                       if (bt_is_reg_set(bt, i)) {
+                                               bt_clear_reg(bt, i);
+                                               bt_set_frame_reg(bt, bt->frame - 1, i);
+                                       }
+                               }
+                               if (bt_subprog_exit(bt))
+                                       return -EFAULT;
+                               return 0;
+                       }
+               } else if ((bpf_helper_call(insn) &&
+                           is_callback_calling_function(insn->imm) &&
+                           !is_async_callback_calling_function(insn->imm)) ||
+                          (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) {
+                       /* callback-calling helper or kfunc call, which means
+                        * we are exiting from subprog, but unlike the subprog
+                        * call handling above, we shouldn't propagate
+                        * precision of r1-r5 (if any requested), as they are
+                        * not actually arguments passed directly to callback
+                        * subprogs
                         */
-                       if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
+                       if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
+                               verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
+                               WARN_ONCE(1, "verifier backtracking bug");
+                               return -EFAULT;
+                       }
+                       if (bt_stack_mask(bt) != 0)
                                return -ENOTSUPP;
+                       /* clear r1-r5 in callback subprog's mask */
+                       for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+                               bt_clear_reg(bt, i);
+                       if (bt_subprog_exit(bt))
+                               return -EFAULT;
+                       return 0;
+               } else if (opcode == BPF_CALL) {
                        /* kfunc with imm==0 is invalid and fixup_kfunc_call will
                         * catch this error later. Make backtracking conservative
                         * with ENOTSUPP.
@@ -3265,19 +3572,51 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
                        if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
                                return -ENOTSUPP;
                        /* regular helper call sets R0 */
-                       *reg_mask &= ~1;
-                       if (*reg_mask & 0x3f) {
+                       bt_clear_reg(bt, BPF_REG_0);
+                       if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
                                /* if backtracing was looking for registers R1-R5
                                 * they should have been found already.
                                 */
-                               verbose(env, "BUG regs %x\n", *reg_mask);
+                               verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
                                WARN_ONCE(1, "verifier backtracking bug");
                                return -EFAULT;
                        }
                } else if (opcode == BPF_EXIT) {
-                       return -ENOTSUPP;
+                       bool r0_precise;
+
+                       if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+                               /* if backtracing was looking for registers R1-R5
+                                * they should have been found already.
+                                */
+                               verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
+                               WARN_ONCE(1, "verifier backtracking bug");
+                               return -EFAULT;
+                       }
+
+                       /* BPF_EXIT in subprog or callback always returns
+                        * right after the call instruction, so by checking
+                        * whether the instruction at subseq_idx-1 is subprog
+                        * call or not we can distinguish actual exit from
+                        * *subprog* from exit from *callback*. In the former
+                        * case, we need to propagate r0 precision, if
+                        * necessary. In the latter case we never do that.
+                        */
+                       r0_precise = subseq_idx - 1 >= 0 &&
+                                    bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
+                                    bt_is_reg_set(bt, BPF_REG_0);
+
+                       bt_clear_reg(bt, BPF_REG_0);
+                       if (bt_subprog_enter(bt))
+                               return -EFAULT;
+
+                       if (r0_precise)
+                               bt_set_reg(bt, BPF_REG_0);
+                       /* r6-r9 and stack slots will stay set in caller frame
+                        * bitmasks until we return back from callee(s)
+                        */
+                       return 0;
                } else if (BPF_SRC(insn->code) == BPF_X) {
-                       if (!(*reg_mask & (dreg | sreg)))
+                       if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
                                return 0;
                        /* dreg <cond> sreg
                         * Both dreg and sreg need precision before
@@ -3285,7 +3624,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
                         * before it would be equally necessary to
                         * propagate it to dreg.
                         */
-                       *reg_mask |= (sreg | dreg);
+                       bt_set_reg(bt, dreg);
+                       bt_set_reg(bt, sreg);
                         /* else dreg <cond> K
                          * Only dreg still needs precision before
                          * this insn, so for the K-based conditional
@@ -3293,9 +3633,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
                          */
                }
        } else if (class == BPF_LD) {
-               if (!(*reg_mask & dreg))
+               if (!bt_is_reg_set(bt, dreg))
                        return 0;
-               *reg_mask &= ~dreg;
+               bt_clear_reg(bt, dreg);
                /* It's ld_imm64 or ld_abs or ld_ind.
                 * For ld_imm64 no further tracking of precision
                 * into parent is necessary
@@ -3366,6 +3706,11 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
        struct bpf_reg_state *reg;
        int i, j;
 
+       if (env->log.level & BPF_LOG_LEVEL2) {
+               verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
+                       st->curframe);
+       }
+
        /* big hammer: mark all scalars precise in this path.
         * pop_stack may still get !precise scalars.
         * We also skip current state and go straight to first parent state,
@@ -3377,17 +3722,25 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
                        func = st->frame[i];
                        for (j = 0; j < BPF_REG_FP; j++) {
                                reg = &func->regs[j];
-                               if (reg->type != SCALAR_VALUE)
+                               if (reg->type != SCALAR_VALUE || reg->precise)
                                        continue;
                                reg->precise = true;
+                               if (env->log.level & BPF_LOG_LEVEL2) {
+                                       verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
+                                               i, j);
+                               }
                        }
                        for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
                                if (!is_spilled_reg(&func->stack[j]))
                                        continue;
                                reg = &func->stack[j].spilled_ptr;
-                               if (reg->type != SCALAR_VALUE)
+                               if (reg->type != SCALAR_VALUE || reg->precise)
                                        continue;
                                reg->precise = true;
+                               if (env->log.level & BPF_LOG_LEVEL2) {
+                                       verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
+                                               i, -(j + 1) * 8);
+                               }
                        }
                }
        }
@@ -3505,62 +3858,49 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_
  * mark_all_scalars_imprecise() to hopefully get more permissive and generic
  * finalized states which help in short circuiting more future states.
  */
-static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
-                                 int spi)
+static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
 {
+       struct backtrack_state *bt = &env->bt;
        struct bpf_verifier_state *st = env->cur_state;
        int first_idx = st->first_insn_idx;
        int last_idx = env->insn_idx;
+       int subseq_idx = -1;
        struct bpf_func_state *func;
        struct bpf_reg_state *reg;
-       u32 reg_mask = regno >= 0 ? 1u << regno : 0;
-       u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
        bool skip_first = true;
-       bool new_marks = false;
-       int i, err;
+       int i, fr, err;
 
        if (!env->bpf_capable)
                return 0;
 
+       /* set frame number from which we are starting to backtrack */
+       bt_init(bt, env->cur_state->curframe);
+
        /* Do sanity checks against current state of register and/or stack
         * slot, but don't set precise flag in current state, as precision
         * tracking in the current state is unnecessary.
         */
-       func = st->frame[frame];
+       func = st->frame[bt->frame];
        if (regno >= 0) {
                reg = &func->regs[regno];
                if (reg->type != SCALAR_VALUE) {
                        WARN_ONCE(1, "backtracing misuse");
                        return -EFAULT;
                }
-               new_marks = true;
-       }
-
-       while (spi >= 0) {
-               if (!is_spilled_reg(&func->stack[spi])) {
-                       stack_mask = 0;
-                       break;
-               }
-               reg = &func->stack[spi].spilled_ptr;
-               if (reg->type != SCALAR_VALUE) {
-                       stack_mask = 0;
-                       break;
-               }
-               new_marks = true;
-               break;
+               bt_set_reg(bt, regno);
        }
 
-       if (!new_marks)
-               return 0;
-       if (!reg_mask && !stack_mask)
+       if (bt_empty(bt))
                return 0;
 
        for (;;) {
                DECLARE_BITMAP(mask, 64);
                u32 history = st->jmp_history_cnt;
 
-               if (env->log.level & BPF_LOG_LEVEL2)
-                       verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
+               if (env->log.level & BPF_LOG_LEVEL2) {
+                       verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
+                               bt->frame, last_idx, first_idx, subseq_idx);
+               }
 
                if (last_idx < 0) {
                        /* we are at the entry into subprog, which
@@ -3571,12 +3911,13 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r
                        if (st->curframe == 0 &&
                            st->frame[0]->subprogno > 0 &&
                            st->frame[0]->callsite == BPF_MAIN_FUNC &&
-                           stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
-                               bitmap_from_u64(mask, reg_mask);
+                           bt_stack_mask(bt) == 0 &&
+                           (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
+                               bitmap_from_u64(mask, bt_reg_mask(bt));
                                for_each_set_bit(i, mask, 32) {
                                        reg = &st->frame[0]->regs[i];
                                        if (reg->type != SCALAR_VALUE) {
-                                               reg_mask &= ~(1u << i);
+                                               bt_clear_reg(bt, i);
                                                continue;
                                        }
                                        reg->precise = true;
@@ -3584,8 +3925,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r
                                return 0;
                        }
 
-                       verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
-                               st->frame[0]->subprogno, reg_mask, stack_mask);
+                       verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
+                               st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
                        WARN_ONCE(1, "verifier backtracking bug");
                        return -EFAULT;
                }
@@ -3595,15 +3936,16 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r
                                err = 0;
                                skip_first = false;
                        } else {
-                               err = backtrack_insn(env, i, &reg_mask, &stack_mask);
+                               err = backtrack_insn(env, i, subseq_idx, bt);
                        }
                        if (err == -ENOTSUPP) {
-                               mark_all_scalars_precise(env, st);
+                               mark_all_scalars_precise(env, env->cur_state);
+                               bt_reset(bt);
                                return 0;
                        } else if (err) {
                                return err;
                        }
-                       if (!reg_mask && !stack_mask)
+                       if (bt_empty(bt))
                                /* Found assignment(s) into tracked register in this state.
                                 * Since this state is already marked, just return.
                                 * Nothing to be tracked further in the parent state.
@@ -3611,6 +3953,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r
                                return 0;
                        if (i == first_idx)
                                break;
+                       subseq_idx = i;
                        i = get_prev_insn_idx(st, i, &history);
                        if (i >= env->prog->len) {
                                /* This can happen if backtracking reached insn 0
@@ -3628,84 +3971,95 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r
                if (!st)
                        break;
 
-               new_marks = false;
-               func = st->frame[frame];
-               bitmap_from_u64(mask, reg_mask);
-               for_each_set_bit(i, mask, 32) {
-                       reg = &func->regs[i];
-                       if (reg->type != SCALAR_VALUE) {
-                               reg_mask &= ~(1u << i);
-                               continue;
-                       }
-                       if (!reg->precise)
-                               new_marks = true;
-                       reg->precise = true;
-               }
-
-               bitmap_from_u64(mask, stack_mask);
-               for_each_set_bit(i, mask, 64) {
-                       if (i >= func->allocated_stack / BPF_REG_SIZE) {
-                               /* the sequence of instructions:
-                                * 2: (bf) r3 = r10
-                                * 3: (7b) *(u64 *)(r3 -8) = r0
-                                * 4: (79) r4 = *(u64 *)(r10 -8)
-                                * doesn't contain jmps. It's backtracked
-                                * as a single block.
-                                * During backtracking insn 3 is not recognized as
-                                * stack access, so at the end of backtracking
-                                * stack slot fp-8 is still marked in stack_mask.
-                                * However the parent state may not have accessed
-                                * fp-8 and it's "unallocated" stack space.
-                                * In such case fallback to conservative.
-                                */
-                               mark_all_scalars_precise(env, st);
-                               return 0;
+               for (fr = bt->frame; fr >= 0; fr--) {
+                       func = st->frame[fr];
+                       bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
+                       for_each_set_bit(i, mask, 32) {
+                               reg = &func->regs[i];
+                               if (reg->type != SCALAR_VALUE) {
+                                       bt_clear_frame_reg(bt, fr, i);
+                                       continue;
+                               }
+                               if (reg->precise)
+                                       bt_clear_frame_reg(bt, fr, i);
+                               else
+                                       reg->precise = true;
                        }
 
-                       if (!is_spilled_reg(&func->stack[i])) {
-                               stack_mask &= ~(1ull << i);
-                               continue;
+                       bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
+                       for_each_set_bit(i, mask, 64) {
+                               if (i >= func->allocated_stack / BPF_REG_SIZE) {
+                                       /* the sequence of instructions:
+                                        * 2: (bf) r3 = r10
+                                        * 3: (7b) *(u64 *)(r3 -8) = r0
+                                        * 4: (79) r4 = *(u64 *)(r10 -8)
+                                        * doesn't contain jmps. It's backtracked
+                                        * as a single block.
+                                        * During backtracking insn 3 is not recognized as
+                                        * stack access, so at the end of backtracking
+                                        * stack slot fp-8 is still marked in stack_mask.
+                                        * However the parent state may not have accessed
+                                        * fp-8 and it's "unallocated" stack space.
+                                        * In such case fallback to conservative.
+                                        */
+                                       mark_all_scalars_precise(env, env->cur_state);
+                                       bt_reset(bt);
+                                       return 0;
+                               }
+
+                               if (!is_spilled_scalar_reg(&func->stack[i])) {
+                                       bt_clear_frame_slot(bt, fr, i);
+                                       continue;
+                               }
+                               reg = &func->stack[i].spilled_ptr;
+                               if (reg->precise)
+                                       bt_clear_frame_slot(bt, fr, i);
+                               else
+                                       reg->precise = true;
                        }
-                       reg = &func->stack[i].spilled_ptr;
-                       if (reg->type != SCALAR_VALUE) {
-                               stack_mask &= ~(1ull << i);
-                               continue;
+                       if (env->log.level & BPF_LOG_LEVEL2) {
+                               fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
+                                            bt_frame_reg_mask(bt, fr));
+                               verbose(env, "mark_precise: frame%d: parent state regs=%s ",
+                                       fr, env->tmp_str_buf);
+                               fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
+                                              bt_frame_stack_mask(bt, fr));
+                               verbose(env, "stack=%s: ", env->tmp_str_buf);
+                               print_verifier_state(env, func, true);
                        }
-                       if (!reg->precise)
-                               new_marks = true;
-                       reg->precise = true;
-               }
-               if (env->log.level & BPF_LOG_LEVEL2) {
-                       verbose(env, "parent %s regs=%x stack=%llx marks:",
-                               new_marks ? "didn't have" : "already had",
-                               reg_mask, stack_mask);
-                       print_verifier_state(env, func, true);
                }
 
-               if (!reg_mask && !stack_mask)
-                       break;
-               if (!new_marks)
-                       break;
+               if (bt_empty(bt))
+                       return 0;
 
+               subseq_idx = first_idx;
                last_idx = st->last_insn_idx;
                first_idx = st->first_insn_idx;
        }
+
+       /* if we still have requested precise regs or slots, we missed
+        * something (e.g., stack access through non-r10 register), so
+        * fallback to marking all precise
+        */
+       if (!bt_empty(bt)) {
+               mark_all_scalars_precise(env, env->cur_state);
+               bt_reset(bt);
+       }
+
        return 0;
 }
 
 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
 {
-       return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
+       return __mark_chain_precision(env, regno);
 }
 
-static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
-{
-       return __mark_chain_precision(env, frame, regno, -1);
-}
-
-static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
+/* mark_chain_precision_batch() assumes that env->bt is set in the caller to
+ * desired reg and stack masks across all relevant frames
+ */
+static int mark_chain_precision_batch(struct bpf_verifier_env *env)
 {
-       return __mark_chain_precision(env, frame, -1, spi);
+       return __mark_chain_precision(env, -1);
 }
 
 static bool is_spillable_regtype(enum bpf_reg_type type)
@@ -4067,6 +4421,7 @@ static void mark_reg_stack_read(struct bpf_verifier_env *env,
        for (i = min_off; i < max_off; i++) {
                slot = -i - 1;
                spi = slot / BPF_REG_SIZE;
+               mark_stack_slot_scratched(env, spi);
                stype = ptr_state->stack[spi].slot_type;
                if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
                        break;
@@ -4118,6 +4473,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
        stype = reg_state->stack[spi].slot_type;
        reg = &reg_state->stack[spi].spilled_ptr;
 
+       mark_stack_slot_scratched(env, spi);
+
        if (is_spilled_reg(&reg_state->stack[spi])) {
                u8 spill_size = 1;
 
@@ -6677,7 +7034,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
  * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
  */
 static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
-                              enum bpf_arg_type arg_type)
+                              enum bpf_arg_type arg_type, int clone_ref_obj_id)
 {
        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
        int err;
@@ -6721,7 +7078,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
                                return err;
                }
 
-               err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx);
+               err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
        } else /* MEM_RDONLY and None case from above */ {
                /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
                if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
@@ -7143,12 +7500,16 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
         * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
         * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
         *
+        * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
+        *
         * Therefore we fold these flags depending on the arg_type before comparison.
         */
        if (arg_type & MEM_RDONLY)
                type &= ~MEM_RDONLY;
        if (arg_type & PTR_MAYBE_NULL)
                type &= ~PTR_MAYBE_NULL;
+       if (base_type(arg_type) == ARG_PTR_TO_MEM)
+               type &= ~DYNPTR_TYPE_FLAG_MASK;
 
        if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC)
                type &= ~MEM_ALLOC;
@@ -7631,7 +7992,7 @@ skip_type_check:
                err = check_mem_size_reg(env, reg, regno, true, meta);
                break;
        case ARG_PTR_TO_DYNPTR:
-               err = process_dynptr_func(env, regno, insn_idx, arg_type);
+               err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
                if (err)
                        return err;
                break;
@@ -8178,17 +8539,13 @@ static int set_callee_state(struct bpf_verifier_env *env,
                            struct bpf_func_state *caller,
                            struct bpf_func_state *callee, int insn_idx);
 
-static bool is_callback_calling_kfunc(u32 btf_id);
-
 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                             int *insn_idx, int subprog,
                             set_callee_state_fn set_callee_state_cb)
 {
        struct bpf_verifier_state *state = env->cur_state;
-       struct bpf_func_info_aux *func_info_aux;
        struct bpf_func_state *caller, *callee;
        int err;
-       bool is_global = false;
 
        if (state->curframe + 1 >= MAX_CALL_FRAMES) {
                verbose(env, "the call stack of %d frames is too deep\n",
@@ -8203,13 +8560,10 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                return -EFAULT;
        }
 
-       func_info_aux = env->prog->aux->func_info_aux;
-       if (func_info_aux)
-               is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
        err = btf_check_subprog_call(env, subprog, caller->regs);
        if (err == -EFAULT)
                return err;
-       if (is_global) {
+       if (subprog_is_global(env, subprog)) {
                if (err) {
                        verbose(env, "Caller passes invalid args into func#%d\n",
                                subprog);
@@ -9398,6 +9752,11 @@ static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
        return __kfunc_param_match_suffix(btf, arg, "__szk");
 }
 
+static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
+{
+       return __kfunc_param_match_suffix(btf, arg, "__opt");
+}
+
 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
 {
        return __kfunc_param_match_suffix(btf, arg, "__k");
@@ -9595,6 +9954,7 @@ enum special_kfunc_type {
        KF_bpf_dynptr_from_xdp,
        KF_bpf_dynptr_slice,
        KF_bpf_dynptr_slice_rdwr,
+       KF_bpf_dynptr_clone,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -9614,6 +9974,7 @@ BTF_ID(func, bpf_dynptr_from_skb)
 BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
+BTF_ID(func, bpf_dynptr_clone)
 BTF_SET_END(special_kfunc_set)
 
 BTF_ID_LIST(special_kfunc_list)
@@ -9635,6 +9996,7 @@ BTF_ID(func, bpf_dynptr_from_skb)
 BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
+BTF_ID(func, bpf_dynptr_clone)
 
 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -10323,13 +10685,14 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
                        }
                        if (meta->btf == btf_vmlinux &&
                            meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
-                               meta->arg_obj_drop.btf = reg->btf;
-                               meta->arg_obj_drop.btf_id = reg->btf_id;
+                               meta->arg_btf = reg->btf;
+                               meta->arg_btf_id = reg->btf_id;
                        }
                        break;
                case KF_ARG_PTR_TO_DYNPTR:
                {
                        enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
+                       int clone_ref_obj_id = 0;
 
                        if (reg->type != PTR_TO_STACK &&
                            reg->type != CONST_PTR_TO_DYNPTR) {
@@ -10343,12 +10706,28 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
                        if (is_kfunc_arg_uninit(btf, &args[i]))
                                dynptr_arg_type |= MEM_UNINIT;
 
-                       if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
+                       if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
                                dynptr_arg_type |= DYNPTR_TYPE_SKB;
-                       else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp])
+                       } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
                                dynptr_arg_type |= DYNPTR_TYPE_XDP;
+                       } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
+                                  (dynptr_arg_type & MEM_UNINIT)) {
+                               enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
 
-                       ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
+                               if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
+                                       verbose(env, "verifier internal error: no dynptr type for parent of clone\n");
+                                       return -EFAULT;
+                               }
+
+                               dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
+                               clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
+                               if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
+                                       verbose(env, "verifier internal error: missing ref obj id for parent of clone\n");
+                                       return -EFAULT;
+                               }
+                       }
+
+                       ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
                        if (ret < 0)
                                return ret;
 
@@ -10361,6 +10740,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
                                }
                                meta->initialized_dynptr.id = id;
                                meta->initialized_dynptr.type = dynptr_get_type(env, reg);
+                               meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
                        }
 
                        break;
@@ -10464,13 +10844,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
                        break;
                case KF_ARG_PTR_TO_MEM_SIZE:
                {
+                       struct bpf_reg_state *buff_reg = &regs[regno];
+                       const struct btf_param *buff_arg = &args[i];
                        struct bpf_reg_state *size_reg = &regs[regno + 1];
                        const struct btf_param *size_arg = &args[i + 1];
 
-                       ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
-                       if (ret < 0) {
-                               verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
-                               return ret;
+                       if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) {
+                               ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
+                               if (ret < 0) {
+                                       verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
+                                       return ret;
+                               }
                        }
 
                        if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
@@ -10513,8 +10897,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
                                verbose(env, "bpf_refcount_acquire calls are disabled for now\n");
                                return -EINVAL;
                        }
-                       meta->arg_refcount_acquire.btf = reg->btf;
-                       meta->arg_refcount_acquire.btf_id = reg->btf_id;
+                       meta->arg_btf = reg->btf;
+                       meta->arg_btf_id = reg->btf_id;
                        break;
                }
        }
@@ -10555,7 +10939,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
                *kfunc_name = func_name;
        func_proto = btf_type_by_id(desc_btf, func->type);
 
-       kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
+       kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
        if (!kfunc_flags) {
                return -EACCES;
        }
@@ -10746,12 +11130,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                        } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
                                mark_reg_known_zero(env, regs, BPF_REG_0);
                                regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
-                               regs[BPF_REG_0].btf = meta.arg_refcount_acquire.btf;
-                               regs[BPF_REG_0].btf_id = meta.arg_refcount_acquire.btf_id;
+                               regs[BPF_REG_0].btf = meta.arg_btf;
+                               regs[BPF_REG_0].btf_id = meta.arg_btf_id;
 
                                insn_aux->kptr_struct_meta =
-                                       btf_find_struct_meta(meta.arg_refcount_acquire.btf,
-                                                            meta.arg_refcount_acquire.btf_id);
+                                       btf_find_struct_meta(meta.arg_btf,
+                                                            meta.arg_btf_id);
                        } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
                                   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
                                struct btf_field *field = meta.arg_list_head.field;
@@ -10881,8 +11265,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
                        if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
                                insn_aux->kptr_struct_meta =
-                                       btf_find_struct_meta(meta.arg_obj_drop.btf,
-                                                            meta.arg_obj_drop.btf_id);
+                                       btf_find_struct_meta(meta.arg_btf,
+                                                            meta.arg_btf_id);
                        }
                }
        }
@@ -15118,20 +15502,25 @@ static int propagate_precision(struct bpf_verifier_env *env,
        struct bpf_reg_state *state_reg;
        struct bpf_func_state *state;
        int i, err = 0, fr;
+       bool first;
 
        for (fr = old->curframe; fr >= 0; fr--) {
                state = old->frame[fr];
                state_reg = state->regs;
+               first = true;
                for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
                        if (state_reg->type != SCALAR_VALUE ||
                            !state_reg->precise ||
                            !(state_reg->live & REG_LIVE_READ))
                                continue;
-                       if (env->log.level & BPF_LOG_LEVEL2)
-                               verbose(env, "frame %d: propagating r%d\n", fr, i);
-                       err = mark_chain_precision_frame(env, fr, i);
-                       if (err < 0)
-                               return err;
+                       if (env->log.level & BPF_LOG_LEVEL2) {
+                               if (first)
+                                       verbose(env, "frame %d: propagating r%d", fr, i);
+                               else
+                                       verbose(env, ",r%d", i);
+                       }
+                       bt_set_frame_reg(&env->bt, fr, i);
+                       first = false;
                }
 
                for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
@@ -15142,14 +15531,24 @@ static int propagate_precision(struct bpf_verifier_env *env,
                            !state_reg->precise ||
                            !(state_reg->live & REG_LIVE_READ))
                                continue;
-                       if (env->log.level & BPF_LOG_LEVEL2)
-                               verbose(env, "frame %d: propagating fp%d\n",
-                                       fr, (-i - 1) * BPF_REG_SIZE);
-                       err = mark_chain_precision_stack_frame(env, fr, i);
-                       if (err < 0)
-                               return err;
+                       if (env->log.level & BPF_LOG_LEVEL2) {
+                               if (first)
+                                       verbose(env, "frame %d: propagating fp%d",
+                                               fr, (-i - 1) * BPF_REG_SIZE);
+                               else
+                                       verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
+                       }
+                       bt_set_frame_slot(&env->bt, fr, i);
+                       first = false;
                }
+               if (!first)
+                       verbose(env, "\n");
        }
+
+       err = mark_chain_precision_batch(env);
+       if (err < 0)
+               return err;
+
        return 0;
 }
 
@@ -18611,7 +19010,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
                                 * in the fmodret id set with the KF_SLEEPABLE flag.
                                 */
                                else {
-                                       u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
+                                       u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
+                                                                               prog);
 
                                        if (flags && (*flags & KF_SLEEPABLE))
                                                ret = 0;
@@ -18639,7 +19039,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
                                return -EINVAL;
                        }
                        ret = -EINVAL;
-                       if (btf_kfunc_is_modify_return(btf, btf_id) ||
+                       if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
                            !check_attach_modify_return(addr, tname))
                                ret = 0;
                        if (ret) {
@@ -18806,6 +19206,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
        if (!env)
                return -ENOMEM;
 
+       env->bt.env = env;
+
        len = (*prog)->len;
        env->insn_aux_data =
                vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
index 1f4b07d..03b7f6b 100644 (file)
@@ -1359,9 +1359,9 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
        }
 
        return verify_pkcs7_signature(data_ptr->data,
-                                     bpf_dynptr_get_size(data_ptr),
+                                     __bpf_dynptr_size(data_ptr),
                                      sig_ptr->data,
-                                     bpf_dynptr_get_size(sig_ptr),
+                                     __bpf_dynptr_size(sig_ptr),
                                      trusted_keyring->key,
                                      VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
                                      NULL);
index c17201d..42bb047 100644 (file)
@@ -2,7 +2,8 @@
 #include <linux/string.h>
 #include <linux/if_ether.h>
 #include <linux/ctype.h>
-#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/hex.h>
 
 bool mac_pton(const char *s, u8 *mac)
 {
index dc7b14a..cf5609b 100644 (file)
@@ -1,11 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
+
+#define pr_fmt(fmt) "ref_tracker: " fmt
+
 #include <linux/export.h>
+#include <linux/list_sort.h>
 #include <linux/ref_tracker.h>
 #include <linux/slab.h>
 #include <linux/stacktrace.h>
 #include <linux/stackdepot.h>
 
 #define REF_TRACKER_STACK_ENTRIES 16
+#define STACK_BUF_SIZE 1024
 
 struct ref_tracker {
        struct list_head        head;   /* anchor into dir->list or dir->quarantine */
@@ -14,6 +19,141 @@ struct ref_tracker {
        depot_stack_handle_t    free_stack_handle;
 };
 
+struct ref_tracker_dir_stats {
+       int total;
+       int count;
+       struct {
+               depot_stack_handle_t stack_handle;
+               unsigned int count;
+       } stacks[];
+};
+
+static struct ref_tracker_dir_stats *
+ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
+{
+       struct ref_tracker_dir_stats *stats;
+       struct ref_tracker *tracker;
+
+       stats = kmalloc(struct_size(stats, stacks, limit),
+                       GFP_NOWAIT | __GFP_NOWARN);
+       if (!stats)
+               return ERR_PTR(-ENOMEM);
+       stats->total = 0;
+       stats->count = 0;
+
+       list_for_each_entry(tracker, &dir->list, head) {
+               depot_stack_handle_t stack = tracker->alloc_stack_handle;
+               int i;
+
+               ++stats->total;
+               for (i = 0; i < stats->count; ++i)
+                       if (stats->stacks[i].stack_handle == stack)
+                               break;
+               if (i >= limit)
+                       continue;
+               if (i >= stats->count) {
+                       stats->stacks[i].stack_handle = stack;
+                       stats->stacks[i].count = 0;
+                       ++stats->count;
+               }
+               ++stats->stacks[i].count;
+       }
+
+       return stats;
+}
+
+struct ostream {
+       char *buf;
+       int size, used;
+};
+
+#define pr_ostream(stream, fmt, args...) \
+({ \
+       struct ostream *_s = (stream); \
+\
+       if (!_s->buf) { \
+               pr_err(fmt, ##args); \
+       } else { \
+               int ret, len = _s->size - _s->used; \
+               ret = snprintf(_s->buf + _s->used, len, pr_fmt(fmt), ##args); \
+               _s->used += min(ret, len); \
+       } \
+})
+
+static void
+__ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir,
+                            unsigned int display_limit, struct ostream *s)
+{
+       struct ref_tracker_dir_stats *stats;
+       unsigned int i = 0, skipped;
+       depot_stack_handle_t stack;
+       char *sbuf;
+
+       lockdep_assert_held(&dir->lock);
+
+       if (list_empty(&dir->list))
+               return;
+
+       stats = ref_tracker_get_stats(dir, display_limit);
+       if (IS_ERR(stats)) {
+               pr_ostream(s, "%s@%pK: couldn't get stats, error %pe\n",
+                          dir->name, dir, stats);
+               return;
+       }
+
+       sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+
+       for (i = 0, skipped = stats->total; i < stats->count; ++i) {
+               stack = stats->stacks[i].stack_handle;
+               if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4))
+                       sbuf[0] = 0;
+               pr_ostream(s, "%s@%pK has %d/%d users at\n%s\n", dir->name, dir,
+                          stats->stacks[i].count, stats->total, sbuf);
+               skipped -= stats->stacks[i].count;
+       }
+
+       if (skipped)
+               pr_ostream(s, "%s@%pK skipped reports about %d/%d users.\n",
+                          dir->name, dir, skipped, stats->total);
+
+       kfree(sbuf);
+
+       kfree(stats);
+}
+
+void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir,
+                                 unsigned int display_limit)
+{
+       struct ostream os = {};
+
+       __ref_tracker_dir_pr_ostream(dir, display_limit, &os);
+}
+EXPORT_SYMBOL(ref_tracker_dir_print_locked);
+
+void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+                          unsigned int display_limit)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&dir->lock, flags);
+       ref_tracker_dir_print_locked(dir, display_limit);
+       spin_unlock_irqrestore(&dir->lock, flags);
+}
+EXPORT_SYMBOL(ref_tracker_dir_print);
+
+int ref_tracker_dir_snprint(struct ref_tracker_dir *dir, char *buf, size_t size)
+{
+       struct ostream os = { .buf = buf, .size = size };
+       unsigned long flags;
+
+       spin_lock_irqsave(&dir->lock, flags);
+       __ref_tracker_dir_pr_ostream(dir, 16, &os);
+       spin_unlock_irqrestore(&dir->lock, flags);
+
+       return os.used;
+}
+EXPORT_SYMBOL(ref_tracker_dir_snprint);
+
 void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
 {
        struct ref_tracker *tracker, *n;
@@ -27,13 +167,13 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
                kfree(tracker);
                dir->quarantine_avail++;
        }
-       list_for_each_entry_safe(tracker, n, &dir->list, head) {
-               pr_err("leaked reference.\n");
-               if (tracker->alloc_stack_handle)
-                       stack_depot_print(tracker->alloc_stack_handle);
+       if (!list_empty(&dir->list)) {
+               ref_tracker_dir_print_locked(dir, 16);
                leak = true;
-               list_del(&tracker->head);
-               kfree(tracker);
+               list_for_each_entry_safe(tracker, n, &dir->list, head) {
+                       list_del(&tracker->head);
+                       kfree(tracker);
+               }
        }
        spin_unlock_irqrestore(&dir->lock, flags);
        WARN_ON_ONCE(leak);
@@ -42,28 +182,6 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
 }
 EXPORT_SYMBOL(ref_tracker_dir_exit);
 
-void ref_tracker_dir_print(struct ref_tracker_dir *dir,
-                          unsigned int display_limit)
-{
-       struct ref_tracker *tracker;
-       unsigned long flags;
-       unsigned int i = 0;
-
-       spin_lock_irqsave(&dir->lock, flags);
-       list_for_each_entry(tracker, &dir->list, head) {
-               if (i < display_limit) {
-                       pr_err("leaked reference.\n");
-                       if (tracker->alloc_stack_handle)
-                               stack_depot_print(tracker->alloc_stack_handle);
-                       i++;
-               } else {
-                       break;
-               }
-       }
-       spin_unlock_irqrestore(&dir->lock, flags);
-}
-EXPORT_SYMBOL(ref_tracker_dir_print);
-
 int ref_tracker_alloc(struct ref_tracker_dir *dir,
                      struct ref_tracker **trackerp,
                      gfp_t gfp)
@@ -71,7 +189,7 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir,
        unsigned long entries[REF_TRACKER_STACK_ENTRIES];
        struct ref_tracker *tracker;
        unsigned int nr_entries;
-       gfp_t gfp_mask = gfp;
+       gfp_t gfp_mask = gfp | __GFP_NOWARN;
        unsigned long flags;
 
        WARN_ON_ONCE(dir->dead);
@@ -119,7 +237,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
                return -EEXIST;
        }
        nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
-       stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);
+       stack_handle = stack_depot_save(entries, nr_entries,
+                                       GFP_NOWAIT | __GFP_NOWARN);
 
        spin_lock_irqsave(&dir->lock, flags);
        if (tracker->dead) {
index 8d7519a..e97d706 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/scatterlist.h>
 #include <linux/highmem.h>
 #include <linux/kmemleak.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
 
 /**
  * sg_next - return the next scatterlist entry in a list
@@ -1095,3 +1097,270 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
        return offset;
 }
 EXPORT_SYMBOL(sg_zero_buffer);
+
+/*
+ * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
+ * iterators, and add them to the scatterlist.
+ */
+static ssize_t extract_user_to_sg(struct iov_iter *iter,
+                                 ssize_t maxsize,
+                                 struct sg_table *sgtable,
+                                 unsigned int sg_max,
+                                 iov_iter_extraction_t extraction_flags)
+{
+       struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+       struct page **pages;
+       unsigned int npages;
+       ssize_t ret = 0, res;
+       size_t len, off;
+
+       /* We decant the page list into the tail of the scatterlist */
+       pages = (void *)sgtable->sgl +
+               array_size(sg_max, sizeof(struct scatterlist));
+       pages -= sg_max;
+
+       do {
+               res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
+                                            extraction_flags, &off);
+               if (res < 0)
+                       goto failed;
+
+               len = res;
+               maxsize -= len;
+               ret += len;
+               npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
+               sg_max -= npages;
+
+               for (; npages > 0; npages--) {
+                       struct page *page = *pages;
+                       size_t seg = min_t(size_t, PAGE_SIZE - off, len);
+
+                       *pages++ = NULL;
+                       sg_set_page(sg, page, seg, off);
+                       sgtable->nents++;
+                       sg++;
+                       len -= seg;
+                       off = 0;
+               }
+       } while (maxsize > 0 && sg_max > 0);
+
+       return ret;
+
+failed:
+       while (sgtable->nents > sgtable->orig_nents)
+               put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
+       return res;
+}
+
+/*
+ * Extract up to sg_max pages from a BVEC-type iterator and add them to the
+ * scatterlist.  The pages are not pinned.
+ */
+static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
+                                 ssize_t maxsize,
+                                 struct sg_table *sgtable,
+                                 unsigned int sg_max,
+                                 iov_iter_extraction_t extraction_flags)
+{
+       const struct bio_vec *bv = iter->bvec;
+       struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+       unsigned long start = iter->iov_offset;
+       unsigned int i;
+       ssize_t ret = 0;
+
+       for (i = 0; i < iter->nr_segs; i++) {
+               size_t off, len;
+
+               len = bv[i].bv_len;
+               if (start >= len) {
+                       start -= len;
+                       continue;
+               }
+
+               len = min_t(size_t, maxsize, len - start);
+               off = bv[i].bv_offset + start;
+
+               sg_set_page(sg, bv[i].bv_page, len, off);
+               sgtable->nents++;
+               sg++;
+               sg_max--;
+
+               ret += len;
+               maxsize -= len;
+               if (maxsize <= 0 || sg_max == 0)
+                       break;
+               start = 0;
+       }
+
+       if (ret > 0)
+               iov_iter_advance(iter, ret);
+       return ret;
+}
+
+/*
+ * Extract up to sg_max pages from a KVEC-type iterator and add them to the
+ * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
+ * static buffers.  The pages are not pinned.
+ */
+static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
+                                 ssize_t maxsize,
+                                 struct sg_table *sgtable,
+                                 unsigned int sg_max,
+                                 iov_iter_extraction_t extraction_flags)
+{
+       const struct kvec *kv = iter->kvec;
+       struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+       unsigned long start = iter->iov_offset;
+       unsigned int i;
+       ssize_t ret = 0;
+
+       for (i = 0; i < iter->nr_segs; i++) {
+               struct page *page;
+               unsigned long kaddr;
+               size_t off, len, seg;
+
+               len = kv[i].iov_len;
+               if (start >= len) {
+                       start -= len;
+                       continue;
+               }
+
+               kaddr = (unsigned long)kv[i].iov_base + start;
+               off = kaddr & ~PAGE_MASK;
+               len = min_t(size_t, maxsize, len - start);
+               kaddr &= PAGE_MASK;
+
+               maxsize -= len;
+               ret += len;
+               do {
+                       seg = min_t(size_t, len, PAGE_SIZE - off);
+                       if (is_vmalloc_or_module_addr((void *)kaddr))
+                               page = vmalloc_to_page((void *)kaddr);
+                       else
+                               page = virt_to_page(kaddr);
+
+                       sg_set_page(sg, page, len, off);
+                       sgtable->nents++;
+                       sg++;
+                       sg_max--;
+
+                       len -= seg;
+                       kaddr += PAGE_SIZE;
+                       off = 0;
+               } while (len > 0 && sg_max > 0);
+
+               if (maxsize <= 0 || sg_max == 0)
+                       break;
+               start = 0;
+       }
+
+       if (ret > 0)
+               iov_iter_advance(iter, ret);
+       return ret;
+}
+
+/*
+ * Extract up to sg_max folios from an XARRAY-type iterator and add them to
+ * the scatterlist.  The pages are not pinned.
+ */
+static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
+                                   ssize_t maxsize,
+                                   struct sg_table *sgtable,
+                                   unsigned int sg_max,
+                                   iov_iter_extraction_t extraction_flags)
+{
+       struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+       struct xarray *xa = iter->xarray;
+       struct folio *folio;
+       loff_t start = iter->xarray_start + iter->iov_offset;
+       pgoff_t index = start / PAGE_SIZE;
+       ssize_t ret = 0;
+       size_t offset, len;
+       XA_STATE(xas, xa, index);
+
+       rcu_read_lock();
+
+       xas_for_each(&xas, folio, ULONG_MAX) {
+               if (xas_retry(&xas, folio))
+                       continue;
+               if (WARN_ON(xa_is_value(folio)))
+                       break;
+               if (WARN_ON(folio_test_hugetlb(folio)))
+                       break;
+
+               offset = offset_in_folio(folio, start);
+               len = min_t(size_t, maxsize, folio_size(folio) - offset);
+
+               sg_set_page(sg, folio_page(folio, 0), len, offset);
+               sgtable->nents++;
+               sg++;
+               sg_max--;
+
+               maxsize -= len;
+               ret += len;
+               if (maxsize <= 0 || sg_max == 0)
+                       break;
+       }
+
+       rcu_read_unlock();
+       if (ret > 0)
+               iov_iter_advance(iter, ret);
+       return ret;
+}
+
+/**
+ * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
+ * @iter: The iterator to extract from
+ * @maxsize: The amount of iterator to copy
+ * @sgtable: The scatterlist table to fill in
+ * @sg_max: Maximum number of elements in @sgtable that may be filled
+ * @extraction_flags: Flags to qualify the request
+ *
+ * Extract the page fragments from the given amount of the source iterator and
+ * add them to a scatterlist that refers to all of those bits, to a maximum
+ * addition of @sg_max elements.
+ *
+ * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
+ * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
+ * and DISCARD-type are not supported.
+ *
+ * No end mark is placed on the scatterlist; that's left to the caller.
+ *
+ * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
+ * be allowed on the pages extracted.
+ *
+ * If successful, @sgtable->nents is updated to include the number of elements
+ * added and the number of bytes added is returned.  @sgtable->orig_nents is
+ * left unaltered.
+ *
+ * The iov_iter_extract_mode() function should be used to query how cleanup
+ * should be performed.
+ */
+ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
+                          struct sg_table *sgtable, unsigned int sg_max,
+                          iov_iter_extraction_t extraction_flags)
+{
+       if (maxsize == 0)
+               return 0;
+
+       switch (iov_iter_type(iter)) {
+       case ITER_UBUF:
+       case ITER_IOVEC:
+               return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
+                                         extraction_flags);
+       case ITER_BVEC:
+               return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
+                                         extraction_flags);
+       case ITER_KVEC:
+               return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
+                                         extraction_flags);
+       case ITER_XARRAY:
+               return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
+                                           extraction_flags);
+       default:
+               pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
+               WARN_ON_ONCE(1);
+               return -EIO;
+       }
+}
+EXPORT_SYMBOL_GPL(extract_iter_to_sg);
index 19d7dec..49970a7 100644 (file)
@@ -64,7 +64,7 @@ static int __init test_ref_tracker_init(void)
 {
        int i;
 
-       ref_tracker_dir_init(&ref_dir, 100);
+       ref_tracker_dir_init(&ref_dir, 100, "selftest");
 
        timer_setup(&test_ref_tracker_timer, test_ref_tracker_timer_func, 0);
        mod_timer(&test_ref_tracker_timer, jiffies + 1);
index 7d39c17..2fb25b5 100644 (file)
@@ -324,7 +324,7 @@ config CGROUP_NET_CLASSID
 
 config NET_RX_BUSY_POLL
        bool
-       default y if !PREEMPT_RT
+       default y if !PREEMPT_RT || (PREEMPT_RT && !NETCONSOLE)
 
 config BQL
        bool
index e79e3a4..2321bd2 100644 (file)
@@ -561,29 +561,6 @@ __bpf_kfunc int bpf_modify_return_test(int a, int *b)
        return a + *b;
 }
 
-__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
-{
-       return a + b + c + d;
-}
-
-__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
-{
-       return a + b;
-}
-
-__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
-{
-       return sk;
-}
-
-long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
-{
-       /* Provoke the compiler to assume that the caller has sign-extended a,
-        * b and c on platforms where this is required (e.g. s390x).
-        */
-       return (long)a + (long)b + (long)c + d;
-}
-
 int noinline bpf_fentry_shadow_test(int a)
 {
        return a + 1;
@@ -606,32 +583,6 @@ struct prog_test_ref_kfunc {
        refcount_t cnt;
 };
 
-static struct prog_test_ref_kfunc prog_test_struct = {
-       .a = 42,
-       .b = 108,
-       .next = &prog_test_struct,
-       .cnt = REFCOUNT_INIT(1),
-};
-
-__bpf_kfunc struct prog_test_ref_kfunc *
-bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
-{
-       refcount_inc(&prog_test_struct.cnt);
-       return &prog_test_struct;
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
-{
-       WARN_ON_ONCE(1);
-}
-
-__bpf_kfunc struct prog_test_member *
-bpf_kfunc_call_memb_acquire(void)
-{
-       WARN_ON_ONCE(1);
-       return NULL;
-}
-
 __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
 {
        refcount_dec(&p->cnt);
@@ -641,134 +592,6 @@ __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
 {
 }
 
-__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
-{
-       WARN_ON_ONCE(1);
-}
-
-static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size)
-{
-       if (size > 2 * sizeof(int))
-               return NULL;
-
-       return (int *)p;
-}
-
-__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
-                                                 const int rdwr_buf_size)
-{
-       return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
-}
-
-__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
-                                                   const int rdonly_buf_size)
-{
-       return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
-}
-
-/* the next 2 ones can't be really used for testing expect to ensure
- * that the verifier rejects the call.
- * Acquire functions must return struct pointers, so these ones are
- * failing.
- */
-__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
-                                                   const int rdonly_buf_size)
-{
-       return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
-}
-
-__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
-{
-}
-
-struct prog_test_pass1 {
-       int x0;
-       struct {
-               int x1;
-               struct {
-                       int x2;
-                       struct {
-                               int x3;
-                       };
-               };
-       };
-};
-
-struct prog_test_pass2 {
-       int len;
-       short arr1[4];
-       struct {
-               char arr2[4];
-               unsigned long arr3[8];
-       } x;
-};
-
-struct prog_test_fail1 {
-       void *p;
-       int x;
-};
-
-struct prog_test_fail2 {
-       int x8;
-       struct prog_test_pass1 x;
-};
-
-struct prog_test_fail3 {
-       int len;
-       char arr1[2];
-       char arr2[];
-};
-
-__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
-{
-       /* p != NULL, but p->cnt could be 0 */
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
-{
-}
-
-__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
-{
-       return arg;
-}
-
 __diag_pop();
 
 BTF_SET8_START(bpf_test_modify_return_ids)
@@ -782,32 +605,8 @@ static const struct btf_kfunc_id_set bpf_test_modify_return_set = {
 };
 
 BTF_SET8_START(test_sk_check_kfunc_ids)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
 BTF_SET8_END(test_sk_check_kfunc_ids)
 
 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
@@ -1415,11 +1214,10 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                        }
 
                        frag = &sinfo->frags[sinfo->nr_frags++];
-                       __skb_frag_set_page(frag, page);
 
                        data_len = min_t(u32, kattr->test.data_size_in - size,
                                         PAGE_SIZE);
-                       skb_frag_size_set(frag, data_len);
+                       skb_frag_fill_page_desc(frag, page, 0, data_len);
 
                        if (copy_from_user(page_address(page), data_in + size,
                                           data_len)) {
index 8eca8a5..9a5ea06 100644 (file)
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
        u16 vid = 0;
 
        memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+       br_tc_skb_miss_set(skb, false);
 
        rcu_read_lock();
        nf_ops = rcu_dereference(nf_br_ops);
index 84d6dd5..6116eba 100644 (file)
@@ -203,6 +203,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
        struct net_bridge_port *prev = NULL;
        struct net_bridge_port *p;
 
+       br_tc_skb_miss_set(skb, pkt_type != BR_PKT_BROADCAST);
+
        list_for_each_entry_rcu(p, &br->port_list, list) {
                /* Do not flood unicast traffic to ports that turn it off, nor
                 * other traffic if flood off, except for traffic we originate
@@ -295,6 +297,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
                        allow_mode_include = false;
        } else {
                p = NULL;
+               br_tc_skb_miss_set(skb, true);
        }
 
        while (p || rp) {
index fc17b9f..c34a0b0 100644 (file)
@@ -334,6 +334,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
                return RX_HANDLER_CONSUMED;
 
        memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+       br_tc_skb_miss_set(skb, false);
 
        p = br_port_get_rcu(skb->dev);
        if (p->flags & BR_VLAN_TUNNEL)
index 2119729..a63b32c 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/u64_stats_sync.h>
 #include <net/route.h>
 #include <net/ip6_fib.h>
+#include <net/pkt_cls.h>
 #include <linux/if_vlan.h>
 #include <linux/rhashtable.h>
 #include <linux/refcount.h>
@@ -754,6 +755,32 @@ void br_boolopt_multi_get(const struct net_bridge *br,
                          struct br_boolopt_multi *bm);
 void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on);
 
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss)
+{
+       struct tc_skb_ext *ext;
+
+       if (!tc_skb_ext_tc_enabled())
+               return;
+
+       ext = skb_ext_find(skb, TC_SKB_EXT);
+       if (ext) {
+               ext->l2_miss = miss;
+               return;
+       }
+       if (!miss)
+               return;
+       ext = tc_skb_ext_alloc(skb);
+       if (!ext)
+               return;
+       ext->l2_miss = true;
+}
+#else
+static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss)
+{
+}
+#endif
+
 /* br_device.c */
 void br_dev_setup(struct net_device *dev);
 void br_dev_delete(struct net_device *dev, struct list_head *list);
index c29f3e1..6d6f8a7 100644 (file)
@@ -6199,7 +6199,8 @@ restart:
        if (!napi)
                goto out;
 
-       preempt_disable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_disable();
        for (;;) {
                int work = 0;
 
@@ -6241,7 +6242,8 @@ count:
                if (unlikely(need_resched())) {
                        if (napi_poll)
                                busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
-                       preempt_enable();
+                       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                               preempt_enable();
                        rcu_read_unlock();
                        cond_resched();
                        if (loop_end(loop_end_arg, start_time))
@@ -6252,7 +6254,8 @@ count:
        }
        if (napi_poll)
                busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
-       preempt_enable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               preempt_enable();
 out:
        rcu_read_unlock();
 }
@@ -10570,8 +10573,10 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
 {
        WARN_ON(dev->reg_state == NETREG_REGISTERED);
 
-       dev->gro_flush_timeout = 20000;
-       dev->napi_defer_hard_irqs = 1;
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+               dev->gro_flush_timeout = 20000;
+               dev->napi_defer_hard_irqs = 1;
+       }
 }
 EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
 
@@ -10632,7 +10637,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        dev = PTR_ALIGN(p, NETDEV_ALIGN);
        dev->padded = (char *)dev - (char *)p;
 
-       ref_tracker_dir_init(&dev->refcnt_tracker, 128);
+       ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
 #ifdef CONFIG_PCPU_DEV_REFCNT
        dev->pcpu_refcnt = alloc_percpu(int);
        if (!dev->pcpu_refcnt)
index d9ce04c..968139f 100644 (file)
@@ -6916,6 +6916,8 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
                                        FIELD));                        \
        } while (0)
 
+       BTF_TYPE_EMIT(struct bpf_tcp_sock);
+
        switch (si->off) {
        case offsetof(struct bpf_tcp_sock, rtt_min):
                BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
@@ -11721,3 +11723,66 @@ static int __init bpf_kfunc_init(void)
        return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
 }
 late_initcall(bpf_kfunc_init);
+
+/* Disables missing prototype warnings */
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+                 "Global functions as their definitions will be in vmlinux BTF");
+
+/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
+ *
+ * The function expects a non-NULL pointer to a socket, and invokes the
+ * protocol specific socket destroy handlers.
+ *
+ * The helper can only be called from BPF contexts that have acquired the socket
+ * locks.
+ *
+ * Parameters:
+ * @sock: Pointer to socket to be destroyed
+ *
+ * Return:
+ * On error, may return EPROTONOSUPPORT, EINVAL.
+ * EPROTONOSUPPORT if protocol specific destroy handler is not supported.
+ * 0 otherwise
+ */
+__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
+{
+       struct sock *sk = (struct sock *)sock;
+
+       /* The locking semantics that allow for synchronous execution of the
+        * destroy handlers are only supported for TCP and UDP.
+        * Supporting protocols will need to acquire sock lock in the BPF context
+        * prior to invoking this kfunc.
+        */
+       if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP &&
+                                          sk->sk_protocol != IPPROTO_UDP))
+               return -EOPNOTSUPP;
+
+       return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
+}
+
+__diag_pop()
+
+BTF_SET8_START(bpf_sk_iter_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
+BTF_SET8_END(bpf_sk_iter_kfunc_ids)
+
+static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+       if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) &&
+           prog->expected_attach_type != BPF_TRACE_ITER)
+               return -EACCES;
+       return 0;
+}
+
+static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = {
+       .owner = THIS_MODULE,
+       .set   = &bpf_sk_iter_kfunc_ids,
+       .filter = tracing_iter_filter,
+};
+
+static int init_subsystem(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set);
+}
+late_initcall(init_subsystem);
index 25fb0bb..481ca40 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/tcp.h>
 #include <linux/ptp_classify.h>
 #include <net/flow_dissector.h>
+#include <net/pkt_cls.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <uapi/linux/batadv_packet.h>
 #include <linux/bpf.h>
@@ -241,6 +242,15 @@ void skb_flow_dissect_meta(const struct sk_buff *skb,
                                         FLOW_DISSECTOR_KEY_META,
                                         target_container);
        meta->ingress_ifindex = skb->skb_iif;
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+       if (tc_skb_ext_tc_enabled()) {
+               struct tc_skb_ext *ext;
+
+               ext = skb_ext_find(skb, TC_SKB_EXT);
+               if (ext)
+                       meta->l2_miss = ext->l2_miss;
+       }
+#endif
 }
 EXPORT_SYMBOL(skb_flow_dissect_meta);
 
index 2d84165..4d45f78 100644 (file)
@@ -239,9 +239,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 
                pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
 
-               __skb_frag_set_page(frag, page);
-               skb_frag_off_set(frag, first_offset);
-               skb_frag_size_set(frag, first_size);
+               skb_frag_fill_page_desc(frag, page, first_offset, first_size);
 
                memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
                /* We dont need to clear skbinfo->nr_frags here */
@@ -460,6 +458,14 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
        }
 }
 
+static void gro_try_pull_from_frag0(struct sk_buff *skb)
+{
+       int grow = skb_gro_offset(skb) - skb_headlen(skb);
+
+       if (grow > 0)
+               gro_pull_from_frag0(skb, grow);
+}
+
 static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
 {
        struct sk_buff *oldest;
@@ -489,7 +495,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        struct sk_buff *pp = NULL;
        enum gro_result ret;
        int same_flow;
-       int grow;
 
        if (netif_elide_gro(skb->dev))
                goto normal;
@@ -564,17 +569,14 @@ found_ptype:
        else
                gro_list->count++;
 
+       /* Must be called before setting NAPI_GRO_CB(skb)->{age|last} */
+       gro_try_pull_from_frag0(skb);
        NAPI_GRO_CB(skb)->age = jiffies;
        NAPI_GRO_CB(skb)->last = skb;
        if (!skb_is_gso(skb))
                skb_shinfo(skb)->gso_size = skb_gro_len(skb);
        list_add(&skb->list, &gro_list->list);
        ret = GRO_HELD;
-
-pull:
-       grow = skb_gro_offset(skb) - skb_headlen(skb);
-       if (grow > 0)
-               gro_pull_from_frag0(skb, grow);
 ok:
        if (gro_list->count) {
                if (!test_bit(bucket, &napi->gro_bitmask))
@@ -587,7 +589,8 @@ ok:
 
 normal:
        ret = GRO_NORMAL;
-       goto pull;
+       gro_try_pull_from_frag0(skb);
+       goto ok;
 }
 
 struct packet_offload *gro_find_receive_by_type(__be16 type)
index 3e3598c..f4183c4 100644 (file)
@@ -308,7 +308,7 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_id);
 /* init code that must occur even if setup_net() is not called. */
 static __net_init void preinit_net(struct net *net)
 {
-       ref_tracker_dir_init(&net->notrefcnt_tracker, 128);
+       ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
 }
 
 /*
@@ -322,7 +322,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
        LIST_HEAD(net_exit_list);
 
        refcount_set(&net->ns.count, 1);
-       ref_tracker_dir_init(&net->refcnt_tracker, 128);
+       ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
 
        refcount_set(&net->passive, 1);
        get_random_bytes(&net->hash_mix, sizeof(u32));
index de17ca2..ea92313 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "netdev-genl-gen.h"
 
-#include <linux/netdev.h>
+#include <uapi/linux/netdev.h>
 
 /* NETDEV_CMD_DEV_GET - do */
 static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
index 74d74fc..7b370c0 100644 (file)
@@ -9,7 +9,7 @@
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
-#include <linux/netdev.h>
+#include <uapi/linux/netdev.h>
 
 int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
 int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
index 7602381..f56b8d6 100644 (file)
@@ -2785,14 +2785,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
                                        break;
                        }
                        get_page(pkt_dev->page);
-                       skb_frag_set_page(skb, i, pkt_dev->page);
-                       skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0);
+
                        /*last fragment, fill rest of data*/
                        if (i == (frags - 1))
-                               skb_frag_size_set(&skb_shinfo(skb)->frags[i],
-                                   (datalen < PAGE_SIZE ? datalen : PAGE_SIZE));
+                               skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i],
+                                                       pkt_dev->page, 0,
+                                                       (datalen < PAGE_SIZE ?
+                                                        datalen : PAGE_SIZE));
                        else
-                               skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len);
+                               skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i],
+                                                       pkt_dev->page, 0, frag_len);
+
                        datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
                        skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
                        skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
index cea28d3..7c43382 100644 (file)
@@ -92,15 +92,7 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
 static struct kmem_cache *skbuff_ext_cache __ro_after_init;
 #endif
 
-/* skb_small_head_cache and related code is only supported
- * for CONFIG_SLAB and CONFIG_SLUB.
- * As soon as SLOB is removed from the kernel, we can clean up this.
- */
-#if !defined(CONFIG_SLOB)
-# define HAVE_SKB_SMALL_HEAD_CACHE 1
-#endif
 
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
 static struct kmem_cache *skb_small_head_cache __ro_after_init;
 
 #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
@@ -117,7 +109,6 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init;
 
 #define SKB_SMALL_HEAD_HEADROOM                                                \
        SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
-#endif /* HAVE_SKB_SMALL_HEAD_CACHE */
 
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -562,7 +553,6 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
        void *obj;
 
        obj_size = SKB_HEAD_ALIGN(*size);
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
        if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
            !(flags & KMALLOC_NOT_NORMAL_BITS)) {
                obj = kmem_cache_alloc_node(skb_small_head_cache,
@@ -576,7 +566,6 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
                obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
                goto out;
        }
-#endif
        *size = obj_size = kmalloc_size_roundup(obj_size);
        /*
         * Try a regular allocation, when that fails and we're not entitled
@@ -898,11 +887,9 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
 
 static void skb_kfree_head(void *head, unsigned int end_offset)
 {
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
        if (end_offset == SKB_SMALL_HEAD_HEADROOM)
                kmem_cache_free(skb_small_head_cache, head);
        else
-#endif
                kfree(head);
 }
 
@@ -2160,7 +2147,6 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
        if (likely(skb_end_offset(skb) == saved_end_offset))
                return 0;
 
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
        /* We can not change skb->end if the original or new value
         * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head().
         */
@@ -2174,7 +2160,6 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
                WARN_ON_ONCE(1);
                return 0;
        }
-#endif
 
        shinfo = skb_shinfo(skb);
 
@@ -4203,13 +4188,13 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
 EXPORT_SYMBOL(skb_find_text);
 
 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
-                        int offset, size_t size)
+                        int offset, size_t size, size_t max_frags)
 {
        int i = skb_shinfo(skb)->nr_frags;
 
        if (skb_can_coalesce(skb, i, page, offset)) {
                skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
-       } else if (i < MAX_SKB_FRAGS) {
+       } else if (i < max_frags) {
                skb_zcopy_downgrade_managed(skb);
                get_page(page);
                skb_fill_page_desc_noacc(skb, i, page, offset, size);
@@ -4249,10 +4234,9 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
        struct page *page;
 
        page = virt_to_head_page(frag_skb->head);
-       __skb_frag_set_page(&head_frag, page);
-       skb_frag_off_set(&head_frag, frag_skb->data -
-                        (unsigned char *)page_address(page));
-       skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
+       skb_frag_fill_page_desc(&head_frag, page, frag_skb->data -
+                               (unsigned char *)page_address(page),
+                               skb_headlen(frag_skb));
        return head_frag;
 }
 
@@ -4768,7 +4752,6 @@ void __init skb_init(void)
                                                0,
                                                SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                                NULL);
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
        /* usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes.
         * struct skb_shared_info is located at the end of skb->head,
         * and should not be copied to/from user.
@@ -4780,7 +4763,6 @@ void __init skb_init(void)
                                                0,
                                                SKB_SMALL_HEAD_HEADROOM,
                                                NULL);
-#endif
        skb_extensions_init();
 }
 
@@ -6912,3 +6894,91 @@ nodefer: __kfree_skb(skb);
        if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
                smp_call_function_single_async(cpu, &sd->defer_csd);
 }
+
+static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
+                                size_t offset, size_t len)
+{
+       const char *kaddr;
+       __wsum csum;
+
+       kaddr = kmap_local_page(page);
+       csum = csum_partial(kaddr + offset, len, 0);
+       kunmap_local(kaddr);
+       skb->csum = csum_block_add(skb->csum, csum, skb->len);
+}
+
+/**
+ * skb_splice_from_iter - Splice (or copy) pages to skbuff
+ * @skb: The buffer to add pages to
+ * @iter: Iterator representing the pages to be added
+ * @maxsize: Maximum amount of pages to be added
+ * @gfp: Allocation flags
+ *
+ * This is a common helper function for supporting MSG_SPLICE_PAGES.  It
+ * extracts pages from an iterator and adds them to the socket buffer if
+ * possible, copying them to fragments if not possible (such as if they're slab
+ * pages).
+ *
+ * Returns the amount of data spliced/copied or -EMSGSIZE if there's
+ * insufficient space in the buffer to transfer anything.
+ */
+ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
+                            ssize_t maxsize, gfp_t gfp)
+{
+       size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+       struct page *pages[8], **ppages = pages;
+       ssize_t spliced = 0, ret = 0;
+       unsigned int i;
+
+       while (iter->count > 0) {
+               ssize_t space, nr, len;
+               size_t off;
+
+               ret = -EMSGSIZE;
+               space = frag_limit - skb_shinfo(skb)->nr_frags;
+               if (space < 0)
+                       break;
+
+               /* We might be able to coalesce without increasing nr_frags */
+               nr = clamp_t(size_t, space, 1, ARRAY_SIZE(pages));
+
+               len = iov_iter_extract_pages(iter, &ppages, maxsize, nr, 0, &off);
+               if (len <= 0) {
+                       ret = len ?: -EIO;
+                       break;
+               }
+
+               i = 0;
+               do {
+                       struct page *page = pages[i++];
+                       size_t part = min_t(size_t, PAGE_SIZE - off, len);
+
+                       ret = -EIO;
+                       if (WARN_ON_ONCE(!sendpage_ok(page)))
+                               goto out;
+
+                       ret = skb_append_pagefrags(skb, page, off, part,
+                                                  frag_limit);
+                       if (ret < 0) {
+                               iov_iter_revert(iter, len);
+                               goto out;
+                       }
+
+                       if (skb->ip_summed == CHECKSUM_NONE)
+                               skb_splice_csum_page(skb, page, off, part);
+
+                       off = 0;
+                       spliced += part;
+                       maxsize -= part;
+                       len -= part;
+               } while (len > 0);
+
+               if (maxsize <= 0)
+                       break;
+       }
+
+out:
+       skb_len_add(skb, spliced);
+       return spliced ?: ret;
+}
+EXPORT_SYMBOL(skb_splice_from_iter);
index 0839706..194340a 100644 (file)
@@ -480,7 +480,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
        int err;
 
        WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
-       WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+       ASSERT_DEVLINK_REGISTERED(devlink);
 
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
index cd02549..649a970 100644 (file)
@@ -447,18 +447,18 @@ static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
                caps->value |= cap;
 }
 
-static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
-                                    struct devlink_port *devlink_port,
+static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port,
                                     struct nla_bitfield32 *caps,
                                     struct netlink_ext_ack *extack)
 {
        bool is_enable;
        int err;
 
-       if (!ops->port_fn_roce_get)
+       if (!devlink_port->ops->port_fn_roce_get)
                return 0;
 
-       err = ops->port_fn_roce_get(devlink_port, &is_enable, extack);
+       err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable,
+                                                 extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -469,19 +469,19 @@ static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
        return 0;
 }
 
-static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops,
-                                          struct devlink_port *devlink_port,
+static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
                                           struct nla_bitfield32 *caps,
                                           struct netlink_ext_ack *extack)
 {
        bool is_enable;
        int err;
 
-       if (!ops->port_fn_migratable_get ||
+       if (!devlink_port->ops->port_fn_migratable_get ||
            devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
                return 0;
 
-       err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack);
+       err = devlink_port->ops->port_fn_migratable_get(devlink_port,
+                                                       &is_enable, extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -492,8 +492,7 @@ static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops,
        return 0;
 }
 
-static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
-                                    struct devlink_port *devlink_port,
+static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
                                     struct sk_buff *msg,
                                     struct netlink_ext_ack *extack,
                                     bool *msg_updated)
@@ -501,11 +500,11 @@ static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
        struct nla_bitfield32 caps = {};
        int err;
 
-       err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
+       err = devlink_port_fn_roce_fill(devlink_port, &caps, extack);
        if (err)
                return err;
 
-       err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack);
+       err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack);
        if (err)
                return err;
 
@@ -691,8 +690,7 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
        return 0;
 }
 
-static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
-                                       struct devlink_port *port,
+static int devlink_port_fn_hw_addr_fill(struct devlink_port *port,
                                        struct sk_buff *msg,
                                        struct netlink_ext_ack *extack,
                                        bool *msg_updated)
@@ -701,10 +699,10 @@ static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
        int hw_addr_len;
        int err;
 
-       if (!ops->port_function_hw_addr_get)
+       if (!port->ops->port_fn_hw_addr_get)
                return 0;
 
-       err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len,
+       err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len,
                                             extack);
        if (err) {
                if (err == -EOPNOTSUPP)
@@ -789,8 +787,7 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
               opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
 }
 
-static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
-                                     struct devlink_port *port,
+static int devlink_port_fn_state_fill(struct devlink_port *port,
                                      struct sk_buff *msg,
                                      struct netlink_ext_ack *extack,
                                      bool *msg_updated)
@@ -799,10 +796,10 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
        enum devlink_port_fn_state state;
        int err;
 
-       if (!ops->port_fn_state_get)
+       if (!port->ops->port_fn_state_get)
                return 0;
 
-       err = ops->port_fn_state_get(port, &state, &opstate, extack);
+       err = port->ops->port_fn_state_get(port, &state, &opstate, extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -829,18 +826,16 @@ static int
 devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
                        struct netlink_ext_ack *extack)
 {
-       const struct devlink_ops *ops = devlink_port->devlink->ops;
-
-       return ops->port_fn_migratable_set(devlink_port, enable, extack);
+       return devlink_port->ops->port_fn_migratable_set(devlink_port, enable,
+                                                        extack);
 }
 
 static int
 devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
                         struct netlink_ext_ack *extack)
 {
-       const struct devlink_ops *ops = devlink_port->devlink->ops;
-
-       return ops->port_fn_roce_set(devlink_port, enable, extack);
+       return devlink_port->ops->port_fn_roce_set(devlink_port, enable,
+                                                  extack);
 }
 
 static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
@@ -874,7 +869,6 @@ static int
 devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
                                   struct netlink_ext_ack *extack)
 {
-       const struct devlink_ops *ops;
        struct nlattr *function_attr;
        bool msg_updated = false;
        int err;
@@ -883,16 +877,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
        if (!function_attr)
                return -EMSGSIZE;
 
-       ops = port->devlink->ops;
-       err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack,
-                                          &msg_updated);
+       err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated);
        if (err)
                goto out;
-       err = devlink_port_fn_caps_fill(ops, port, msg, extack,
-                                       &msg_updated);
+       err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated);
        if (err)
                goto out;
-       err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
+       err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
 out:
        if (err || !msg_updated)
                nla_nest_cancel(msg, function_attr);
@@ -1137,14 +1128,13 @@ static int devlink_port_type_set(struct devlink_port *devlink_port,
 {
        int err;
 
-       if (!devlink_port->devlink->ops->port_type_set)
+       if (!devlink_port->ops->port_type_set)
                return -EOPNOTSUPP;
 
        if (port_type == devlink_port->type)
                return 0;
 
-       err = devlink_port->devlink->ops->port_type_set(devlink_port,
-                                                       port_type);
+       err = devlink_port->ops->port_type_set(devlink_port, port_type);
        if (err)
                return err;
 
@@ -1157,7 +1147,6 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
                                             const struct nlattr *attr,
                                             struct netlink_ext_ack *extack)
 {
-       const struct devlink_ops *ops = port->devlink->ops;
        const u8 *hw_addr;
        int hw_addr_len;
 
@@ -1178,7 +1167,7 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
                }
        }
 
-       return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
+       return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len,
                                              extack);
 }
 
@@ -1187,22 +1176,20 @@ static int devlink_port_fn_state_set(struct devlink_port *port,
                                     struct netlink_ext_ack *extack)
 {
        enum devlink_port_fn_state state;
-       const struct devlink_ops *ops;
 
        state = nla_get_u8(attr);
-       ops = port->devlink->ops;
-       return ops->port_fn_state_set(port, state, extack);
+       return port->ops->port_fn_state_set(port, state, extack);
 }
 
 static int devlink_port_function_validate(struct devlink_port *devlink_port,
                                          struct nlattr **tb,
                                          struct netlink_ext_ack *extack)
 {
-       const struct devlink_ops *ops = devlink_port->devlink->ops;
+       const struct devlink_port_ops *ops = devlink_port->ops;
        struct nlattr *attr;
 
        if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
-           !ops->port_function_hw_addr_set) {
+           !ops->port_fn_hw_addr_set) {
                NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
                                    "Port doesn't support function attributes");
                return -EOPNOTSUPP;
@@ -1320,7 +1307,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
 
        if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
                return -EINVAL;
-       if (!devlink->ops->port_split)
+       if (!devlink_port->ops->port_split)
                return -EOPNOTSUPP;
 
        count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
@@ -1339,8 +1326,8 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
                return -EINVAL;
        }
 
-       return devlink->ops->port_split(devlink, devlink_port, count,
-                                       info->extack);
+       return devlink_port->ops->port_split(devlink, devlink_port, count,
+                                            info->extack);
 }
 
 static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
@@ -1349,40 +1336,9 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
        struct devlink_port *devlink_port = info->user_ptr[1];
        struct devlink *devlink = info->user_ptr[0];
 
-       if (!devlink->ops->port_unsplit)
+       if (!devlink_port->ops->port_unsplit)
                return -EOPNOTSUPP;
-       return devlink->ops->port_unsplit(devlink, devlink_port, info->extack);
-}
-
-static int devlink_port_new_notify(struct devlink *devlink,
-                                  unsigned int port_index,
-                                  struct genl_info *info)
-{
-       struct devlink_port *devlink_port;
-       struct sk_buff *msg;
-       int err;
-
-       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (!msg)
-               return -ENOMEM;
-
-       lockdep_assert_held(&devlink->lock);
-       devlink_port = devlink_port_get_by_index(devlink, port_index);
-       if (!devlink_port) {
-               err = -ENODEV;
-               goto out;
-       }
-
-       err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
-                                  info->snd_portid, info->snd_seq, 0, NULL);
-       if (err)
-               goto out;
-
-       return genlmsg_reply(msg, info);
-
-out:
-       nlmsg_free(msg);
-       return err;
+       return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
 }
 
 static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
@@ -1391,10 +1347,11 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
        struct netlink_ext_ack *extack = info->extack;
        struct devlink_port_new_attrs new_attrs = {};
        struct devlink *devlink = info->user_ptr[0];
-       unsigned int new_port_index;
+       struct devlink_port *devlink_port;
+       struct sk_buff *msg;
        int err;
 
-       if (!devlink->ops->port_new || !devlink->ops->port_del)
+       if (!devlink->ops->port_new)
                return -EOPNOTSUPP;
 
        if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
@@ -1423,36 +1380,43 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
                new_attrs.sfnum_valid = true;
        }
 
-       err = devlink->ops->port_new(devlink, &new_attrs, extack,
-                                    &new_port_index);
+       err = devlink->ops->port_new(devlink, &new_attrs,
+                                    extack, &devlink_port);
        if (err)
                return err;
 
-       err = devlink_port_new_notify(devlink, new_port_index, info);
-       if (err && err != -ENODEV) {
-               /* Fail to send the response; destroy newly created port. */
-               devlink->ops->port_del(devlink, new_port_index, extack);
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg) {
+               err = -ENOMEM;
+               goto err_out_port_del;
        }
+       err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+                                  info->snd_portid, info->snd_seq, 0, NULL);
+       if (WARN_ON_ONCE(err))
+               goto err_out_msg_free;
+       err = genlmsg_reply(msg, info);
+       if (err)
+               goto err_out_port_del;
+       return 0;
+
+err_out_msg_free:
+       nlmsg_free(msg);
+err_out_port_del:
+       devlink_port->ops->port_del(devlink, devlink_port, NULL);
        return err;
 }
 
 static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
                                        struct genl_info *info)
 {
+       struct devlink_port *devlink_port = info->user_ptr[1];
        struct netlink_ext_ack *extack = info->extack;
        struct devlink *devlink = info->user_ptr[0];
-       unsigned int port_index;
 
-       if (!devlink->ops->port_del)
+       if (!devlink_port->ops->port_del)
                return -EOPNOTSUPP;
 
-       if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) {
-               NL_SET_ERR_MSG(extack, "Port index is not specified");
-               return -EINVAL;
-       }
-       port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
-       return devlink->ops->port_del(devlink, port_index, extack);
+       return devlink_port->ops->port_del(devlink, devlink_port, extack);
 }
 
 static int
@@ -6384,6 +6348,7 @@ const struct genl_small_ops devlink_nl_ops[56] = {
                .cmd = DEVLINK_CMD_PORT_DEL,
                .doit = devlink_nl_cmd_port_del_doit,
                .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
        },
        {
                .cmd = DEVLINK_CMD_LINECARD_GET,
@@ -6809,7 +6774,7 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
  * @devlink: devlink
  * @devlink_port: devlink port
  *
- * Initialize essencial stuff that is needed for functions
+ * Initialize essential stuff that is needed for functions
  * that may be called before devlink port registration.
  * Call to this function is optional and not needed
  * in case the driver does not use such functions.
@@ -6830,7 +6795,7 @@ EXPORT_SYMBOL_GPL(devlink_port_init);
  *
  * @devlink_port: devlink port
  *
- * Deinitialize essencial stuff that is in use for functions
+ * Deinitialize essential stuff that is in use for functions
  * that may be called after devlink port unregistration.
  * Call to this function is optional and not needed
  * in case the driver does not use such functions.
@@ -6841,12 +6806,15 @@ void devlink_port_fini(struct devlink_port *devlink_port)
 }
 EXPORT_SYMBOL_GPL(devlink_port_fini);
 
+static const struct devlink_port_ops devlink_port_dummy_ops = {};
+
 /**
- * devl_port_register() - Register devlink port
+ * devl_port_register_with_ops() - Register devlink port
  *
  * @devlink: devlink
  * @devlink_port: devlink port
  * @port_index: driver-specific numerical identifier of the port
+ * @ops: port ops
  *
  * Register devlink port with provided port index. User can use
  * any indexing, even hw-related one. devlink_port structure
@@ -6854,9 +6822,10 @@ EXPORT_SYMBOL_GPL(devlink_port_fini);
  * Note that the caller should take care of zeroing the devlink_port
  * structure.
  */
-int devl_port_register(struct devlink *devlink,
-                      struct devlink_port *devlink_port,
-                      unsigned int port_index)
+int devl_port_register_with_ops(struct devlink *devlink,
+                               struct devlink_port *devlink_port,
+                               unsigned int port_index,
+                               const struct devlink_port_ops *ops)
 {
        int err;
 
@@ -6867,6 +6836,7 @@ int devl_port_register(struct devlink *devlink,
        devlink_port_init(devlink, devlink_port);
        devlink_port->registered = true;
        devlink_port->index = port_index;
+       devlink_port->ops = ops ? ops : &devlink_port_dummy_ops;
        spin_lock_init(&devlink_port->type_lock);
        INIT_LIST_HEAD(&devlink_port->reporter_list);
        err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
@@ -6878,14 +6848,15 @@ int devl_port_register(struct devlink *devlink,
        devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
        return 0;
 }
-EXPORT_SYMBOL_GPL(devl_port_register);
+EXPORT_SYMBOL_GPL(devl_port_register_with_ops);
 
 /**
- *     devlink_port_register - Register devlink port
+ *     devlink_port_register_with_ops - Register devlink port
  *
  *     @devlink: devlink
  *     @devlink_port: devlink port
  *     @port_index: driver-specific numerical identifier of the port
+ *     @ops: port ops
  *
  *     Register devlink port with provided port index. User can use
  *     any indexing, even hw-related one. devlink_port structure
@@ -6895,18 +6866,20 @@ EXPORT_SYMBOL_GPL(devl_port_register);
  *
  *     Context: Takes and release devlink->lock <mutex>.
  */
-int devlink_port_register(struct devlink *devlink,
-                         struct devlink_port *devlink_port,
-                         unsigned int port_index)
+int devlink_port_register_with_ops(struct devlink *devlink,
+                                  struct devlink_port *devlink_port,
+                                  unsigned int port_index,
+                                  const struct devlink_port_ops *ops)
 {
        int err;
 
        devl_lock(devlink);
-       err = devl_port_register(devlink, devlink_port, port_index);
+       err = devl_port_register_with_ops(devlink, devlink_port,
+                                         port_index, ops);
        devl_unlock(devlink);
        return err;
 }
-EXPORT_SYMBOL_GPL(devlink_port_register);
+EXPORT_SYMBOL_GPL(devlink_port_register_with_ops);
 
 /**
  * devl_port_unregister() - Unregister devlink port
index 71ba305..0ce8fd3 100644 (file)
@@ -1603,6 +1603,21 @@ dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
        return pcs;
 }
 
+static int dsa_port_phylink_mac_prepare(struct phylink_config *config,
+                                       unsigned int mode,
+                                       phy_interface_t interface)
+{
+       struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+       struct dsa_switch *ds = dp->ds;
+       int err = 0;
+
+       if (ds->ops->phylink_mac_prepare)
+               err = ds->ops->phylink_mac_prepare(ds, dp->index, mode,
+                                                  interface);
+
+       return err;
+}
+
 static void dsa_port_phylink_mac_config(struct phylink_config *config,
                                        unsigned int mode,
                                        const struct phylink_link_state *state)
@@ -1616,6 +1631,21 @@ static void dsa_port_phylink_mac_config(struct phylink_config *config,
        ds->ops->phylink_mac_config(ds, dp->index, mode, state);
 }
 
+static int dsa_port_phylink_mac_finish(struct phylink_config *config,
+                                      unsigned int mode,
+                                      phy_interface_t interface)
+{
+       struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+       struct dsa_switch *ds = dp->ds;
+       int err = 0;
+
+       if (ds->ops->phylink_mac_finish)
+               err = ds->ops->phylink_mac_finish(ds, dp->index, mode,
+                                                 interface);
+
+       return err;
+}
+
 static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
 {
        struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
@@ -1671,7 +1701,9 @@ static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
        .validate = dsa_port_phylink_validate,
        .mac_select_pcs = dsa_port_phylink_mac_select_pcs,
        .mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
+       .mac_prepare = dsa_port_phylink_mac_prepare,
        .mac_config = dsa_port_phylink_mac_config,
+       .mac_finish = dsa_port_phylink_mac_finish,
        .mac_an_restart = dsa_port_phylink_mac_an_restart,
        .mac_link_down = dsa_port_phylink_mac_link_down,
        .mac_link_up = dsa_port_phylink_mac_link_up,
index 9f29efb..233be5c 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "genl.h"
 
-#include <linux/handshake.h>
+#include <uapi/linux/handshake.h>
 
 /* HANDSHAKE_CMD_ACCEPT - do */
 static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = {
index 2c1f1aa..ae72a59 100644 (file)
@@ -9,7 +9,7 @@
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
-#include <linux/handshake.h>
+#include <uapi/linux/handshake.h>
 
 int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info);
 int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info);
index 4a76ebf..b5735b3 100644 (file)
@@ -732,6 +732,20 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 }
 EXPORT_SYMBOL(inet_stream_connect);
 
+void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk)
+{
+       sock_rps_record_flow(newsk);
+       WARN_ON(!((1 << newsk->sk_state) &
+                 (TCPF_ESTABLISHED | TCPF_SYN_RECV |
+                 TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+
+       if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
+               set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
+       sock_graft(newsk, newsock);
+
+       newsock->state = SS_CONNECTED;
+}
+
 /*
  *     Accept a pending connection. The TCP layer now gives BSD semantics.
  */
@@ -745,24 +759,12 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags,
        /* IPV6_ADDRFORM can change sk->sk_prot under us. */
        sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern);
        if (!sk2)
-               goto do_err;
+               return err;
 
        lock_sock(sk2);
-
-       sock_rps_record_flow(sk2);
-       WARN_ON(!((1 << sk2->sk_state) &
-                 (TCPF_ESTABLISHED | TCPF_SYN_RECV |
-                 TCPF_CLOSE_WAIT | TCPF_CLOSE)));
-
-       if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
-               set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
-       sock_graft(sk2, newsock);
-
-       newsock->state = SS_CONNECTED;
-       err = 0;
+       __inet_accept(sock, newsock, sk2);
        release_sock(sk2);
-do_err:
-       return err;
+       return 0;
 }
 EXPORT_SYMBOL(inet_accept);
 
index 6c37c4f..98b9010 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fou_nl.h"
 
-#include <linux/fou.h>
+#include <uapi/linux/fou.h>
 
 /* Global operation policy for fou */
 const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
index dbd0780..63a6c4e 100644 (file)
@@ -9,7 +9,7 @@
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
-#include <linux/fou.h>
+#include <uapi/linux/fou.h>
 
 /* Global operation policy for fou */
 extern const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1];
index 1386787..15424de 100644 (file)
@@ -792,7 +792,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
        opt = rcu_dereference(ireq->ireq_opt);
 
        flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
-                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, ireq->ir_rmt_port,
@@ -830,7 +830,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
        fl4 = &newinet->cork.fl.u.ip4;
 
        flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
-                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, ireq->ir_rmt_port,
index e55a202..81a1cce 100644 (file)
@@ -189,10 +189,10 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
        }
 
 #if IS_ENABLED(CONFIG_IPV6)
-       if (tpi->proto == htons(ETH_P_IPV6) &&
-           !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
-                                      type, data_len))
-               return 0;
+       if (tpi->proto == htons(ETH_P_IPV6) &&
+           !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+                                       type, data_len))
+               return 0;
 #endif
 
        if (t->parms.iph.daddr == 0 ||
index 6189226..244fb93 100644 (file)
@@ -946,17 +946,6 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
 }
 EXPORT_SYMBOL(ip_generic_getfrag);
 
-static inline __wsum
-csum_page(struct page *page, int offset, int copy)
-{
-       char *kaddr;
-       __wsum csum;
-       kaddr = kmap(page);
-       csum = csum_partial(kaddr + offset, copy, 0);
-       kunmap(page);
-       return csum;
-}
-
 static int __ip_append_data(struct sock *sk,
                            struct flowi4 *fl4,
                            struct sk_buff_head *queue,
@@ -1048,6 +1037,14 @@ static int __ip_append_data(struct sock *sk,
                                skb_zcopy_set(skb, uarg, &extra_uref);
                        }
                }
+       } else if ((flags & MSG_SPLICE_PAGES) && length) {
+               if (inet->hdrincl)
+                       return -EPERM;
+               if (rt->dst.dev->features & NETIF_F_SG)
+                       /* We need an empty buffer to attach stuff to */
+                       paged = true;
+               else
+                       flags &= ~MSG_SPLICE_PAGES;
        }
 
        cork->length += length;
@@ -1207,6 +1204,15 @@ alloc_new_skb:
                                err = -EFAULT;
                                goto error;
                        }
+               } else if (flags & MSG_SPLICE_PAGES) {
+                       struct msghdr *msg = from;
+
+                       err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+                                                  sk->sk_allocation);
+                       if (err < 0)
+                               goto error;
+                       copy = err;
+                       wmem_alloc_delta += copy;
                } else if (!zc) {
                        int i = skb_shinfo(skb)->nr_frags;
 
@@ -1310,10 +1316,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 }
 
 /*
- *     ip_append_data() and ip_append_page() can make one large IP datagram
- *     from many pieces of data. Each pieces will be holded on the socket
- *     until ip_push_pending_frames() is called. Each piece can be a page
- *     or non-page data.
+ *     ip_append_data() can make one large IP datagram from many pieces of
+ *     data.  Each piece will be held on the socket until
+ *     ip_push_pending_frames() is called. Each piece can be a page or
+ *     non-page data.
  *
  *     Not only UDP, other transport protocols - e.g. raw sockets - can use
  *     this interface potentially.
@@ -1346,134 +1352,6 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4,
                                from, length, transhdrlen, flags);
 }
 
-ssize_t        ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
-                      int offset, size_t size, int flags)
-{
-       struct inet_sock *inet = inet_sk(sk);
-       struct sk_buff *skb;
-       struct rtable *rt;
-       struct ip_options *opt = NULL;
-       struct inet_cork *cork;
-       int hh_len;
-       int mtu;
-       int len;
-       int err;
-       unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize;
-
-       if (inet->hdrincl)
-               return -EPERM;
-
-       if (flags&MSG_PROBE)
-               return 0;
-
-       if (skb_queue_empty(&sk->sk_write_queue))
-               return -EINVAL;
-
-       cork = &inet->cork.base;
-       rt = (struct rtable *)cork->dst;
-       if (cork->flags & IPCORK_OPT)
-               opt = cork->opt;
-
-       if (!(rt->dst.dev->features & NETIF_F_SG))
-               return -EOPNOTSUPP;
-
-       hh_len = LL_RESERVED_SPACE(rt->dst.dev);
-       mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
-
-       fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
-       maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-       maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
-
-       if (cork->length + size > maxnonfragsize - fragheaderlen) {
-               ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
-                              mtu - (opt ? opt->optlen : 0));
-               return -EMSGSIZE;
-       }
-
-       skb = skb_peek_tail(&sk->sk_write_queue);
-       if (!skb)
-               return -EINVAL;
-
-       cork->length += size;
-
-       while (size > 0) {
-               /* Check if the remaining data fits into current packet. */
-               len = mtu - skb->len;
-               if (len < size)
-                       len = maxfraglen - skb->len;
-
-               if (len <= 0) {
-                       struct sk_buff *skb_prev;
-                       int alloclen;
-
-                       skb_prev = skb;
-                       fraggap = skb_prev->len - maxfraglen;
-
-                       alloclen = fragheaderlen + hh_len + fraggap + 15;
-                       skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
-                       if (unlikely(!skb)) {
-                               err = -ENOBUFS;
-                               goto error;
-                       }
-
-                       /*
-                        *      Fill in the control structures
-                        */
-                       skb->ip_summed = CHECKSUM_NONE;
-                       skb->csum = 0;
-                       skb_reserve(skb, hh_len);
-
-                       /*
-                        *      Find where to start putting bytes.
-                        */
-                       skb_put(skb, fragheaderlen + fraggap);
-                       skb_reset_network_header(skb);
-                       skb->transport_header = (skb->network_header +
-                                                fragheaderlen);
-                       if (fraggap) {
-                               skb->csum = skb_copy_and_csum_bits(skb_prev,
-                                                                  maxfraglen,
-                                                   skb_transport_header(skb),
-                                                                  fraggap);
-                               skb_prev->csum = csum_sub(skb_prev->csum,
-                                                         skb->csum);
-                               pskb_trim_unique(skb_prev, maxfraglen);
-                       }
-
-                       /*
-                        * Put the packet on the pending queue.
-                        */
-                       __skb_queue_tail(&sk->sk_write_queue, skb);
-                       continue;
-               }
-
-               if (len > size)
-                       len = size;
-
-               if (skb_append_pagefrags(skb, page, offset, len)) {
-                       err = -EMSGSIZE;
-                       goto error;
-               }
-
-               if (skb->ip_summed == CHECKSUM_NONE) {
-                       __wsum csum;
-                       csum = csum_page(page, offset, len);
-                       skb->csum = csum_block_add(skb->csum, csum, skb->len);
-               }
-
-               skb_len_add(skb, len);
-               refcount_add(len, &sk->sk_wmem_alloc);
-               offset += len;
-               size -= len;
-       }
-       return 0;
-
-error:
-       cork->length -= size;
-       IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
-       return err;
-}
-
 static void ip_cork_release(struct inet_cork *cork)
 {
        cork->flags &= ~IPCORK_OPT;
@@ -1692,7 +1570,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
                           const struct ip_options *sopt,
                           __be32 daddr, __be32 saddr,
                           const struct ip_reply_arg *arg,
-                          unsigned int len, u64 transmit_time)
+                          unsigned int len, u64 transmit_time, u32 txhash)
 {
        struct ip_options_data replyopts;
        struct ipcm_cookie ipc;
@@ -1755,6 +1633,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
                                                                arg->csum));
                nskb->ip_summed = CHECKSUM_NONE;
                nskb->mono_delivery_time = !!transmit_time;
+               if (txhash)
+                       skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
                ip_push_pending_frames(sk, &fl4);
        }
 out:
index e90bc0a..c56b6fe 100644 (file)
@@ -665,6 +665,9 @@ static struct packet_type bootp_packet_type __initdata = {
        .func = ic_bootp_recv,
 };
 
+/* DHCPACK can overwrite DNS if fallback was set upon first BOOTP reply */
+static int ic_nameservers_fallback __initdata;
+
 /*
  *  Initialize DHCP/BOOTP extension fields in the request.
  */
@@ -938,7 +941,8 @@ static void __init ic_do_bootp_ext(u8 *ext)
                if (servers > CONF_NAMESERVERS_MAX)
                        servers = CONF_NAMESERVERS_MAX;
                for (i = 0; i < servers; i++) {
-                       if (ic_nameservers[i] == NONE)
+                       if (ic_nameservers[i] == NONE ||
+                           ic_nameservers_fallback)
                                memcpy(&ic_nameservers[i], ext+1+4*i, 4);
                }
                break;
@@ -1158,8 +1162,10 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
        ic_addrservaddr = b->iph.saddr;
        if (ic_gateway == NONE && b->relay_ip)
                ic_gateway = b->relay_ip;
-       if (ic_nameservers[0] == NONE)
+       if (ic_nameservers[0] == NONE) {
                ic_nameservers[0] = ic_servaddr;
+               ic_nameservers_fallback = 1;
+       }
        ic_got_reply = IC_BOOTP;
 
 drop_unlock:
index 5178a3f..25dd78c 100644 (file)
 #include <net/transp_v6.h>
 #endif
 
-#define ping_portaddr_for_each_entry(__sk, node, list) \
-       hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
-#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \
-       hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
-
 struct ping_table {
-       struct hlist_nulls_head hash[PING_HTABLE_SIZE];
+       struct hlist_head       hash[PING_HTABLE_SIZE];
        spinlock_t              lock;
 };
 
@@ -74,17 +69,16 @@ static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask)
 }
 EXPORT_SYMBOL_GPL(ping_hash);
 
-static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
-                                            struct net *net, unsigned int num)
+static inline struct hlist_head *ping_hashslot(struct ping_table *table,
+                                              struct net *net, unsigned int num)
 {
        return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
 }
 
 int ping_get_port(struct sock *sk, unsigned short ident)
 {
-       struct hlist_nulls_node *node;
-       struct hlist_nulls_head *hlist;
        struct inet_sock *isk, *isk2;
+       struct hlist_head *hlist;
        struct sock *sk2 = NULL;
 
        isk = inet_sk(sk);
@@ -98,7 +92,7 @@ int ping_get_port(struct sock *sk, unsigned short ident)
                                result++; /* avoid zero */
                        hlist = ping_hashslot(&ping_table, sock_net(sk),
                                            result);
-                       ping_portaddr_for_each_entry(sk2, node, hlist) {
+                       sk_for_each(sk2, hlist) {
                                isk2 = inet_sk(sk2);
 
                                if (isk2->inet_num == result)
@@ -115,7 +109,7 @@ next_port:
                        goto fail;
        } else {
                hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
-               ping_portaddr_for_each_entry(sk2, node, hlist) {
+               sk_for_each(sk2, hlist) {
                        isk2 = inet_sk(sk2);
 
                        /* BUG? Why is this reuse and not reuseaddr? ping.c
@@ -133,9 +127,8 @@ next_port:
        isk->inet_num = ident;
        if (sk_unhashed(sk)) {
                pr_debug("was not hashed\n");
-               sock_hold(sk);
+               sk_add_node_rcu(sk, hlist);
                sock_set_flag(sk, SOCK_RCU_FREE);
-               hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist);
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
        }
        spin_unlock(&ping_table.lock);
@@ -161,9 +154,7 @@ void ping_unhash(struct sock *sk)
 
        pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
        spin_lock(&ping_table.lock);
-       if (sk_hashed(sk)) {
-               hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
-               sock_put(sk);
+       if (sk_del_node_init_rcu(sk)) {
                isk->inet_num = 0;
                isk->inet_sport = 0;
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
@@ -175,10 +166,9 @@ EXPORT_SYMBOL_GPL(ping_unhash);
 /* Called under rcu_read_lock() */
 static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
 {
-       struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
+       struct hlist_head *hslot = ping_hashslot(&ping_table, net, ident);
        struct sock *sk = NULL;
        struct inet_sock *isk;
-       struct hlist_nulls_node *hnode;
        int dif, sdif;
 
        if (skb->protocol == htons(ETH_P_IP)) {
@@ -197,7 +187,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
                return NULL;
        }
 
-       ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) {
+       sk_for_each_rcu(sk, hslot) {
                isk = inet_sk(sk);
 
                pr_debug("iterate\n");
@@ -715,7 +705,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        struct ip_options_data opt_copy;
        int free = 0;
        __be32 saddr, daddr, faddr;
-       u8  tos;
+       u8 tos, scope;
        int err;
 
        pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -779,11 +769,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                faddr = ipc.opt->opt.faddr;
        }
        tos = get_rttos(&ipc, inet);
-       if (sock_flag(sk, SOCK_LOCALROUTE) ||
-           (msg->msg_flags & MSG_DONTROUTE) ||
-           (ipc.opt && ipc.opt->opt.is_strictroute)) {
-               tos |= RTO_ONLINK;
-       }
+       scope = ip_sendmsg_scope(inet, &ipc, msg);
 
        if (ipv4_is_multicast(daddr)) {
                if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
@@ -793,10 +779,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        } else if (!ipc.oif)
                ipc.oif = inet->uc_index;
 
-       flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
-                          RT_SCOPE_UNIVERSE, sk->sk_protocol,
-                          inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
-                          sk->sk_uid);
+       flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk), faddr,
+                          saddr, 0, 0, sk->sk_uid);
 
        fl4.fl4_icmp_type = user_icmph.type;
        fl4.fl4_icmp_code = user_icmph.code;
@@ -1045,15 +1030,14 @@ static struct sock *ping_get_first(struct seq_file *seq, int start)
 
        for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
             ++state->bucket) {
-               struct hlist_nulls_node *node;
-               struct hlist_nulls_head *hslot;
+               struct hlist_head *hslot;
 
                hslot = &ping_table.hash[state->bucket];
 
-               if (hlist_nulls_empty(hslot))
+               if (hlist_empty(hslot))
                        continue;
 
-               sk_nulls_for_each(sk, node, hslot) {
+               sk_for_each(sk, hslot) {
                        if (net_eq(sock_net(sk), net) &&
                            sk->sk_family == state->family)
                                goto found;
@@ -1070,7 +1054,7 @@ static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
        struct net *net = seq_file_net(seq);
 
        do {
-               sk = sk_nulls_next(sk);
+               sk = sk_next(sk);
        } while (sk && (!net_eq(sock_net(sk), net)));
 
        if (!sk)
@@ -1206,6 +1190,6 @@ void __init ping_init(void)
        int i;
 
        for (i = 0; i < PING_HTABLE_SIZE; i++)
-               INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
+               INIT_HLIST_HEAD(&ping_table.hash[i]);
        spin_lock_init(&ping_table.lock);
 }
index eadf1c9..9aacce9 100644 (file)
@@ -476,10 +476,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        struct ipcm_cookie ipc;
        struct rtable *rt = NULL;
        struct flowi4 fl4;
+       u8 tos, scope;
        int free = 0;
        __be32 daddr;
        __be32 saddr;
-       u8  tos;
        int err;
        struct ip_options_data opt_copy;
        struct raw_frag_vec rfv;
@@ -575,9 +575,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                        daddr = ipc.opt->opt.faddr;
                }
        }
-       tos = get_rtconn_flags(&ipc, sk);
-       if (msg->msg_flags & MSG_DONTROUTE)
-               tos |= RTO_ONLINK;
+       tos = get_rttos(&ipc, inet);
+       scope = ip_sendmsg_scope(inet, &ipc, msg);
 
        if (ipv4_is_multicast(daddr)) {
                if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
@@ -600,8 +599,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                }
        }
 
-       flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
-                          RT_SCOPE_UNIVERSE,
+       flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope,
                           hdrincl ? ipc.protocol : sk->sk_protocol,
                           inet_sk_flowi_flags(sk) |
                            (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
index 26fb97d..dc478a0 100644 (file)
@@ -418,8 +418,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
         * no easy way to do this.
         */
        flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark,
-                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
-                          inet_sk_flowi_flags(sk),
+                          ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
+                          IPPROTO_TCP, inet_sk_flowi_flags(sk),
                           opt->srr ? opt->faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
        security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
index 88dfe51..356afe5 100644 (file)
@@ -34,6 +34,7 @@ static int ip_ttl_min = 1;
 static int ip_ttl_max = 255;
 static int tcp_syn_retries_min = 1;
 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
+static int tcp_syn_linear_timeouts_max = MAX_TCP_SYNCNT;
 static unsigned long ip_ping_group_range_min[] = { 0, 0 };
 static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
@@ -1470,6 +1471,15 @@ static struct ctl_table ipv4_net_table[] = {
                .extra1         = SYSCTL_ZERO,
                .extra2         = &tcp_plb_max_cong_thresh,
        },
+       {
+               .procname       = "tcp_syn_linear_timeouts",
+               .data           = &init_net.ipv4.sysctl_tcp_syn_linear_timeouts,
+               .maxlen         = sizeof(u8),
+               .mode           = 0644,
+               .proc_handler   = proc_dou8vec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = &tcp_syn_linear_timeouts_max,
+       },
        { }
 };
 
index 8d20d92..53b7751 100644 (file)
@@ -974,175 +974,24 @@ static int tcp_wmem_schedule(struct sock *sk, int copy)
        return min(copy, sk->sk_forward_alloc);
 }
 
-static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
-                                     struct page *page, int offset, size_t *size)
-{
-       struct sk_buff *skb = tcp_write_queue_tail(sk);
-       struct tcp_sock *tp = tcp_sk(sk);
-       bool can_coalesce;
-       int copy, i;
-
-       if (!skb || (copy = size_goal - skb->len) <= 0 ||
-           !tcp_skb_can_collapse_to(skb)) {
-new_segment:
-               if (!sk_stream_memory_free(sk))
-                       return NULL;
-
-               skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
-                                          tcp_rtx_and_write_queues_empty(sk));
-               if (!skb)
-                       return NULL;
-
-#ifdef CONFIG_TLS_DEVICE
-               skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
-#endif
-               tcp_skb_entail(sk, skb);
-               copy = size_goal;
-       }
-
-       if (copy > *size)
-               copy = *size;
-
-       i = skb_shinfo(skb)->nr_frags;
-       can_coalesce = skb_can_coalesce(skb, i, page, offset);
-       if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
-               tcp_mark_push(tp, skb);
-               goto new_segment;
-       }
-       if (tcp_downgrade_zcopy_pure(sk, skb))
-               return NULL;
-
-       copy = tcp_wmem_schedule(sk, copy);
-       if (!copy)
-               return NULL;
-
-       if (can_coalesce) {
-               skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
-       } else {
-               get_page(page);
-               skb_fill_page_desc_noacc(skb, i, page, offset, copy);
-       }
-
-       if (!(flags & MSG_NO_SHARED_FRAGS))
-               skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
-
-       skb->len += copy;
-       skb->data_len += copy;
-       skb->truesize += copy;
-       sk_wmem_queued_add(sk, copy);
-       sk_mem_charge(sk, copy);
-       WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
-       TCP_SKB_CB(skb)->end_seq += copy;
-       tcp_skb_pcount_set(skb, 0);
-
-       *size = copy;
-       return skb;
-}
-
-ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
-                        size_t size, int flags)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       int mss_now, size_goal;
-       int err;
-       ssize_t copied;
-       long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
-
-       if (IS_ENABLED(CONFIG_DEBUG_VM) &&
-           WARN_ONCE(!sendpage_ok(page),
-                     "page must not be a Slab one and have page_count > 0"))
-               return -EINVAL;
-
-       /* Wait for a connection to finish. One exception is TCP Fast Open
-        * (passive side) where data is allowed to be sent before a connection
-        * is fully established.
-        */
-       if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
-           !tcp_passive_fastopen(sk)) {
-               err = sk_stream_wait_connect(sk, &timeo);
-               if (err != 0)
-                       goto out_err;
-       }
-
-       sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-       mss_now = tcp_send_mss(sk, &size_goal, flags);
-       copied = 0;
-
-       err = -EPIPE;
-       if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
-               goto out_err;
-
-       while (size > 0) {
-               struct sk_buff *skb;
-               size_t copy = size;
-
-               skb = tcp_build_frag(sk, size_goal, flags, page, offset, &copy);
-               if (!skb)
-                       goto wait_for_space;
-
-               if (!copied)
-                       TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
-
-               copied += copy;
-               offset += copy;
-               size -= copy;
-               if (!size)
-                       goto out;
-
-               if (skb->len < size_goal || (flags & MSG_OOB))
-                       continue;
-
-               if (forced_push(tp)) {
-                       tcp_mark_push(tp, skb);
-                       __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
-               } else if (skb == tcp_send_head(sk))
-                       tcp_push_one(sk, mss_now);
-               continue;
-
-wait_for_space:
-               set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-               tcp_push(sk, flags & ~MSG_MORE, mss_now,
-                        TCP_NAGLE_PUSH, size_goal);
-
-               err = sk_stream_wait_memory(sk, &timeo);
-               if (err != 0)
-                       goto do_error;
-
-               mss_now = tcp_send_mss(sk, &size_goal, flags);
-       }
-
-out:
-       if (copied) {
-               tcp_tx_timestamp(sk, sk->sk_tsflags);
-               if (!(flags & MSG_SENDPAGE_NOTLAST))
-                       tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
-       }
-       return copied;
-
-do_error:
-       tcp_remove_empty_skb(sk);
-       if (copied)
-               goto out;
-out_err:
-       /* make sure we wake any epoll edge trigger waiter */
-       if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
-               sk->sk_write_space(sk);
-               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
-       }
-       return sk_stream_error(sk, flags, err);
-}
-EXPORT_SYMBOL_GPL(do_tcp_sendpages);
-
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
                        size_t size, int flags)
 {
+       struct bio_vec bvec;
+       struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
+
        if (!(sk->sk_route_caps & NETIF_F_SG))
                return sock_no_sendpage_locked(sk, page, offset, size, flags);
 
        tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
 
-       return do_tcp_sendpages(sk, page, offset, size, flags);
+       bvec_set_page(&bvec, page, size, offset);
+       iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
+       if (flags & MSG_SENDPAGE_NOTLAST)
+               msg.msg_flags |= MSG_MORE;
+
+       return tcp_sendmsg_locked(sk, &msg, size);
 }
 EXPORT_SYMBOL_GPL(tcp_sendpage_locked);
 
@@ -1223,28 +1072,31 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
        int flags, err, copied = 0;
        int mss_now = 0, size_goal, copied_syn = 0;
        int process_backlog = 0;
-       bool zc = false;
+       int zc = 0;
        long timeo;
 
        flags = msg->msg_flags;
 
        if ((flags & MSG_ZEROCOPY) && size) {
-               skb = tcp_write_queue_tail(sk);
-
                if (msg->msg_ubuf) {
                        uarg = msg->msg_ubuf;
-                       net_zcopy_get(uarg);
-                       zc = sk->sk_route_caps & NETIF_F_SG;
+                       if (sk->sk_route_caps & NETIF_F_SG)
+                               zc = MSG_ZEROCOPY;
                } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+                       skb = tcp_write_queue_tail(sk);
                        uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
                        if (!uarg) {
                                err = -ENOBUFS;
                                goto out_err;
                        }
-                       zc = sk->sk_route_caps & NETIF_F_SG;
-                       if (!zc)
+                       if (sk->sk_route_caps & NETIF_F_SG)
+                               zc = MSG_ZEROCOPY;
+                       else
                                uarg_to_msgzc(uarg)->zerocopy = 0;
                }
+       } else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) {
+               if (sk->sk_route_caps & NETIF_F_SG)
+                       zc = MSG_SPLICE_PAGES;
        }
 
        if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
@@ -1307,7 +1159,7 @@ restart:
                goto do_error;
 
        while (msg_data_left(msg)) {
-               int copy = 0;
+               ssize_t copy = 0;
 
                skb = tcp_write_queue_tail(sk);
                if (skb)
@@ -1348,7 +1200,7 @@ new_segment:
                if (copy > msg_data_left(msg))
                        copy = msg_data_left(msg);
 
-               if (!zc) {
+               if (zc == 0) {
                        bool merge = true;
                        int i = skb_shinfo(skb)->nr_frags;
                        struct page_frag *pfrag = sk_page_frag(sk);
@@ -1393,7 +1245,7 @@ new_segment:
                                page_ref_inc(pfrag->page);
                        }
                        pfrag->offset += copy;
-               } else {
+               } else if (zc == MSG_ZEROCOPY)  {
                        /* First append to a fragless skb builds initial
                         * pure zerocopy skb
                         */
@@ -1414,6 +1266,30 @@ new_segment:
                        if (err < 0)
                                goto do_error;
                        copy = err;
+               } else if (zc == MSG_SPLICE_PAGES) {
+                       /* Splice in data if we can; copy if we can't. */
+                       if (tcp_downgrade_zcopy_pure(sk, skb))
+                               goto wait_for_space;
+                       copy = tcp_wmem_schedule(sk, copy);
+                       if (!copy)
+                               goto wait_for_space;
+
+                       err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+                                                  sk->sk_allocation);
+                       if (err < 0) {
+                               if (err == -EMSGSIZE) {
+                                       tcp_mark_push(tp, skb);
+                                       goto new_segment;
+                               }
+                               goto do_error;
+                       }
+                       copy = err;
+
+                       if (!(flags & MSG_NO_SHARED_FRAGS))
+                               skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
+
+                       sk_wmem_queued_add(sk, copy);
+                       sk_mem_charge(sk, copy);
                }
 
                if (!copied)
@@ -1459,7 +1335,9 @@ out:
                tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
        }
 out_nopush:
-       net_zcopy_put(uarg);
+       /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
+       if (uarg && !msg->msg_ubuf)
+               net_zcopy_put(uarg);
        return copied + copied_syn;
 
 do_error:
@@ -1468,7 +1346,9 @@ do_error:
        if (copied + copied_syn)
                goto out;
 out_err:
-       net_zcopy_put_abort(uarg, true);
+       /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
+       if (uarg && !msg->msg_ubuf)
+               net_zcopy_put_abort(uarg, true);
        err = sk_stream_error(sk, flags, err);
        /* make sure we wake any epoll edge trigger waiter */
        if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
@@ -4680,8 +4560,10 @@ int tcp_abort(struct sock *sk, int err)
                return 0;
        }
 
-       /* Don't race with userspace socket closes such as tcp_close. */
-       lock_sock(sk);
+       /* BPF context ensures sock locking. */
+       if (!has_current_bpf_ctx())
+               /* Don't race with userspace socket closes such as tcp_close. */
+               lock_sock(sk);
 
        if (sk->sk_state == TCP_LISTEN) {
                tcp_set_state(sk, TCP_CLOSE);
@@ -4705,7 +4587,8 @@ int tcp_abort(struct sock *sk, int err)
        bh_unlock_sock(sk);
        local_bh_enable();
        tcp_write_queue_purge(sk);
-       release_sock(sk);
+       if (!has_current_bpf_ctx())
+               release_sock(sk);
        return 0;
 }
 EXPORT_SYMBOL_GPL(tcp_abort);
index 5f93918..e75023e 100644 (file)
@@ -90,11 +90,13 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
 {
        bool apply = apply_bytes;
        struct scatterlist *sge;
+       struct msghdr msghdr = { .msg_flags = flags | MSG_SPLICE_PAGES, };
        struct page *page;
        int size, ret = 0;
        u32 off;
 
        while (1) {
+               struct bio_vec bvec;
                bool has_tx_ulp;
 
                sge = sk_msg_elem(msg, msg->sg.start);
@@ -106,16 +108,18 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
                tcp_rate_check_app_limited(sk);
 retry:
                has_tx_ulp = tls_sw_has_ctx_tx(sk);
-               if (has_tx_ulp) {
-                       flags |= MSG_SENDPAGE_NOPOLICY;
-                       ret = kernel_sendpage_locked(sk,
-                                                    page, off, size, flags);
-               } else {
-                       ret = do_tcp_sendpages(sk, page, off, size, flags);
-               }
+               if (has_tx_ulp)
+                       msghdr.msg_flags |= MSG_SENDPAGE_NOPOLICY;
 
+               if (flags & MSG_SENDPAGE_NOTLAST)
+                       msghdr.msg_flags |= MSG_MORE;
+
+               bvec_set_page(&bvec, page, size, off);
+               iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, size);
+               ret = tcp_sendmsg_locked(sk, &msghdr, size);
                if (ret <= 0)
                        return ret;
+
                if (apply)
                        apply_bytes -= ret;
                msg->sg.size -= ret;
@@ -481,7 +485,7 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
        long timeo;
        int flags;
 
-       /* Don't let internal do_tcp_sendpages() flags through */
+       /* Don't let internal sendpage flags through */
        flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED);
        flags |= MSG_NO_SHARED_FRAGS;
 
index 06d2573..53e9ce2 100644 (file)
@@ -692,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        u64 transmit_time = 0;
        struct sock *ctl_sk;
        struct net *net;
+       u32 txhash = 0;
 
        /* Never send a reset in response to a reset. */
        if (th->rst)
@@ -829,6 +830,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                                   inet_twsk(sk)->tw_priority : sk->sk_priority;
                transmit_time = tcp_transmit_time(sk);
                xfrm_sk_clone_policy(ctl_sk, sk);
+               txhash = (sk->sk_state == TCP_TIME_WAIT) ?
+                        inet_twsk(sk)->tw_txhash : sk->sk_txhash;
        } else {
                ctl_sk->sk_mark = 0;
                ctl_sk->sk_priority = 0;
@@ -837,7 +840,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
                              &arg, arg.iov[0].iov_len,
-                             transmit_time);
+                             transmit_time, txhash);
 
        xfrm_sk_free_policy(ctl_sk);
        sock_net_set(ctl_sk, &init_net);
@@ -859,7 +862,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
                            struct sk_buff *skb, u32 seq, u32 ack,
                            u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key,
-                           int reply_flags, u8 tos)
+                           int reply_flags, u8 tos, u32 txhash)
 {
        const struct tcphdr *th = tcp_hdr(skb);
        struct {
@@ -935,7 +938,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
                              &arg, arg.iov[0].iov_len,
-                             transmit_time);
+                             transmit_time, txhash);
 
        sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -955,7 +958,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
                        tw->tw_bound_dev_if,
                        tcp_twsk_md5_key(tcptw),
                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
-                       tw->tw_tos
+                       tw->tw_tos,
+                       tw->tw_txhash
                        );
 
        inet_twsk_put(tw);
@@ -988,7 +992,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                        0,
                        tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
-                       ip_hdr(skb)->tos);
+                       ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
 }
 
 /*
@@ -2963,7 +2967,6 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
-       bool slow;
        uid_t uid;
        int ret;
 
@@ -2971,7 +2974,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
                return 0;
 
        if (sk_fullsock(sk))
-               slow = lock_sock_fast(sk);
+               lock_sock(sk);
 
        if (unlikely(sk_unhashed(sk))) {
                ret = SEQ_SKIP;
@@ -2995,7 +2998,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
 
 unlock:
        if (sk_fullsock(sk))
-               unlock_sock_fast(sk, slow);
+               release_sock(sk);
        return ret;
 
 }
@@ -3276,6 +3279,7 @@ static int __net_init tcp_sk_init(struct net *net)
        else
                net->ipv4.tcp_congestion_control = &tcp_reno;
 
+       net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
        return 0;
 }
 
@@ -3356,7 +3360,7 @@ static struct bpf_iter_reg tcp_reg_info = {
        .ctx_arg_info_size      = 1,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__tcp, sk_common),
-                 PTR_TO_BTF_ID_OR_NULL },
+                 PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
        },
        .get_func_proto         = bpf_iter_tcp_get_func_proto,
        .seq_info               = &tcp_seq_info,
index dac0d62..04fc328 100644 (file)
@@ -303,6 +303,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
                tcptw->tw_ts_offset     = tp->tsoffset;
                tcptw->tw_last_oow_ack_time = 0;
                tcptw->tw_tx_delay      = tp->tcp_tx_delay;
+               tw->tw_txhash           = sk->sk_txhash;
 #if IS_ENABLED(CONFIG_IPV6)
                if (tw->tw_family == PF_INET6) {
                        struct ipv6_pinfo *np = inet6_sk(sk);
@@ -311,7 +312,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
                        tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
                        tw->tw_tclass = np->tclass;
                        tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
-                       tw->tw_txhash = sk->sk_txhash;
                        tw->tw_ipv6only = sk->sk_ipv6only;
                }
 #endif
index 4851211..05b38f5 100644 (file)
@@ -295,7 +295,7 @@ out:
        return pp;
 }
 
-int tcp_gro_complete(struct sk_buff *skb)
+void tcp_gro_complete(struct sk_buff *skb)
 {
        struct tcphdr *th = tcp_hdr(skb);
 
@@ -310,8 +310,6 @@ int tcp_gro_complete(struct sk_buff *skb)
 
        if (skb->encapsulation)
                skb->inner_transport_header = skb->transport_header;
-
-       return 0;
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
@@ -341,7 +339,8 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
        if (NAPI_GRO_CB(skb)->is_atomic)
                skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
 
-       return tcp_gro_complete(skb);
+       tcp_gro_complete(skb);
+       return 0;
 }
 
 static const struct net_offload tcpv4_offload = {
index 39eb947..470f581 100644 (file)
@@ -234,14 +234,19 @@ static int tcp_write_timeout(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        bool expired = false, do_reset;
-       int retry_until;
+       int retry_until, max_retransmits;
 
        if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
                if (icsk->icsk_retransmits)
                        __dst_negative_advice(sk);
                retry_until = icsk->icsk_syn_retries ? :
                        READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
-               expired = icsk->icsk_retransmits >= retry_until;
+
+               max_retransmits = retry_until;
+               if (sk->sk_state == TCP_SYN_SENT)
+                       max_retransmits += READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts);
+
+               expired = icsk->icsk_retransmits >= max_retransmits;
        } else {
                if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
                        /* Black hole detection */
@@ -587,8 +592,12 @@ out_reset_timer:
            icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
                icsk->icsk_backoff = 0;
                icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
-       } else {
-               /* Use normal (exponential) backoff */
+       } else if (sk->sk_state != TCP_SYN_SENT ||
+                  icsk->icsk_backoff >
+                  READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
+               /* Use normal (exponential) backoff unless linear timeouts are
+                * activated.
+                */
                icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
        }
        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
index 9482def..fd3dae0 100644 (file)
@@ -1062,8 +1062,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        int free = 0;
        int connected = 0;
        __be32 daddr, faddr, saddr;
+       u8 tos, scope;
        __be16 dport;
-       u8  tos;
        int err, is_udplite = IS_UDPLITE(sk);
        int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
        int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
@@ -1183,12 +1183,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                connected = 0;
        }
        tos = get_rttos(&ipc, inet);
-       if (sock_flag(sk, SOCK_LOCALROUTE) ||
-           (msg->msg_flags & MSG_DONTROUTE) ||
-           (ipc.opt && ipc.opt->opt.is_strictroute)) {
-               tos |= RTO_ONLINK;
+       scope = ip_sendmsg_scope(inet, &ipc, msg);
+       if (scope == RT_SCOPE_LINK)
                connected = 0;
-       }
 
        if (ipv4_is_multicast(daddr)) {
                if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
@@ -1221,11 +1218,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
                fl4 = &fl4_stack;
 
-               flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
-                                  RT_SCOPE_UNIVERSE, sk->sk_protocol,
-                                  flow_flags,
-                                  faddr, saddr, dport, inet->inet_sport,
-                                  sk->sk_uid);
+               flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, scope,
+                                  sk->sk_protocol, flow_flags, faddr, saddr,
+                                  dport, inet->inet_sport, sk->sk_uid);
 
                security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
                rt = ip_route_output_flow(net, fl4, sk);
@@ -1332,54 +1327,15 @@ EXPORT_SYMBOL(udp_sendmsg);
 int udp_sendpage(struct sock *sk, struct page *page, int offset,
                 size_t size, int flags)
 {
-       struct inet_sock *inet = inet_sk(sk);
-       struct udp_sock *up = udp_sk(sk);
-       int ret;
+       struct bio_vec bvec;
+       struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES };
 
        if (flags & MSG_SENDPAGE_NOTLAST)
-               flags |= MSG_MORE;
-
-       if (!up->pending) {
-               struct msghdr msg = {   .msg_flags = flags|MSG_MORE };
+               msg.msg_flags |= MSG_MORE;
 
-               /* Call udp_sendmsg to specify destination address which
-                * sendpage interface can't pass.
-                * This will succeed only when the socket is connected.
-                */
-               ret = udp_sendmsg(sk, &msg, 0);
-               if (ret < 0)
-                       return ret;
-       }
-
-       lock_sock(sk);
-
-       if (unlikely(!up->pending)) {
-               release_sock(sk);
-
-               net_dbg_ratelimited("cork failed\n");
-               return -EINVAL;
-       }
-
-       ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
-                            page, offset, size, flags);
-       if (ret == -EOPNOTSUPP) {
-               release_sock(sk);
-               return sock_no_sendpage(sk->sk_socket, page, offset,
-                                       size, flags);
-       }
-       if (ret < 0) {
-               udp_flush_pending_frames(sk);
-               goto out;
-       }
-
-       up->len += size;
-       if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
-               ret = udp_push_pending_frames(sk);
-       if (!ret)
-               ret = size;
-out:
-       release_sock(sk);
-       return ret;
+       bvec_set_page(&bvec, page, size, offset);
+       iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+       return udp_sendmsg(sk, &msg, size);
 }
 
 #define UDP_SKB_IS_STATELESS 0x80000000
@@ -2927,7 +2883,8 @@ EXPORT_SYMBOL(udp_poll);
 
 int udp_abort(struct sock *sk, int err)
 {
-       lock_sock(sk);
+       if (!has_current_bpf_ctx())
+               lock_sock(sk);
 
        /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
         * with close()
@@ -2940,7 +2897,8 @@ int udp_abort(struct sock *sk, int err)
        __udp_disconnect(sk, 0);
 
 out:
-       release_sock(sk);
+       if (!has_current_bpf_ctx())
+               release_sock(sk);
 
        return 0;
 }
@@ -2985,9 +2943,30 @@ EXPORT_SYMBOL(udp_prot);
 /* ------------------------------------------------------------------------ */
 #ifdef CONFIG_PROC_FS
 
-static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo,
-                                             struct net *net)
+static unsigned short seq_file_family(const struct seq_file *seq);
+static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+{
+       unsigned short family = seq_file_family(seq);
+
+       /* AF_UNSPEC is used as a match all */
+       return ((family == AF_UNSPEC || family == sk->sk_family) &&
+               net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+static const struct seq_operations bpf_iter_udp_seq_ops;
+#endif
+static struct udp_table *udp_get_table_seq(struct seq_file *seq,
+                                          struct net *net)
 {
+       const struct udp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+       if (seq->op == &bpf_iter_udp_seq_ops)
+               return net->ipv4.udp_table;
+#endif
+
+       afinfo = pde_data(file_inode(seq->file));
        return afinfo->udp_table ? : net->ipv4.udp_table;
 }
 
@@ -2995,16 +2974,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 {
        struct udp_iter_state *state = seq->private;
        struct net *net = seq_file_net(seq);
-       struct udp_seq_afinfo *afinfo;
        struct udp_table *udptable;
        struct sock *sk;
 
-       if (state->bpf_seq_afinfo)
-               afinfo = state->bpf_seq_afinfo;
-       else
-               afinfo = pde_data(file_inode(seq->file));
-
-       udptable = udp_get_table_afinfo(afinfo, net);
+       udptable = udp_get_table_seq(seq, net);
 
        for (state->bucket = start; state->bucket <= udptable->mask;
             ++state->bucket) {
@@ -3015,10 +2988,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 
                spin_lock_bh(&hslot->lock);
                sk_for_each(sk, &hslot->head) {
-                       if (!net_eq(sock_net(sk), net))
-                               continue;
-                       if (afinfo->family == AF_UNSPEC ||
-                           sk->sk_family == afinfo->family)
+                       if (seq_sk_match(seq, sk))
                                goto found;
                }
                spin_unlock_bh(&hslot->lock);
@@ -3032,22 +3002,14 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 {
        struct udp_iter_state *state = seq->private;
        struct net *net = seq_file_net(seq);
-       struct udp_seq_afinfo *afinfo;
        struct udp_table *udptable;
 
-       if (state->bpf_seq_afinfo)
-               afinfo = state->bpf_seq_afinfo;
-       else
-               afinfo = pde_data(file_inode(seq->file));
-
        do {
                sk = sk_next(sk);
-       } while (sk && (!net_eq(sock_net(sk), net) ||
-                       (afinfo->family != AF_UNSPEC &&
-                        sk->sk_family != afinfo->family)));
+       } while (sk && !seq_sk_match(seq, sk));
 
        if (!sk) {
-               udptable = udp_get_table_afinfo(afinfo, net);
+               udptable = udp_get_table_seq(seq, net);
 
                if (state->bucket <= udptable->mask)
                        spin_unlock_bh(&udptable->hash[state->bucket].lock);
@@ -3093,15 +3055,9 @@ EXPORT_SYMBOL(udp_seq_next);
 void udp_seq_stop(struct seq_file *seq, void *v)
 {
        struct udp_iter_state *state = seq->private;
-       struct udp_seq_afinfo *afinfo;
        struct udp_table *udptable;
 
-       if (state->bpf_seq_afinfo)
-               afinfo = state->bpf_seq_afinfo;
-       else
-               afinfo = pde_data(file_inode(seq->file));
-
-       udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq));
+       udptable = udp_get_table_seq(seq, seq_file_net(seq));
 
        if (state->bucket <= udptable->mask)
                spin_unlock_bh(&udptable->hash[state->bucket].lock);
@@ -3154,6 +3110,143 @@ struct bpf_iter__udp {
        int bucket __aligned(8);
 };
 
+struct bpf_udp_iter_state {
+       struct udp_iter_state state;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       int offset;
+       struct sock **batch;
+       bool st_bucket_done;
+};
+
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+                                     unsigned int new_batch_sz);
+static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
+{
+       struct bpf_udp_iter_state *iter = seq->private;
+       struct udp_iter_state *state = &iter->state;
+       struct net *net = seq_file_net(seq);
+       struct udp_table *udptable;
+       unsigned int batch_sks = 0;
+       bool resized = false;
+       struct sock *sk;
+
+       /* The current batch is done, so advance the bucket. */
+       if (iter->st_bucket_done) {
+               state->bucket++;
+               iter->offset = 0;
+       }
+
+       udptable = udp_get_table_seq(seq, net);
+
+again:
+       /* New batch for the next bucket.
+        * Iterate over the hash table to find a bucket with sockets matching
+        * the iterator attributes, and return the first matching socket from
+        * the bucket. The remaining matched sockets from the bucket are batched
+        * before releasing the bucket lock. This allows BPF programs that are
+        * called in seq_show to acquire the bucket lock if needed.
+        */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+       iter->st_bucket_done = false;
+       batch_sks = 0;
+
+       for (; state->bucket <= udptable->mask; state->bucket++) {
+               struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
+
+               if (hlist_empty(&hslot2->head)) {
+                       iter->offset = 0;
+                       continue;
+               }
+
+               spin_lock_bh(&hslot2->lock);
+               udp_portaddr_for_each_entry(sk, &hslot2->head) {
+                       if (seq_sk_match(seq, sk)) {
+                               /* Resume from the last iterated socket at the
+                                * offset in the bucket before iterator was stopped.
+                                */
+                               if (iter->offset) {
+                                       --iter->offset;
+                                       continue;
+                               }
+                               if (iter->end_sk < iter->max_sk) {
+                                       sock_hold(sk);
+                                       iter->batch[iter->end_sk++] = sk;
+                               }
+                               batch_sks++;
+                       }
+               }
+               spin_unlock_bh(&hslot2->lock);
+
+               if (iter->end_sk)
+                       break;
+
+               /* Reset the current bucket's offset before moving to the next bucket. */
+               iter->offset = 0;
+       }
+
+       /* All done: no batch made. */
+       if (!iter->end_sk)
+               return NULL;
+
+       if (iter->end_sk == batch_sks) {
+               /* Batching is done for the current bucket; return the first
+                * socket to be iterated from the batch.
+                */
+               iter->st_bucket_done = true;
+               goto done;
+       }
+       if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) {
+               resized = true;
+               /* After allocating a larger batch, retry one more time to grab
+                * the whole bucket.
+                */
+               state->bucket--;
+               goto again;
+       }
+done:
+       return iter->batch[0];
+}
+
+static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct bpf_udp_iter_state *iter = seq->private;
+       struct sock *sk;
+
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so unref the iter->cur_sk.
+        */
+       if (iter->cur_sk < iter->end_sk) {
+               sock_put(iter->batch[iter->cur_sk++]);
+               ++iter->offset;
+       }
+
+       /* After updating iter->cur_sk, check if there are more sockets
+        * available in the current bucket batch.
+        */
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               /* Prepare a new batch. */
+               sk = bpf_iter_udp_batch(seq);
+
+       ++*pos;
+       return sk;
+}
+
+static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       /* bpf iter does not support lseek, so it always
+        * continues from where it was stop()-ped.
+        */
+       if (*pos)
+               return bpf_iter_udp_batch(seq);
+
+       return SEQ_START_TOKEN;
+}
+
 static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
                             struct udp_sock *udp_sk, uid_t uid, int bucket)
 {
@@ -3174,18 +3267,37 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
        struct bpf_prog *prog;
        struct sock *sk = v;
        uid_t uid;
+       int ret;
 
        if (v == SEQ_START_TOKEN)
                return 0;
 
+       lock_sock(sk);
+
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
+
        uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+       ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+
+unlock:
+       release_sock(sk);
+       return ret;
+}
+
+static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
+{
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
 }
 
 static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
 {
+       struct bpf_udp_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
 
@@ -3196,17 +3308,35 @@ static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
                        (void)udp_prog_seq_show(prog, &meta, v, 0, 0);
        }
 
-       udp_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk) {
+               bpf_iter_udp_put_batch(iter);
+               iter->st_bucket_done = false;
+       }
 }
 
 static const struct seq_operations bpf_iter_udp_seq_ops = {
-       .start          = udp_seq_start,
-       .next           = udp_seq_next,
+       .start          = bpf_iter_udp_seq_start,
+       .next           = bpf_iter_udp_seq_next,
        .stop           = bpf_iter_udp_seq_stop,
        .show           = bpf_iter_udp_seq_show,
 };
 #endif
 
+static unsigned short seq_file_family(const struct seq_file *seq)
+{
+       const struct udp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+       /* BPF iterator: bpf programs to filter sockets. */
+       if (seq->op == &bpf_iter_udp_seq_ops)
+               return AF_UNSPEC;
+#endif
+
+       /* Proc fs iterator */
+       afinfo = pde_data(file_inode(seq->file));
+       return afinfo->family;
+}
+
 const struct seq_operations udp_seq_ops = {
        .start          = udp_seq_start,
        .next           = udp_seq_next,
@@ -3415,38 +3545,55 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
 DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
                     struct udp_sock *udp_sk, uid_t uid, int bucket)
 
-static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+                                     unsigned int new_batch_sz)
 {
-       struct udp_iter_state *st = priv_data;
-       struct udp_seq_afinfo *afinfo;
-       int ret;
+       struct sock **new_batch;
 
-       afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
-       if (!afinfo)
+       new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch),
+                                  GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
                return -ENOMEM;
 
-       afinfo->family = AF_UNSPEC;
-       afinfo->udp_table = NULL;
-       st->bpf_seq_afinfo = afinfo;
+       bpf_iter_udp_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+
+       return 0;
+}
+
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+       struct bpf_udp_iter_state *iter = priv_data;
+       int ret;
+
        ret = bpf_iter_init_seq_net(priv_data, aux);
        if (ret)
-               kfree(afinfo);
+               return ret;
+
+       ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ);
+       if (ret)
+               bpf_iter_fini_seq_net(priv_data);
+
        return ret;
 }
 
 static void bpf_iter_fini_udp(void *priv_data)
 {
-       struct udp_iter_state *st = priv_data;
+       struct bpf_udp_iter_state *iter = priv_data;
 
-       kfree(st->bpf_seq_afinfo);
        bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
 }
 
 static const struct bpf_iter_seq_info udp_seq_info = {
        .seq_ops                = &bpf_iter_udp_seq_ops,
        .init_seq_private       = bpf_iter_init_udp,
        .fini_seq_private       = bpf_iter_fini_udp,
-       .seq_priv_size          = sizeof(struct udp_iter_state),
+       .seq_priv_size          = sizeof(struct bpf_udp_iter_state),
 };
 
 static struct bpf_iter_reg udp_reg_info = {
@@ -3454,7 +3601,7 @@ static struct bpf_iter_reg udp_reg_info = {
        .ctx_arg_info_size      = 1,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__udp, udp_sk),
-                 PTR_TO_BTF_ID_OR_NULL },
+                 PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
        },
        .seq_info               = &udp_seq_info,
 };
index 3797917..5479da0 100644 (file)
@@ -3633,8 +3633,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
                                idev->if_flags |= IF_READY;
                        }
 
-                       pr_info("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n",
-                               dev->name);
+                       pr_debug("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n",
+                                dev->name);
 
                        run_pending = 1;
                }
index 5fa0e37..a543df5 100644 (file)
@@ -458,8 +458,6 @@ looped_back:
 
        ipv6_hdr(skb)->daddr = *addr;
 
-       skb_dst_drop(skb);
-
        ip6_route_input(skb);
 
        if (skb_dst(skb)->error) {
@@ -827,7 +825,6 @@ looped_back:
        *addr = ipv6_hdr(skb)->daddr;
        ipv6_hdr(skb)->daddr = daddr;
 
-       skb_dst_drop(skb);
        ip6_route_input(skb);
        if (skb_dst(skb)->error) {
                skb_push(skb, skb->data - skb_network_header(skb));
index 9554cf4..c722cb8 100644 (file)
@@ -1589,6 +1589,14 @@ emsgsize:
                                skb_zcopy_set(skb, uarg, &extra_uref);
                        }
                }
+       } else if ((flags & MSG_SPLICE_PAGES) && length) {
+               if (inet_sk(sk)->hdrincl)
+                       return -EPERM;
+               if (rt->dst.dev->features & NETIF_F_SG)
+                       /* We need an empty buffer to attach stuff to */
+                       paged = true;
+               else
+                       flags &= ~MSG_SPLICE_PAGES;
        }
 
        /*
@@ -1778,6 +1786,15 @@ alloc_new_skb:
                                err = -EFAULT;
                                goto error;
                        }
+               } else if (flags & MSG_SPLICE_PAGES) {
+                       struct msghdr *msg = from;
+
+                       err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+                                                  sk->sk_allocation);
+                       if (err < 0)
+                               goto error;
+                       copy = err;
+                       wmem_alloc_delta += copy;
                } else if (!zc) {
                        int i = skb_shinfo(skb)->nr_frags;
 
index 34db881..03b877f 100644 (file)
@@ -470,8 +470,6 @@ static int seg6_input_core(struct net *net, struct sock *sk,
        dst = dst_cache_get(&slwt->cache);
        preempt_enable();
 
-       skb_dst_drop(skb);
-
        if (!dst) {
                ip6_route_input(skb);
                dst = skb_dst(skb);
@@ -482,6 +480,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
                        preempt_enable();
                }
        } else {
+               skb_dst_drop(skb);
                skb_dst_set(skb, dst);
        }
 
index 7132eb2..d657713 100644 (file)
@@ -93,12 +93,8 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
  * This avoids a dereference and allow compiler optimizations.
  * It is a specialized version of inet6_sk_generic().
  */
-static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
-{
-       unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
-
-       return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
-}
+#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
+                                             struct tcp6_sock, tcp)->inet6)
 
 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
@@ -533,7 +529,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct sk_buff *syn_skb)
 {
        struct inet_request_sock *ireq = inet_rsk(req);
-       struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+       const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
index 39db5a2..bf0c957 100644 (file)
@@ -36,7 +36,8 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
                                  &iph->daddr, 0);
        skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
 
-       return tcp_gro_complete(skb);
+       tcp_gro_complete(skb);
+       return 0;
 }
 
 static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
index cfe828b..ba22af1 100644 (file)
@@ -761,149 +761,6 @@ static void kcm_push(struct kcm_sock *kcm)
                kcm_write_msgs(kcm);
 }
 
-static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
-                           int offset, size_t size, int flags)
-
-{
-       struct sock *sk = sock->sk;
-       struct kcm_sock *kcm = kcm_sk(sk);
-       struct sk_buff *skb = NULL, *head = NULL;
-       long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
-       bool eor;
-       int err = 0;
-       int i;
-
-       if (flags & MSG_SENDPAGE_NOTLAST)
-               flags |= MSG_MORE;
-
-       /* No MSG_EOR from splice, only look at MSG_MORE */
-       eor = !(flags & MSG_MORE);
-
-       lock_sock(sk);
-
-       sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-       err = -EPIPE;
-       if (sk->sk_err)
-               goto out_error;
-
-       if (kcm->seq_skb) {
-               /* Previously opened message */
-               head = kcm->seq_skb;
-               skb = kcm_tx_msg(head)->last_skb;
-               i = skb_shinfo(skb)->nr_frags;
-
-               if (skb_can_coalesce(skb, i, page, offset)) {
-                       skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
-                       skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
-                       goto coalesced;
-               }
-
-               if (i >= MAX_SKB_FRAGS) {
-                       struct sk_buff *tskb;
-
-                       tskb = alloc_skb(0, sk->sk_allocation);
-                       while (!tskb) {
-                               kcm_push(kcm);
-                               err = sk_stream_wait_memory(sk, &timeo);
-                               if (err)
-                                       goto out_error;
-                       }
-
-                       if (head == skb)
-                               skb_shinfo(head)->frag_list = tskb;
-                       else
-                               skb->next = tskb;
-
-                       skb = tskb;
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
-                       i = 0;
-               }
-       } else {
-               /* Call the sk_stream functions to manage the sndbuf mem. */
-               if (!sk_stream_memory_free(sk)) {
-                       kcm_push(kcm);
-                       set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-                       err = sk_stream_wait_memory(sk, &timeo);
-                       if (err)
-                               goto out_error;
-               }
-
-               head = alloc_skb(0, sk->sk_allocation);
-               while (!head) {
-                       kcm_push(kcm);
-                       err = sk_stream_wait_memory(sk, &timeo);
-                       if (err)
-                               goto out_error;
-               }
-
-               skb = head;
-               i = 0;
-       }
-
-       get_page(page);
-       skb_fill_page_desc_noacc(skb, i, page, offset, size);
-       skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
-
-coalesced:
-       skb->len += size;
-       skb->data_len += size;
-       skb->truesize += size;
-       sk->sk_wmem_queued += size;
-       sk_mem_charge(sk, size);
-
-       if (head != skb) {
-               head->len += size;
-               head->data_len += size;
-               head->truesize += size;
-       }
-
-       if (eor) {
-               bool not_busy = skb_queue_empty(&sk->sk_write_queue);
-
-               /* Message complete, queue it on send buffer */
-               __skb_queue_tail(&sk->sk_write_queue, head);
-               kcm->seq_skb = NULL;
-               KCM_STATS_INCR(kcm->stats.tx_msgs);
-
-               if (flags & MSG_BATCH) {
-                       kcm->tx_wait_more = true;
-               } else if (kcm->tx_wait_more || not_busy) {
-                       err = kcm_write_msgs(kcm);
-                       if (err < 0) {
-                               /* We got a hard error in write_msgs but have
-                                * already queued this message. Report an error
-                                * in the socket, but don't affect return value
-                                * from sendmsg
-                                */
-                               pr_warn("KCM: Hard failure on kcm_write_msgs\n");
-                               report_csk_error(&kcm->sk, -err);
-                       }
-               }
-       } else {
-               /* Message not complete, save state */
-               kcm->seq_skb = head;
-               kcm_tx_msg(head)->last_skb = skb;
-       }
-
-       KCM_STATS_ADD(kcm->stats.tx_bytes, size);
-
-       release_sock(sk);
-       return size;
-
-out_error:
-       kcm_push(kcm);
-
-       err = sk_stream_error(sk, flags, err);
-
-       /* make sure we wake any epoll edge trigger waiter */
-       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
-               sk->sk_write_space(sk);
-
-       release_sock(sk);
-       return err;
-}
-
 static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
        struct sock *sk = sock->sk;
@@ -989,29 +846,52 @@ start:
                        merge = false;
                }
 
-               copy = min_t(int, msg_data_left(msg),
-                            pfrag->size - pfrag->offset);
+               if (msg->msg_flags & MSG_SPLICE_PAGES) {
+                       copy = msg_data_left(msg);
+                       if (!sk_wmem_schedule(sk, copy))
+                               goto wait_for_memory;
 
-               if (!sk_wmem_schedule(sk, copy))
-                       goto wait_for_memory;
+                       err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+                                                  sk->sk_allocation);
+                       if (err < 0) {
+                               if (err == -EMSGSIZE)
+                                       goto wait_for_memory;
+                               goto out_error;
+                       }
 
-               err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
-                                              pfrag->page,
-                                              pfrag->offset,
-                                              copy);
-               if (err)
-                       goto out_error;
+                       copy = err;
+                       skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
+                       sk_wmem_queued_add(sk, copy);
+                       sk_mem_charge(sk, copy);
 
-               /* Update the skb. */
-               if (merge) {
-                       skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+                       if (head != skb)
+                               head->truesize += copy;
                } else {
-                       skb_fill_page_desc(skb, i, pfrag->page,
-                                          pfrag->offset, copy);
-                       get_page(pfrag->page);
+                       copy = min_t(int, msg_data_left(msg),
+                                    pfrag->size - pfrag->offset);
+                       if (!sk_wmem_schedule(sk, copy))
+                               goto wait_for_memory;
+
+                       err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
+                                                      pfrag->page,
+                                                      pfrag->offset,
+                                                      copy);
+                       if (err)
+                               goto out_error;
+
+                       /* Update the skb. */
+                       if (merge) {
+                               skb_frag_size_add(
+                                       &skb_shinfo(skb)->frags[i - 1], copy);
+                       } else {
+                               skb_fill_page_desc(skb, i, pfrag->page,
+                                                  pfrag->offset, copy);
+                               get_page(pfrag->page);
+                       }
+
+                       pfrag->offset += copy;
                }
 
-               pfrag->offset += copy;
                copied += copy;
                if (head != skb) {
                        head->len += copy;
@@ -1088,6 +968,24 @@ out_error:
        return err;
 }
 
+static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
+                           int offset, size_t size, int flags)
+
+{
+       struct bio_vec bvec;
+       struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
+
+       if (flags & MSG_SENDPAGE_NOTLAST)
+               msg.msg_flags |= MSG_MORE;
+
+       if (flags & MSG_OOB)
+               return -EOPNOTSUPP;
+
+       bvec_set_page(&bvec, page, size, offset);
+       iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+       return kcm_sendmsg(sock, &msg, size);
+}
+
 static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
                       size_t len, int flags)
 {
index 0dac286..a0990c3 100644 (file)
@@ -34,7 +34,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
        SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW),
        SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
        SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
+       SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX),
+       SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP),
        SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
+       SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX),
+       SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP),
        SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
        SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
        SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
@@ -44,6 +48,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
        SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
        SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
        SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP),
+       SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX),
+       SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP),
        SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
        SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
        SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
index 2be3596..cae71d9 100644 (file)
@@ -27,7 +27,15 @@ enum linux_mptcp_mib_field {
        MPTCP_MIB_NODSSWINDOW,          /* Segments not in MPTCP windows */
        MPTCP_MIB_DUPDATA,              /* Segments discarded due to duplicate DSS */
        MPTCP_MIB_ADDADDR,              /* Received ADD_ADDR with echo-flag=0 */
+       MPTCP_MIB_ADDADDRTX,            /* Sent ADD_ADDR with echo-flag=0 */
+       MPTCP_MIB_ADDADDRTXDROP,        /* ADD_ADDR with echo-flag=0 not sent due to
+                                        * resource exhaustion
+                                        */
        MPTCP_MIB_ECHOADD,              /* Received ADD_ADDR with echo-flag=1 */
+       MPTCP_MIB_ECHOADDTX,            /* Sent ADD_ADDR with echo-flag=1 */
+       MPTCP_MIB_ECHOADDTXDROP,        /* ADD_ADDR with echo-flag=1 not sent due
+                                        * to resource exhaustion
+                                        */
        MPTCP_MIB_PORTADD,              /* Received ADD_ADDR with a port-number */
        MPTCP_MIB_ADDADDRDROP,          /* Dropped incoming ADD_ADDR */
        MPTCP_MIB_JOINPORTSYNRX,        /* Received a SYN MP_JOIN with a different port-number */
@@ -37,6 +45,8 @@ enum linux_mptcp_mib_field {
        MPTCP_MIB_MISMATCHPORTACKRX,    /* Received an ACK MP_JOIN with a mismatched port-number */
        MPTCP_MIB_RMADDR,               /* Received RM_ADDR */
        MPTCP_MIB_RMADDRDROP,           /* Dropped incoming RM_ADDR */
+       MPTCP_MIB_RMADDRTX,             /* Sent RM_ADDR */
+       MPTCP_MIB_RMADDRTXDROP,         /* RM_ADDR not sent due to resource exhaustion */
        MPTCP_MIB_RMSUBFLOW,            /* Remove a subflow */
        MPTCP_MIB_MPPRIOTX,             /* Transmit a MP_PRIO */
        MPTCP_MIB_MPPRIORX,             /* Received a MP_PRIO */
@@ -63,6 +73,14 @@ struct mptcp_mib {
        unsigned long mibs[LINUX_MIB_MPTCP_MAX];
 };
 
+static inline void MPTCP_ADD_STATS(struct net *net,
+                                  enum linux_mptcp_mib_field field,
+                                  int val)
+{
+       if (likely(net->mib.mptcp_statistics))
+               SNMP_ADD_STATS(net->mib.mptcp_statistics, field, val);
+}
+
 static inline void MPTCP_INC_STATS(struct net *net,
                                   enum linux_mptcp_mib_field field)
 {
index 19a01b6..8a80832 100644 (file)
@@ -687,9 +687,12 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
        }
        opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
        if (!echo) {
+               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
                opts->ahmac = add_addr_generate_hmac(msk->local_key,
                                                     msk->remote_key,
                                                     &opts->addr);
+       } else {
+               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
        }
        pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
                 opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
@@ -723,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
 
        for (i = 0; i < opts->rm_list.nr; i++)
                pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
-
+       MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr);
        return true;
 }
 
index 76612bc..92d540e 100644 (file)
@@ -26,7 +26,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 
        if (add_addr &
            (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
-               pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
+               MPTCP_INC_STATS(sock_net((struct sock *)msk),
+                               echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP);
                return -EINVAL;
        }
 
@@ -48,7 +49,8 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
        pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
 
        if (rm_addr) {
-               pr_warn("addr_signal error, rm_addr=%d", rm_addr);
+               MPTCP_ADD_STATS(sock_net((struct sock *)msk),
+                               MPTCP_MIB_RMADDRTXDROP, rm_list->nr);
                return -EINVAL;
        }
 
index 67311e7..5df5cc0 100644 (file)
@@ -3758,6 +3758,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 {
        struct mptcp_sock *msk = mptcp_sk(sock->sk);
        struct socket *ssock;
+       struct sock *newsk;
        int err;
 
        pr_debug("msk=%p", msk);
@@ -3769,17 +3770,20 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
        if (!ssock)
                return -EINVAL;
 
-       err = ssock->ops->accept(sock, newsock, flags, kern);
-       if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
-               struct mptcp_sock *msk = mptcp_sk(newsock->sk);
+       newsk = mptcp_accept(sock->sk, flags, &err, kern);
+       if (!newsk)
+               return err;
+
+       lock_sock(newsk);
+
+       __inet_accept(sock, newsock, newsk);
+       if (!mptcp_is_tcpsk(newsock->sk)) {
+               struct mptcp_sock *msk = mptcp_sk(newsk);
                struct mptcp_subflow_context *subflow;
-               struct sock *newsk = newsock->sk;
 
                set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
                msk->in_accept_queue = 0;
 
-               lock_sock(newsk);
-
                /* set ssk->sk_socket of accept()ed flows to mptcp socket.
                 * This is needed so NOSPACE flag can be set from tcp stack.
                 */
@@ -3800,11 +3804,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
                        if (unlikely(list_empty(&msk->conn_list)))
                                inet_sk_state_store(newsk, TCP_CLOSE);
                }
-
-               release_sock(newsk);
        }
+       release_sock(newsk);
 
-       return err;
+       return 0;
 }
 
 static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
index feb1d7f..c7652da 100644 (file)
@@ -139,7 +139,7 @@ retry:
                if (PTR_ERR(rt) == -EINVAL && *saddr &&
                    rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
                        *saddr = 0;
-                       flowi4_update_output(&fl4, 0, 0, daddr, 0);
+                       flowi4_update_output(&fl4, 0, daddr, 0);
                        goto retry;
                }
                IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
@@ -147,7 +147,7 @@ retry:
        } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
                ip_rt_put(rt);
                *saddr = fl4.saddr;
-               flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
+               flowi4_update_output(&fl4, 0, daddr, fl4.saddr);
                loop = true;
                goto retry;
        }
index 728eeb0..ad6f0ca 100644 (file)
@@ -296,6 +296,7 @@ void nf_conntrack_gre_init_net(struct net *net)
 /* protocol helper struct */
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
        .l4proto         = IPPROTO_GRE,
+       .allow_clash     = true,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack = gre_print_conntrack,
 #endif
index 04bd0ed..b46dd89 100644 (file)
@@ -125,9 +125,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
                break;
        case FLOW_OFFLOAD_XMIT_XFRM:
        case FLOW_OFFLOAD_XMIT_NEIGH:
-               if (!dst_hold_safe(route->tuple[dir].dst))
-                       return -1;
-
                flow_tuple->dst_cache = dst;
                flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
                break;
@@ -148,27 +145,12 @@ static void nft_flow_dst_release(struct flow_offload *flow,
                dst_release(flow->tuplehash[dir].tuple.dst_cache);
 }
 
-int flow_offload_route_init(struct flow_offload *flow,
+void flow_offload_route_init(struct flow_offload *flow,
                            const struct nf_flow_route *route)
 {
-       int err;
-
-       err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
-       if (err < 0)
-               return err;
-
-       err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
-       if (err < 0)
-               goto err_route_reply;
-
+       flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+       flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
        flow->type = NF_FLOW_OFFLOAD_ROUTE;
-
-       return 0;
-
-err_route_reply:
-       nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
-
-       return err;
 }
 EXPORT_SYMBOL_GPL(flow_offload_route_init);
 
index 19efba1..d248763 100644 (file)
@@ -163,38 +163,43 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
        }
 }
 
-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
-                           struct flow_offload_tuple *tuple, u32 *hdrsize,
-                           u32 offset)
+struct nf_flowtable_ctx {
+       const struct net_device *in;
+       u32                     offset;
+       u32                     hdrsize;
+};
+
+static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
+                           struct flow_offload_tuple *tuple)
 {
        struct flow_ports *ports;
        unsigned int thoff;
        struct iphdr *iph;
        u8 ipproto;
 
-       if (!pskb_may_pull(skb, sizeof(*iph) + offset))
+       if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
                return -1;
 
-       iph = (struct iphdr *)(skb_network_header(skb) + offset);
+       iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
        thoff = (iph->ihl * 4);
 
        if (ip_is_fragment(iph) ||
            unlikely(ip_has_options(thoff)))
                return -1;
 
-       thoff += offset;
+       thoff += ctx->offset;
 
        ipproto = iph->protocol;
        switch (ipproto) {
        case IPPROTO_TCP:
-               *hdrsize = sizeof(struct tcphdr);
+               ctx->hdrsize = sizeof(struct tcphdr);
                break;
        case IPPROTO_UDP:
-               *hdrsize = sizeof(struct udphdr);
+               ctx->hdrsize = sizeof(struct udphdr);
                break;
 #ifdef CONFIG_NF_CT_PROTO_GRE
        case IPPROTO_GRE:
-               *hdrsize = sizeof(struct gre_base_hdr);
+               ctx->hdrsize = sizeof(struct gre_base_hdr);
                break;
 #endif
        default:
@@ -204,7 +209,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
        if (iph->ttl <= 1)
                return -1;
 
-       if (!pskb_may_pull(skb, thoff + *hdrsize))
+       if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
                return -1;
 
        switch (ipproto) {
@@ -224,13 +229,13 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
        }
        }
 
-       iph = (struct iphdr *)(skb_network_header(skb) + offset);
+       iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
 
        tuple->src_v4.s_addr    = iph->saddr;
        tuple->dst_v4.s_addr    = iph->daddr;
        tuple->l3proto          = AF_INET;
        tuple->l4proto          = ipproto;
-       tuple->iifidx           = dev->ifindex;
+       tuple->iifidx           = ctx->in->ifindex;
        nf_flow_tuple_encap(skb, tuple);
 
        return 0;
@@ -336,58 +341,56 @@ static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
        return NF_STOLEN;
 }
 
-unsigned int
-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
-                       const struct nf_hook_state *state)
+static struct flow_offload_tuple_rhash *
+nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
+                      struct nf_flowtable *flow_table, struct sk_buff *skb)
 {
-       struct flow_offload_tuple_rhash *tuplehash;
-       struct nf_flowtable *flow_table = priv;
        struct flow_offload_tuple tuple = {};
-       enum flow_offload_tuple_dir dir;
-       struct flow_offload *flow;
-       struct net_device *outdev;
-       u32 hdrsize, offset = 0;
-       unsigned int thoff, mtu;
-       struct rtable *rt;
-       struct iphdr *iph;
-       __be32 nexthop;
-       int ret;
 
        if (skb->protocol != htons(ETH_P_IP) &&
-           !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
-               return NF_ACCEPT;
+           !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+               return NULL;
 
-       if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
-               return NF_ACCEPT;
+       if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
+               return NULL;
 
-       tuplehash = flow_offload_lookup(flow_table, &tuple);
-       if (tuplehash == NULL)
-               return NF_ACCEPT;
+       return flow_offload_lookup(flow_table, &tuple);
+}
+
+static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
+                                  struct nf_flowtable *flow_table,
+                                  struct flow_offload_tuple_rhash *tuplehash,
+                                  struct sk_buff *skb)
+{
+       enum flow_offload_tuple_dir dir;
+       struct flow_offload *flow;
+       unsigned int thoff, mtu;
+       struct iphdr *iph;
 
        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
-       mtu = flow->tuplehash[dir].tuple.mtu + offset;
+       mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
        if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
-               return NF_ACCEPT;
+               return 0;
 
-       iph = (struct iphdr *)(skb_network_header(skb) + offset);
-       thoff = (iph->ihl * 4) + offset;
+       iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
+       thoff = (iph->ihl * 4) + ctx->offset;
        if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
-               return NF_ACCEPT;
+               return 0;
 
        if (!nf_flow_dst_check(&tuplehash->tuple)) {
                flow_offload_teardown(flow);
-               return NF_ACCEPT;
+               return 0;
        }
 
-       if (skb_try_make_writable(skb, thoff + hdrsize))
-               return NF_DROP;
+       if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+               return -1;
 
        flow_offload_refresh(flow_table, flow);
 
        nf_flow_encap_pop(skb, tuplehash);
-       thoff -= offset;
+       thoff -= ctx->offset;
 
        iph = ip_hdr(skb);
        nf_flow_nat_ip(flow, skb, thoff, dir, iph);
@@ -398,6 +401,35 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
        if (flow_table->flags & NF_FLOWTABLE_COUNTER)
                nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
+       return 1;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+                       const struct nf_hook_state *state)
+{
+       struct flow_offload_tuple_rhash *tuplehash;
+       struct nf_flowtable *flow_table = priv;
+       enum flow_offload_tuple_dir dir;
+       struct nf_flowtable_ctx ctx = {
+               .in     = state->in,
+       };
+       struct flow_offload *flow;
+       struct net_device *outdev;
+       struct rtable *rt;
+       __be32 nexthop;
+       int ret;
+
+       tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
+       if (!tuplehash)
+               return NF_ACCEPT;
+
+       ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
+       if (ret < 0)
+               return NF_DROP;
+       else if (ret == 0)
+               return NF_ACCEPT;
+
        if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
                rt = (struct rtable *)tuplehash->tuple.dst_cache;
                memset(skb->cb, 0, sizeof(struct inet_skb_parm));
@@ -406,6 +438,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
                return nf_flow_xmit_xfrm(skb, state, &rt->dst);
        }
 
+       dir = tuplehash->tuple.dir;
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
        switch (tuplehash->tuple.xmit_type) {
        case FLOW_OFFLOAD_XMIT_NEIGH:
                rt = (struct rtable *)tuplehash->tuple.dst_cache;
@@ -535,32 +570,31 @@ static void nf_flow_nat_ipv6(const struct flow_offload *flow,
        }
 }
 
-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
-                             struct flow_offload_tuple *tuple, u32 *hdrsize,
-                             u32 offset)
+static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
+                             struct flow_offload_tuple *tuple)
 {
        struct flow_ports *ports;
        struct ipv6hdr *ip6h;
        unsigned int thoff;
        u8 nexthdr;
 
-       thoff = sizeof(*ip6h) + offset;
+       thoff = sizeof(*ip6h) + ctx->offset;
        if (!pskb_may_pull(skb, thoff))
                return -1;
 
-       ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+       ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
 
        nexthdr = ip6h->nexthdr;
        switch (nexthdr) {
        case IPPROTO_TCP:
-               *hdrsize = sizeof(struct tcphdr);
+               ctx->hdrsize = sizeof(struct tcphdr);
                break;
        case IPPROTO_UDP:
-               *hdrsize = sizeof(struct udphdr);
+               ctx->hdrsize = sizeof(struct udphdr);
                break;
 #ifdef CONFIG_NF_CT_PROTO_GRE
        case IPPROTO_GRE:
-               *hdrsize = sizeof(struct gre_base_hdr);
+               ctx->hdrsize = sizeof(struct gre_base_hdr);
                break;
 #endif
        default:
@@ -570,7 +604,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
        if (ip6h->hop_limit <= 1)
                return -1;
 
-       if (!pskb_may_pull(skb, thoff + *hdrsize))
+       if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
                return -1;
 
        switch (nexthdr) {
@@ -590,65 +624,47 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
        }
        }
 
-       ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+       ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
 
        tuple->src_v6           = ip6h->saddr;
        tuple->dst_v6           = ip6h->daddr;
        tuple->l3proto          = AF_INET6;
        tuple->l4proto          = nexthdr;
-       tuple->iifidx           = dev->ifindex;
+       tuple->iifidx           = ctx->in->ifindex;
        nf_flow_tuple_encap(skb, tuple);
 
        return 0;
 }
 
-unsigned int
-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
-                         const struct nf_hook_state *state)
+static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
+                                       struct nf_flowtable *flow_table,
+                                       struct flow_offload_tuple_rhash *tuplehash,
+                                       struct sk_buff *skb)
 {
-       struct flow_offload_tuple_rhash *tuplehash;
-       struct nf_flowtable *flow_table = priv;
-       struct flow_offload_tuple tuple = {};
        enum flow_offload_tuple_dir dir;
-       const struct in6_addr *nexthop;
        struct flow_offload *flow;
-       struct net_device *outdev;
        unsigned int thoff, mtu;
-       u32 hdrsize, offset = 0;
        struct ipv6hdr *ip6h;
-       struct rt6_info *rt;
-       int ret;
-
-       if (skb->protocol != htons(ETH_P_IPV6) &&
-           !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
-               return NF_ACCEPT;
-
-       if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
-               return NF_ACCEPT;
-
-       tuplehash = flow_offload_lookup(flow_table, &tuple);
-       if (tuplehash == NULL)
-               return NF_ACCEPT;
 
        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
-       mtu = flow->tuplehash[dir].tuple.mtu + offset;
+       mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
        if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
-               return NF_ACCEPT;
+               return 0;
 
-       ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
-       thoff = sizeof(*ip6h) + offset;
+       ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
+       thoff = sizeof(*ip6h) + ctx->offset;
        if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
-               return NF_ACCEPT;
+               return 0;
 
        if (!nf_flow_dst_check(&tuplehash->tuple)) {
                flow_offload_teardown(flow);
-               return NF_ACCEPT;
+               return 0;
        }
 
-       if (skb_try_make_writable(skb, thoff + hdrsize))
-               return NF_DROP;
+       if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+               return -1;
 
        flow_offload_refresh(flow_table, flow);
 
@@ -663,6 +679,52 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
        if (flow_table->flags & NF_FLOWTABLE_COUNTER)
                nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
+       return 1;
+}
+
+static struct flow_offload_tuple_rhash *
+nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
+                           struct nf_flowtable *flow_table,
+                           struct sk_buff *skb)
+{
+       struct flow_offload_tuple tuple = {};
+
+       if (skb->protocol != htons(ETH_P_IPV6) &&
+           !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+               return NULL;
+
+       if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
+               return NULL;
+
+       return flow_offload_lookup(flow_table, &tuple);
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+                         const struct nf_hook_state *state)
+{
+       struct flow_offload_tuple_rhash *tuplehash;
+       struct nf_flowtable *flow_table = priv;
+       enum flow_offload_tuple_dir dir;
+       struct nf_flowtable_ctx ctx = {
+               .in     = state->in,
+       };
+       const struct in6_addr *nexthop;
+       struct flow_offload *flow;
+       struct net_device *outdev;
+       struct rt6_info *rt;
+       int ret;
+
+       tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
+       if (tuplehash == NULL)
+               return NF_ACCEPT;
+
+       ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
+       if (ret < 0)
+               return NF_DROP;
+       else if (ret == 0)
+               return NF_ACCEPT;
+
        if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
                rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
                memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
@@ -671,6 +733,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
                return nf_flow_xmit_xfrm(skb, state, &rt->dst);
        }
 
+       dir = tuplehash->tuple.dir;
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
        switch (tuplehash->tuple.xmit_type) {
        case FLOW_OFFLOAD_XMIT_NEIGH:
                rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
index 0519d45..dc58991 100644 (file)
@@ -6541,10 +6541,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                goto err_element_clash;
        }
 
-       if (!(flags & NFT_SET_ELEM_CATCHALL) && set->size &&
-           !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
-               err = -ENFILE;
-               goto err_set_full;
+       if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+               unsigned int max = set->size ? set->size + set->ndeact : UINT_MAX;
+
+               if (!atomic_add_unless(&set->nelems, 1, max)) {
+                       err = -ENFILE;
+                       goto err_set_full;
+               }
        }
 
        nft_trans_elem(trans) = elem;
index a54a7f7..671474e 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
+#include <linux/dccp.h>
 #include <linux/sctp.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
@@ -406,6 +407,82 @@ err:
                regs->verdict.code = NFT_BREAK;
 }
 
+static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
+                                struct nft_regs *regs,
+                                const struct nft_pktinfo *pkt)
+{
+       struct nft_exthdr *priv = nft_expr_priv(expr);
+       unsigned int thoff, dataoff, optoff, optlen, i;
+       u32 *dest = &regs->data[priv->dreg];
+       const struct dccp_hdr *dh;
+       struct dccp_hdr _dh;
+
+       if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff)
+               goto err;
+
+       thoff = nft_thoff(pkt);
+
+       dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh);
+       if (!dh)
+               goto err;
+
+       dataoff = dh->dccph_doff * sizeof(u32);
+       optoff = __dccp_hdr_len(dh);
+       if (dataoff <= optoff)
+               goto err;
+
+       optlen = dataoff - optoff;
+
+       for (i = 0; i < optlen; ) {
+               /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in
+                * the length; the remaining options are at least 2B long.  In
+                * all cases, the first byte contains the option type.  In
+                * multi-byte options, the second byte contains the option
+                * length, which must be at least two: 1 for the type plus 1 for
+                * the length plus 0-253 for any following option data.  We
+                * aren't interested in the option data, only the type and the
+                * length, so we don't need to read more than two bytes at a
+                * time.
+                */
+               unsigned int buflen = optlen - i;
+               u8 buf[2], *bufp;
+               u8 type, len;
+
+               if (buflen > sizeof(buf))
+                       buflen = sizeof(buf);
+
+               bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen,
+                                         &buf);
+               if (!bufp)
+                       goto err;
+
+               type = bufp[0];
+
+               if (type == priv->type) {
+                       *dest = 1;
+                       return;
+               }
+
+               if (type <= DCCPO_MAX_RESERVED) {
+                       i++;
+                       continue;
+               }
+
+               if (buflen < 2)
+                       goto err;
+
+               len = bufp[1];
+
+               if (len < 2)
+                       goto err;
+
+               i += len;
+       }
+
+err:
+       *dest = 0;
+}
+
 static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
        [NFTA_EXTHDR_DREG]              = { .type = NLA_U32 },
        [NFTA_EXTHDR_TYPE]              = { .type = NLA_U8 },
@@ -557,6 +634,22 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
        return 0;
 }
 
+static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
+                               const struct nft_expr *expr,
+                               const struct nlattr * const tb[])
+{
+       struct nft_exthdr *priv = nft_expr_priv(expr);
+       int err = nft_exthdr_init(ctx, expr, tb);
+
+       if (err < 0)
+               return err;
+
+       if (!(priv->flags & NFT_EXTHDR_F_PRESENT))
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+
 static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
 {
        if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
@@ -686,6 +779,15 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = {
        .reduce         = nft_exthdr_reduce,
 };
 
+static const struct nft_expr_ops nft_exthdr_dccp_ops = {
+       .type           = &nft_exthdr_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+       .eval           = nft_exthdr_dccp_eval,
+       .init           = nft_exthdr_dccp_init,
+       .dump           = nft_exthdr_dump,
+       .reduce         = nft_exthdr_reduce,
+};
+
 static const struct nft_expr_ops *
 nft_exthdr_select_ops(const struct nft_ctx *ctx,
                      const struct nlattr * const tb[])
@@ -720,6 +822,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
                if (tb[NFTA_EXTHDR_DREG])
                        return &nft_exthdr_sctp_ops;
                break;
+       case NFT_EXTHDR_OP_DCCP:
+               if (tb[NFTA_EXTHDR_DREG])
+                       return &nft_exthdr_dccp_ops;
+               break;
        }
 
        return ERR_PTR(-EOPNOTSUPP);
index e860d8f..5ef9146 100644 (file)
@@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
                break;
        }
 
+       if (!dst_hold_safe(this_dst))
+               return -ENOENT;
+
        nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
-       if (!other_dst)
+       if (!other_dst) {
+               dst_release(this_dst);
                return -ENOENT;
+       }
 
        nft_default_forward_path(route, this_dst, dir);
        nft_default_forward_path(route, other_dst, !dir);
@@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
        if (!flow)
                goto err_flow_alloc;
 
-       if (flow_offload_route_init(flow, &route) < 0)
-               goto err_flow_add;
+       flow_offload_route_init(flow, &route);
 
        if (tcph) {
                ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
@@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
        if (ret < 0)
                goto err_flow_add;
 
-       dst_release(route.tuple[!dir].dst);
        return;
 
 err_flow_add:
        flow_offload_free(flow);
 err_flow_alloc:
+       dst_release(route.tuple[dir].dst);
        dst_release(route.tuple[!dir].dst);
 err_flow_route:
        clear_bit(IPS_OFFLOAD_BIT, &ct->status);
index 03ef4fd..29ac48c 100644 (file)
@@ -19,6 +19,7 @@ struct nft_lookup {
        struct nft_set                  *set;
        u8                              sreg;
        u8                              dreg;
+       bool                            dreg_set;
        bool                            invert;
        struct nft_set_binding          binding;
 };
@@ -75,7 +76,7 @@ void nft_lookup_eval(const struct nft_expr *expr,
        }
 
        if (ext) {
-               if (set->flags & NFT_SET_MAP)
+               if (priv->dreg_set)
                        nft_data_copy(&regs->data[priv->dreg],
                                      nft_set_ext_data(ext), set->dlen);
 
@@ -122,11 +123,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
                if (flags & ~NFT_LOOKUP_F_INV)
                        return -EINVAL;
 
-               if (flags & NFT_LOOKUP_F_INV) {
-                       if (set->flags & NFT_SET_MAP)
-                               return -EINVAL;
+               if (flags & NFT_LOOKUP_F_INV)
                        priv->invert = true;
-               }
        }
 
        if (tb[NFTA_LOOKUP_DREG] != NULL) {
@@ -140,8 +138,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
                                               set->dlen);
                if (err < 0)
                        return err;
-       } else if (set->flags & NFT_SET_MAP)
-               return -EINVAL;
+               priv->dreg_set = true;
+       } else if (set->flags & NFT_SET_MAP) {
+               /* Map given, but user asks for lookup only (i.e. to
+                * ignore value assoicated with key).
+                *
+                * This makes no sense for anonymous maps since they are
+                * scoped to the rule, but for named sets this can be useful.
+                */
+               if (set->flags & NFT_SET_ANONYMOUS)
+                       return -EINVAL;
+       }
 
        priv->binding.flags = set->flags & NFT_SET_MAP;
 
@@ -188,7 +195,7 @@ static int nft_lookup_dump(struct sk_buff *skb,
                goto nla_put_failure;
        if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
                goto nla_put_failure;
-       if (priv->set->flags & NFT_SET_MAP)
+       if (priv->dreg_set)
                if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
                        goto nla_put_failure;
        if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
index 06d46d1..34c684e 100644 (file)
@@ -1274,8 +1274,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
        struct nft_pipapo_match *new;
        int i;
 
-       new = kmalloc(sizeof(*new) + sizeof(*dst) * old->field_count,
-                     GFP_KERNEL);
+       new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
        if (!new)
                return ERR_PTR(-ENOMEM);
 
@@ -2059,8 +2058,7 @@ static int nft_pipapo_init(const struct nft_set *set,
        if (field_count > NFT_PIPAPO_MAX_FIELDS)
                return -EINVAL;
 
-       m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count,
-                   GFP_KERNEL);
+       m = kmalloc(struct_size(m, f, field_count), GFP_KERNEL);
        if (!m)
                return -ENOMEM;
 
index 41e3a20..cdb001d 100644 (file)
@@ -390,7 +390,8 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
        const u8 *service_name_tlv = NULL;
        const u8 *miux_tlv = NULL;
        const u8 *rw_tlv = NULL;
-       u8 service_name_tlv_length, miux_tlv_length,  rw_tlv_length, rw;
+       u8 service_name_tlv_length = 0;
+       u8 miux_tlv_length,  rw_tlv_length, rw;
        int err;
        u16 size = 0;
        __be16 miux;
index f2698d2..c4ebf81 100644 (file)
@@ -69,9 +69,7 @@ static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
 {
        struct dp_meter_instance *ti;
 
-       ti = kvzalloc(sizeof(*ti) +
-                     sizeof(struct dp_meter *) * size,
-                     GFP_KERNEL);
+       ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL);
        if (!ti)
                return NULL;
 
index 815c3e4..e02ecab 100644 (file)
@@ -120,6 +120,7 @@ struct cls_fl_filter {
        u32 handle;
        u32 flags;
        u32 in_hw_count;
+       u8 needs_tc_skb_ext:1;
        struct rcu_work rwork;
        struct net_device *hw_dev;
        /* Flower classifier is unlocked, which means that its reference counter
@@ -415,6 +416,8 @@ static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
 
 static void __fl_destroy_filter(struct cls_fl_filter *f)
 {
+       if (f->needs_tc_skb_ext)
+               tc_skb_ext_tc_disable();
        tcf_exts_destroy(&f->exts);
        tcf_exts_put_net(&f->exts);
        kfree(f);
@@ -615,7 +618,8 @@ static void *fl_get(struct tcf_proto *tp, u32 handle)
 }
 
 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
-       [TCA_FLOWER_UNSPEC]             = { .type = NLA_UNSPEC },
+       [TCA_FLOWER_UNSPEC]             = { .strict_start_type =
+                                               TCA_FLOWER_L2_MISS },
        [TCA_FLOWER_CLASSID]            = { .type = NLA_U32 },
        [TCA_FLOWER_INDEV]              = { .type = NLA_STRING,
                                            .len = IFNAMSIZ },
@@ -720,7 +724,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
        [TCA_FLOWER_KEY_PPPOE_SID]      = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_PPP_PROTO]      = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_L2TPV3_SID]     = { .type = NLA_U32 },
-
+       [TCA_FLOWER_L2_MISS]            = NLA_POLICY_MAX(NLA_U8, 1),
 };
 
 static const struct nla_policy
@@ -1671,6 +1675,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
                mask->meta.ingress_ifindex = 0xffffffff;
        }
 
+       fl_set_key_val(tb, &key->meta.l2_miss, TCA_FLOWER_L2_MISS,
+                      &mask->meta.l2_miss, TCA_FLOWER_UNSPEC,
+                      sizeof(key->meta.l2_miss));
+
        fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
                       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
                       sizeof(key->eth.dst));
@@ -2088,6 +2096,11 @@ errout_cleanup:
        return ret;
 }
 
+static bool fl_needs_tc_skb_ext(const struct fl_flow_key *mask)
+{
+       return mask->meta.l2_miss;
+}
+
 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
                        struct cls_fl_filter *f, struct fl_flow_mask *mask,
                        unsigned long base, struct nlattr **tb,
@@ -2124,6 +2137,14 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
                return -EINVAL;
        }
 
+       /* Enable tc skb extension if filter matches on data extracted from
+        * this extension.
+        */
+       if (fl_needs_tc_skb_ext(&mask->key)) {
+               f->needs_tc_skb_ext = 1;
+               tc_skb_ext_tc_enable();
+       }
+
        return 0;
 }
 
@@ -3077,6 +3098,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
                        goto nla_put_failure;
        }
 
+       if (fl_dump_key_val(skb, &key->meta.l2_miss,
+                           TCA_FLOWER_L2_MISS, &mask->meta.l2_miss,
+                           TCA_FLOWER_UNSPEC, sizeof(key->meta.l2_miss)))
+               goto nla_put_failure;
+
        if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
                            mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
                            sizeof(key->eth.dst)) ||
index 8aef7dd..325c290 100644 (file)
@@ -1814,10 +1814,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                        NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
                        goto failure;
                }
-               if (hopt->prio) {
-                       NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
-                       goto failure;
-               }
        }
 
        /* Keeping backward compatible with rate_table based iproute2 tc */
@@ -1913,6 +1909,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                                        TC_HTB_CLASSID_ROOT,
                                .rate = max_t(u64, hopt->rate.rate, rate64),
                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+                               .prio = hopt->prio,
                                .extack = extack,
                        };
                        err = htb_offload(dev, &offload_opt);
@@ -1933,6 +1930,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                                        TC_H_MIN(parent->common.classid),
                                .rate = max_t(u64, hopt->rate.rate, rate64),
                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+                               .prio = hopt->prio,
                                .extack = extack,
                        };
                        err = htb_offload(dev, &offload_opt);
@@ -2018,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                                .classid = cl->common.classid,
                                .rate = max_t(u64, hopt->rate.rate, rate64),
                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+                               .prio = hopt->prio,
                                .extack = extack,
                        };
                        err = htb_offload(dev, &offload_opt);
index dd7dea2..5076da1 100644 (file)
@@ -27,6 +27,8 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 
+#define TAPRIO_STAT_NOT_SET    (~0ULL)
+
 #include "sch_mqprio_lib.h"
 
 static LIST_HEAD(taprio_list);
@@ -1524,7 +1526,7 @@ static int taprio_enable_offload(struct net_device *dev,
                               "Not enough memory for enabling offload mode");
                return -ENOMEM;
        }
-       offload->enable = 1;
+       offload->cmd = TAPRIO_CMD_REPLACE;
        offload->extack = extack;
        mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
        offload->mqprio.extack = extack;
@@ -1572,7 +1574,7 @@ static int taprio_disable_offload(struct net_device *dev,
                               "Not enough memory to disable offload mode");
                return -ENOMEM;
        }
-       offload->enable = 0;
+       offload->cmd = TAPRIO_CMD_DESTROY;
 
        err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
        if (err < 0) {
@@ -2289,6 +2291,72 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
+static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
+{
+       if (val == TAPRIO_STAT_NOT_SET)
+               return 0;
+       if (nla_put_u64_64bit(skb, attrtype, val, TCA_TAPRIO_OFFLOAD_STATS_PAD))
+               return -EMSGSIZE;
+       return 0;
+}
+
+static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d,
+                             struct tc_taprio_qopt_offload *offload,
+                             struct tc_taprio_qopt_stats *stats)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       const struct net_device_ops *ops;
+       struct sk_buff *skb = d->skb;
+       struct nlattr *xstats;
+       int err;
+
+       ops = qdisc_dev(sch)->netdev_ops;
+
+       /* FIXME I could use qdisc_offload_dump_helper(), but that messes
+        * with sch->flags depending on whether the device reports taprio
+        * stats, and I'm not sure whether that's a good idea, considering
+        * that stats are optional to the offload itself
+        */
+       if (!ops->ndo_setup_tc)
+               return 0;
+
+       memset(stats, 0xff, sizeof(*stats));
+
+       err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+       if (err == -EOPNOTSUPP)
+               return 0;
+       if (err)
+               return err;
+
+       xstats = nla_nest_start(skb, TCA_STATS_APP);
+       if (!xstats)
+               goto err;
+
+       if (taprio_put_stat(skb, stats->window_drops,
+                           TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) ||
+           taprio_put_stat(skb, stats->tx_overruns,
+                           TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS))
+               goto err_cancel;
+
+       nla_nest_end(skb, xstats);
+
+       return 0;
+
+err_cancel:
+       nla_nest_cancel(skb, xstats);
+err:
+       return -EMSGSIZE;
+}
+
+static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+       struct tc_taprio_qopt_offload offload = {
+               .cmd = TAPRIO_CMD_STATS,
+       };
+
+       return taprio_dump_xstats(sch, d, &offload, &offload.stats);
+}
+
 static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
        struct taprio_sched *q = qdisc_priv(sch);
@@ -2388,12 +2456,20 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
        __acquires(d->lock)
 {
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+       struct tc_taprio_qopt_offload offload = {
+               .cmd = TAPRIO_CMD_TC_STATS,
+               .tc_stats = {
+                       .tc = cl - 1,
+               },
+       };
+       struct Qdisc *child;
 
-       sch = rtnl_dereference(dev_queue->qdisc_sleeping);
-       if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
-           qdisc_qstats_copy(d, sch) < 0)
+       child = rtnl_dereference(dev_queue->qdisc_sleeping);
+       if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
+           qdisc_qstats_copy(d, child) < 0)
                return -1;
-       return 0;
+
+       return taprio_dump_xstats(sch, d, &offload, &offload.tc_stats.stats);
 }
 
 static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
@@ -2440,6 +2516,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
        .dequeue        = taprio_dequeue,
        .enqueue        = taprio_enqueue,
        .dump           = taprio_dump,
+       .dump_stats     = taprio_dump_stats,
        .owner          = THIS_MODULE,
 };
 
index c365df2..664d1f2 100644 (file)
@@ -500,9 +500,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
                        continue;
 
                fl4->fl4_sport = laddr->a.v4.sin_port;
-               flowi4_update_output(fl4,
-                                    asoc->base.sk->sk_bound_dev_if,
-                                    RT_CONN_FLAGS_TOS(asoc->base.sk, tos),
+               flowi4_update_output(fl4, asoc->base.sk->sk_bound_dev_if,
                                     daddr->v4.sin_addr.s_addr,
                                     laddr->a.v4.sin_addr.s_addr);
 
index cda8c28..a68e1d5 100644 (file)
@@ -8281,6 +8281,22 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
        return retval;
 }
 
+static bool sctp_bpf_bypass_getsockopt(int level, int optname)
+{
+       if (level == SOL_SCTP) {
+               switch (optname) {
+               case SCTP_SOCKOPT_PEELOFF:
+               case SCTP_SOCKOPT_PEELOFF_FLAGS:
+               case SCTP_SOCKOPT_CONNECTX3:
+                       return true;
+               default:
+                       return false;
+               }
+       }
+
+       return false;
+}
+
 static int sctp_hash(struct sock *sk)
 {
        /* STUB */
@@ -9650,6 +9666,7 @@ struct proto sctp_prot = {
        .shutdown    =  sctp_shutdown,
        .setsockopt  =  sctp_setsockopt,
        .getsockopt  =  sctp_getsockopt,
+       .bpf_bypass_getsockopt  = sctp_bpf_bypass_getsockopt,
        .sendmsg     =  sctp_sendmsg,
        .recvmsg     =  sctp_recvmsg,
        .bind        =  sctp_bind,
@@ -9705,6 +9722,7 @@ struct proto sctpv6_prot = {
        .shutdown       = sctp_shutdown,
        .setsockopt     = sctp_setsockopt,
        .getsockopt     = sctp_getsockopt,
+       .bpf_bypass_getsockopt  = sctp_bpf_bypass_getsockopt,
        .sendmsg        = sctp_sendmsg,
        .recvmsg        = sctp_recvmsg,
        .bind           = sctp_bind,
index e843760..54afbe4 100644 (file)
@@ -148,18 +148,19 @@ static void sctp_sched_free_sched(struct sctp_stream *stream)
 int sctp_sched_set_sched(struct sctp_association *asoc,
                         enum sctp_sched_type sched)
 {
-       struct sctp_sched_ops *n = sctp_sched_ops[sched];
        struct sctp_sched_ops *old = asoc->outqueue.sched;
        struct sctp_datamsg *msg = NULL;
+       struct sctp_sched_ops *n;
        struct sctp_chunk *ch;
        int i, ret = 0;
 
-       if (old == n)
-               return ret;
-
        if (sched > SCTP_SS_MAX)
                return -EINVAL;
 
+       n = sctp_sched_ops[sched];
+       if (old == n)
+               return ret;
+
        if (old)
                sctp_sched_free_sched(&asoc->stream);
 
index b7e01d0..3df96e9 100644 (file)
@@ -2138,6 +2138,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
                msg.msg_name = (struct sockaddr *)&address;
                msg.msg_namelen = addr_len;
        }
+       flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
        if (sock->file->f_flags & O_NONBLOCK)
                flags |= MSG_DONTWAIT;
        msg.msg_flags = flags;
@@ -2483,6 +2484,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
                msg_sys->msg_control = ctl_buf;
                msg_sys->msg_control_is_user = false;
        }
+       flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
        msg_sys->msg_flags = flags;
 
        if (sock->file->f_flags & O_NONBLOCK)
index 5388140..1d5d367 100644 (file)
@@ -176,7 +176,7 @@ static int bearer_name_validate(const char *name,
  */
 struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct tipc_net *tn = tipc_net(net);
        struct tipc_bearer *b;
        u32 i;
 
@@ -211,11 +211,10 @@ int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id)
 
 void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
        struct tipc_bearer *b;
 
        rcu_read_lock();
-       b = rcu_dereference(tn->bearer_list[bearer_id]);
+       b = bearer_get(net, bearer_id);
        if (b)
                tipc_disc_add_dest(b->disc);
        rcu_read_unlock();
@@ -223,11 +222,10 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
 
 void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
        struct tipc_bearer *b;
 
        rcu_read_lock();
-       b = rcu_dereference(tn->bearer_list[bearer_id]);
+       b = bearer_get(net, bearer_id);
        if (b)
                tipc_disc_remove_dest(b->disc);
        rcu_read_unlock();
@@ -431,7 +429,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
        dev = dev_get_by_name(net, dev_name);
        if (!dev)
                return -ENODEV;
-       if (tipc_mtu_bad(dev, 0)) {
+       if (tipc_mtu_bad(dev)) {
                dev_put(dev);
                return -EINVAL;
        }
@@ -534,7 +532,7 @@ int tipc_bearer_mtu(struct net *net, u32 bearer_id)
        struct tipc_bearer *b;
 
        rcu_read_lock();
-       b = rcu_dereference(tipc_net(net)->bearer_list[bearer_id]);
+       b = bearer_get(net, bearer_id);
        if (b)
                mtu = b->mtu;
        rcu_read_unlock();
@@ -708,7 +706,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
                test_and_set_bit_lock(0, &b->up);
                break;
        case NETDEV_CHANGEMTU:
-               if (tipc_mtu_bad(dev, 0)) {
+               if (tipc_mtu_bad(dev)) {
                        bearer_disable(net, b);
                        break;
                }
@@ -745,7 +743,7 @@ void tipc_bearer_cleanup(void)
 
 void tipc_bearer_stop(struct net *net)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct tipc_net *tn = tipc_net(net);
        struct tipc_bearer *b;
        u32 i;
 
@@ -881,7 +879,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb)
        struct tipc_bearer *bearer;
        struct tipc_nl_msg msg;
        struct net *net = sock_net(skb->sk);
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct tipc_net *tn = tipc_net(net);
 
        if (i == MAX_BEARERS)
                return 0;
index bd0cc5c..1ee6064 100644 (file)
@@ -257,9 +257,9 @@ static inline void tipc_loopback_trace(struct net *net,
 }
 
 /* check if device MTU is too low for tipc headers */
-static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve)
+static inline bool tipc_mtu_bad(struct net_device *dev)
 {
-       if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve)
+       if (dev->mtu >= TIPC_MIN_BEARER_MTU)
                return false;
        netdev_warn(dev, "MTU too low for tipc bearer\n");
        return true;
index 0a85244..9262325 100644 (file)
@@ -739,10 +739,6 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
                udp_conf.use_udp_checksums = false;
                ub->ifindex = dev->ifindex;
                b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr);
-               if (tipc_mtu_bad(dev, b->encap_hlen)) {
-                       err = -EINVAL;
-                       goto err;
-               }
                b->mtu = b->media->mtu;
 #if IS_ENABLED(CONFIG_IPV6)
        } else if (local.proto == htons(ETH_P_IPV6)) {
index bf69c9d..a959572 100644 (file)
@@ -268,9 +268,8 @@ static void tls_append_frag(struct tls_record_info *record,
                skb_frag_size_add(frag, size);
        } else {
                ++frag;
-               __skb_frag_set_page(frag, pfrag->page);
-               skb_frag_off_set(frag, pfrag->offset);
-               skb_frag_size_set(frag, size);
+               skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset,
+                                       size);
                ++record->num_frags;
                get_page(pfrag->page);
        }
@@ -357,9 +356,8 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
                return -ENOMEM;
 
        frag = &record->frags[0];
-       __skb_frag_set_page(frag, pfrag->page);
-       skb_frag_off_set(frag, pfrag->offset);
-       skb_frag_size_set(frag, prepend_size);
+       skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset,
+                               prepend_size);
 
        get_page(pfrag->page);
        pfrag->offset += prepend_size;
index f2e7302..e02a0d8 100644 (file)
@@ -125,7 +125,10 @@ int tls_push_sg(struct sock *sk,
                u16 first_offset,
                int flags)
 {
-       int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST;
+       struct bio_vec bvec;
+       struct msghdr msg = {
+               .msg_flags = MSG_SENDPAGE_NOTLAST | MSG_SPLICE_PAGES | flags,
+       };
        int ret = 0;
        struct page *p;
        size_t size;
@@ -134,16 +137,19 @@ int tls_push_sg(struct sock *sk,
        size = sg->length - offset;
        offset += sg->offset;
 
-       ctx->in_tcp_sendpages = true;
+       ctx->splicing_pages = true;
        while (1) {
                if (sg_is_last(sg))
-                       sendpage_flags = flags;
+                       msg.msg_flags = flags;
 
                /* is sending application-limited? */
                tcp_rate_check_app_limited(sk);
                p = sg_page(sg);
 retry:
-               ret = do_tcp_sendpages(sk, p, offset, size, sendpage_flags);
+               bvec_set_page(&bvec, p, size, offset);
+               iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
+               ret = tcp_sendmsg_locked(sk, &msg, size);
 
                if (ret != size) {
                        if (ret > 0) {
@@ -155,7 +161,7 @@ retry:
                        offset -= sg->offset;
                        ctx->partially_sent_offset = offset;
                        ctx->partially_sent_record = (void *)sg;
-                       ctx->in_tcp_sendpages = false;
+                       ctx->splicing_pages = false;
                        return ret;
                }
 
@@ -169,7 +175,7 @@ retry:
                size = sg->length;
        }
 
-       ctx->in_tcp_sendpages = false;
+       ctx->splicing_pages = false;
 
        return 0;
 }
@@ -247,11 +253,11 @@ static void tls_write_space(struct sock *sk)
 {
        struct tls_context *ctx = tls_get_ctx(sk);
 
-       /* If in_tcp_sendpages call lower protocol write space handler
+       /* If splicing_pages call lower protocol write space handler
         * to ensure we wake up any waiting operations there. For example
-        * if do_tcp_sendpages where to call sk_wait_event.
+        * if splicing pages where to call sk_wait_event.
         */
-       if (ctx->in_tcp_sendpages) {
+       if (ctx->splicing_pages) {
                ctx->sk_write_space(sk);
                return;
        }
@@ -352,6 +358,39 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
                tls_ctx_free(sk, ctx);
 }
 
+static __poll_t tls_sk_poll(struct file *file, struct socket *sock,
+                           struct poll_table_struct *wait)
+{
+       struct tls_sw_context_rx *ctx;
+       struct tls_context *tls_ctx;
+       struct sock *sk = sock->sk;
+       struct sk_psock *psock;
+       __poll_t mask = 0;
+       u8 shutdown;
+       int state;
+
+       mask = tcp_poll(file, sock, wait);
+
+       state = inet_sk_state_load(sk);
+       shutdown = READ_ONCE(sk->sk_shutdown);
+       if (unlikely(state != TCP_ESTABLISHED || shutdown & RCV_SHUTDOWN))
+               return mask;
+
+       tls_ctx = tls_get_ctx(sk);
+       ctx = tls_sw_ctx_rx(tls_ctx);
+       psock = sk_psock_get(sk);
+
+       if (skb_queue_empty_lockless(&ctx->rx_list) &&
+           !tls_strp_msg_ready(ctx) &&
+           sk_psock_queue_empty(psock))
+               mask &= ~(EPOLLIN | EPOLLRDNORM);
+
+       if (psock)
+               sk_psock_put(sk, psock);
+
+       return mask;
+}
+
 static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
                                  int __user *optlen, int tx)
 {
@@ -922,9 +961,11 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
 
        ops[TLS_BASE][TLS_SW  ] = ops[TLS_BASE][TLS_BASE];
        ops[TLS_BASE][TLS_SW  ].splice_read     = tls_sw_splice_read;
+       ops[TLS_BASE][TLS_SW  ].poll            = tls_sk_poll;
 
        ops[TLS_SW  ][TLS_SW  ] = ops[TLS_SW  ][TLS_BASE];
        ops[TLS_SW  ][TLS_SW  ].splice_read     = tls_sw_splice_read;
+       ops[TLS_SW  ][TLS_SW  ].poll            = tls_sk_poll;
 
 #ifdef CONFIG_TLS_DEVICE
        ops[TLS_HW  ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
index e7728b5..653136d 100644 (file)
@@ -1839,24 +1839,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
        }
 }
 
-static int maybe_init_creds(struct scm_cookie *scm,
-                           struct socket *socket,
-                           const struct sock *other)
-{
-       int err;
-       struct msghdr msg = { .msg_controllen = 0 };
-
-       err = scm_send(socket, &msg, scm, false);
-       if (err)
-               return err;
-
-       if (unix_passcred_enabled(socket, other)) {
-               scm->pid = get_pid(task_tgid(current));
-               current_uid_gid(&scm->creds.uid, &scm->creds.gid);
-       }
-       return err;
-}
-
 static bool unix_skb_scm_eq(struct sk_buff *skb,
                            struct scm_cookie *scm)
 {
@@ -2200,19 +2182,25 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
        while (sent < len) {
                size = len - sent;
 
-               /* Keep two messages in the pipe so it schedules better */
-               size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
+               if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+                       skb = sock_alloc_send_pskb(sk, 0, 0,
+                                                  msg->msg_flags & MSG_DONTWAIT,
+                                                  &err, 0);
+               } else {
+                       /* Keep two messages in the pipe so it schedules better */
+                       size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
 
-               /* allow fallback to order-0 allocations */
-               size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
+                       /* allow fallback to order-0 allocations */
+                       size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
 
-               data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
+                       data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
 
-               data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
+                       data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
 
-               skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
-                                          msg->msg_flags & MSG_DONTWAIT, &err,
-                                          get_order(UNIX_SKB_FRAGS_SZ));
+                       skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
+                                                  msg->msg_flags & MSG_DONTWAIT, &err,
+                                                  get_order(UNIX_SKB_FRAGS_SZ));
+               }
                if (!skb)
                        goto out_err;
 
@@ -2224,13 +2212,24 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                }
                fds_sent = true;
 
-               skb_put(skb, size - data_len);
-               skb->data_len = data_len;
-               skb->len = size;
-               err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
-               if (err) {
-                       kfree_skb(skb);
-                       goto out_err;
+               if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+                       err = skb_splice_from_iter(skb, &msg->msg_iter, size,
+                                                  sk->sk_allocation);
+                       if (err < 0) {
+                               kfree_skb(skb);
+                               goto out_err;
+                       }
+                       size = err;
+                       refcount_add(size, &sk->sk_wmem_alloc);
+               } else {
+                       skb_put(skb, size - data_len);
+                       skb->data_len = data_len;
+                       skb->len = size;
+                       err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+                       if (err) {
+                               kfree_skb(skb);
+                               goto out_err;
+                       }
                }
 
                unix_state_lock(other);
@@ -2275,117 +2274,15 @@ out_err:
 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
                                    int offset, size_t size, int flags)
 {
-       int err;
-       bool send_sigpipe = false;
-       bool init_scm = true;
-       struct scm_cookie scm;
-       struct sock *other, *sk = socket->sk;
-       struct sk_buff *skb, *newskb = NULL, *tail = NULL;
-
-       if (flags & MSG_OOB)
-               return -EOPNOTSUPP;
-
-       other = unix_peer(sk);
-       if (!other || sk->sk_state != TCP_ESTABLISHED)
-               return -ENOTCONN;
-
-       if (false) {
-alloc_skb:
-               unix_state_unlock(other);
-               mutex_unlock(&unix_sk(other)->iolock);
-               newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
-                                             &err, 0);
-               if (!newskb)
-                       goto err;
-       }
-
-       /* we must acquire iolock as we modify already present
-        * skbs in the sk_receive_queue and mess with skb->len
-        */
-       err = mutex_lock_interruptible(&unix_sk(other)->iolock);
-       if (err) {
-               err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
-               goto err;
-       }
-
-       if (sk->sk_shutdown & SEND_SHUTDOWN) {
-               err = -EPIPE;
-               send_sigpipe = true;
-               goto err_unlock;
-       }
-
-       unix_state_lock(other);
-
-       if (sock_flag(other, SOCK_DEAD) ||
-           other->sk_shutdown & RCV_SHUTDOWN) {
-               err = -EPIPE;
-               send_sigpipe = true;
-               goto err_state_unlock;
-       }
+       struct bio_vec bvec;
+       struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES };
 
-       if (init_scm) {
-               err = maybe_init_creds(&scm, socket, other);
-               if (err)
-                       goto err_state_unlock;
-               init_scm = false;
-       }
-
-       skb = skb_peek_tail(&other->sk_receive_queue);
-       if (tail && tail == skb) {
-               skb = newskb;
-       } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
-               if (newskb) {
-                       skb = newskb;
-               } else {
-                       tail = skb;
-                       goto alloc_skb;
-               }
-       } else if (newskb) {
-               /* this is fast path, we don't necessarily need to
-                * call to kfree_skb even though with newskb == NULL
-                * this - does no harm
-                */
-               consume_skb(newskb);
-               newskb = NULL;
-       }
+       if (flags & MSG_SENDPAGE_NOTLAST)
+               msg.msg_flags |= MSG_MORE;
 
-       if (skb_append_pagefrags(skb, page, offset, size)) {
-               tail = skb;
-               goto alloc_skb;
-       }
-
-       skb->len += size;
-       skb->data_len += size;
-       skb->truesize += size;
-       refcount_add(size, &sk->sk_wmem_alloc);
-
-       if (newskb) {
-               err = unix_scm_to_skb(&scm, skb, false);
-               if (err)
-                       goto err_state_unlock;
-               spin_lock(&other->sk_receive_queue.lock);
-               __skb_queue_tail(&other->sk_receive_queue, newskb);
-               spin_unlock(&other->sk_receive_queue.lock);
-       }
-
-       unix_state_unlock(other);
-       mutex_unlock(&unix_sk(other)->iolock);
-
-       other->sk_data_ready(other);
-       scm_destroy(&scm);
-       return size;
-
-err_state_unlock:
-       unix_state_unlock(other);
-err_unlock:
-       mutex_unlock(&unix_sk(other)->iolock);
-err:
-       kfree_skb(newskb);
-       if (send_sigpipe && !(flags & MSG_NOSIGNAL))
-               send_sig(SIGPIPE, current, 0);
-       if (!init_scm)
-               scm_destroy(&scm);
-       return err;
+       bvec_set_page(&bvec, page, size, offset);
+       iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+       return unix_stream_sendmsg(socket, &msg, size);
 }
 
 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
index b2df1e0..26f6d30 100644 (file)
@@ -350,7 +350,7 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
 {
        struct xsk_dma_map *dma_map;
 
-       if (pool->dma_pages_cnt == 0)
+       if (!pool->dma_pages)
                return;
 
        dma_map = xp_find_dma_map(pool);
@@ -364,6 +364,7 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
 
        __xp_dma_unmap(dma_map, attrs);
        kvfree(pool->dma_pages);
+       pool->dma_pages = NULL;
        pool->dma_pages_cnt = 0;
        pool->dev = NULL;
 }
@@ -503,7 +504,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
        if (pool->unaligned) {
                xskb = pool->free_heads[--pool->free_heads_cnt];
                xp_init_xskb_addr(xskb, pool, addr);
-               if (pool->dma_pages_cnt)
+               if (pool->dma_pages)
                        xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
        } else {
                xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
@@ -569,7 +570,7 @@ static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xd
                if (pool->unaligned) {
                        xskb = pool->free_heads[--pool->free_heads_cnt];
                        xp_init_xskb_addr(xskb, pool, addr);
-                       if (pool->dma_pages_cnt)
+                       if (pool->dma_pages)
                                xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
                } else {
                        xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
index 872b801..3504925 100644 (file)
@@ -205,14 +205,16 @@ static int espintcp_sendskb_locked(struct sock *sk, struct espintcp_msg *emsg,
 static int espintcp_sendskmsg_locked(struct sock *sk,
                                     struct espintcp_msg *emsg, int flags)
 {
+       struct msghdr msghdr = { .msg_flags = flags | MSG_SPLICE_PAGES, };
        struct sk_msg *skmsg = &emsg->skmsg;
        struct scatterlist *sg;
        int done = 0;
        int ret;
 
-       flags |= MSG_SENDPAGE_NOTLAST;
+       msghdr.msg_flags |= MSG_SENDPAGE_NOTLAST;
        sg = &skmsg->sg.data[skmsg->sg.start];
        do {
+               struct bio_vec bvec;
                size_t size = sg->length - emsg->offset;
                int offset = sg->offset + emsg->offset;
                struct page *p;
@@ -220,11 +222,13 @@ static int espintcp_sendskmsg_locked(struct sock *sk,
                emsg->offset = 0;
 
                if (sg_is_last(sg))
-                       flags &= ~MSG_SENDPAGE_NOTLAST;
+                       msghdr.msg_flags &= ~MSG_SENDPAGE_NOTLAST;
 
                p = sg_page(sg);
 retry:
-               ret = do_tcp_sendpages(sk, p, offset, size, flags);
+               bvec_set_page(&bvec, p, size, offset);
+               iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, size);
+               ret = tcp_sendmsg_locked(sk, &msghdr, size);
                if (ret < 0) {
                        emsg->offset = offset - sg->offset;
                        skmsg->sg.start += done;
index 8014336..9c0fa0e 100644 (file)
@@ -74,14 +74,11 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
                if (!page)
                        return -ENOMEM;
 
-               __skb_frag_set_page(frag, page);
-
                len = PAGE_SIZE;
                if (dlen < len)
                        len = dlen;
 
-               skb_frag_off_set(frag, 0);
-               skb_frag_size_set(frag, len);
+               skb_frag_fill_page_desc(frag, page, 0, len);
                memcpy(skb_frag_address(frag), scratch, len);
 
                skb->truesize += len;
index 8dfe09a..822b074 100644 (file)
@@ -47,7 +47,7 @@ int bpf_basertt(struct bpf_sock_ops *skops)
                case BPF_SOCK_OPS_BASE_RTT:
                        n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION,
                                           cong, sizeof(cong));
-                       if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) {
+                       if (!n && !__builtin_memcmp(cong, nv, sizeof(nv))) {
                                /* Set base_rtt to 80us */
                                rv = 80;
                        } else if (n) {
index 1f1f1d3..728d551 100755 (executable)
@@ -23,5 +23,8 @@ if [ "${pahole_ver}" -ge "124" ]; then
        # see PAHOLE_HAS_LANG_EXCLUDE
        extra_paholeopt="${extra_paholeopt} --lang_exclude=rust"
 fi
+if [ "${pahole_ver}" -ge "125" ]; then
+       extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_inconsistent_proto --btf_gen_optimized"
+fi
 
 echo ${extra_paholeopt}
index 11250c4..3b7ba03 100644 (file)
@@ -28,7 +28,7 @@ MAP COMMANDS
 |      **bpftool** **map** { **show** | **list** }   [*MAP*]
 |      **bpftool** **map create**     *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
 |              **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \
-|              [**dev** *NAME*]
+|              [**offload_dev** *NAME*]
 |      **bpftool** **map dump**       *MAP*
 |      **bpftool** **map update**     *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*]
 |      **bpftool** **map lookup**     *MAP* [**key** *DATA*]
@@ -73,7 +73,7 @@ DESCRIPTION
                  maps. On such kernels bpftool will automatically emit this
                  information as well.
 
-       **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE*  **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*]
+       **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE*  **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**offload_dev** *NAME*]
                  Create a new map with given parameters and pin it to *bpffs*
                  as *FILE*.
 
@@ -86,8 +86,8 @@ DESCRIPTION
                  kernel needs it to collect metadata related to the inner maps
                  that the new map will work with.
 
-                 Keyword **dev** expects a network interface name, and is used
-                 to request hardware offload for the map.
+                 Keyword **offload_dev** expects a network interface name,
+                 and is used to request hardware offload for the map.
 
        **bpftool map dump**    *MAP*
                  Dump all entries in a given *MAP*.  In case of **name**,
index 9443c52..dcae81b 100644 (file)
@@ -31,7 +31,7 @@ PROG COMMANDS
 |      **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }]
 |      **bpftool** **prog dump jited**  *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }]
 |      **bpftool** **prog pin** *PROG* *FILE*
-|      **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
+|      **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
 |      **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
 |      **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
 |      **bpftool** **prog tracelog**
@@ -129,7 +129,7 @@ DESCRIPTION
                  contain a dot character ('.'), which is reserved for future
                  extensions of *bpffs*.
 
-       **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
+       **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
                  Load bpf program(s) from binary *OBJ* and pin as *PATH*.
                  **bpftool prog load** pins only the first program from the
                  *OBJ* as *PATH*. **bpftool prog loadall** pins all programs
@@ -143,8 +143,11 @@ DESCRIPTION
                  to be replaced in the ELF file counting from 0, while *NAME*
                  allows to replace a map by name.  *MAP* specifies the map to
                  use, referring to it by **id** or through a **pinned** file.
-                 If **dev** *NAME* is specified program will be loaded onto
-                 given networking device (offload).
+                 If **offload_dev** *NAME* is specified program will be loaded
+                 onto given networking device (offload).
+                 If **xdpmeta_dev** *NAME* is specified program will become
+                 device-bound without offloading, this facilitates access
+                 to XDP metadata.
                  Optional **pinmaps** argument can be provided to pin all
                  maps under *MAP_DIR* directory.
 
index e7234d1..085bf18 100644 (file)
@@ -278,7 +278,7 @@ _bpftool()
             _bpftool_get_prog_tags
             return 0
             ;;
-        dev)
+        dev|offload_dev|xdpmeta_dev)
             _sysfs_get_netdevs
             return 0
             ;;
@@ -508,7 +508,8 @@ _bpftool()
                             ;;
                         *)
                             COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
-                            _bpftool_once_attr 'type dev pinmaps autoattach'
+                            _bpftool_once_attr 'type pinmaps autoattach'
+                            _bpftool_one_of_list 'offload_dev xdpmeta_dev'
                             return 0
                             ;;
                     esac
@@ -733,7 +734,7 @@ _bpftool()
                             esac
                             ;;
                         *)
-                            _bpftool_once_attr 'type key value entries name flags dev'
+                            _bpftool_once_attr 'type key value entries name flags offload_dev'
                             if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then
                                 _bpftool_once_attr 'inner_map'
                             fi
index 1360c82..cc6e6aa 100644 (file)
@@ -68,7 +68,7 @@ void p_info(const char *fmt, ...)
        va_end(ap);
 }
 
-static bool is_bpffs(char *path)
+static bool is_bpffs(const char *path)
 {
        struct statfs st_fs;
 
@@ -244,13 +244,16 @@ int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
        return fd;
 }
 
-int mount_bpffs_for_pin(const char *name)
+int mount_bpffs_for_pin(const char *name, bool is_dir)
 {
        char err_str[ERR_MAX_LEN];
        char *file;
        char *dir;
        int err = 0;
 
+       if (is_dir && is_bpffs(name))
+               return err;
+
        file = malloc(strlen(name) + 1);
        if (!file) {
                p_err("mem alloc failed");
@@ -286,7 +289,7 @@ int do_pin_fd(int fd, const char *name)
 {
        int err;
 
-       err = mount_bpffs_for_pin(name);
+       err = mount_bpffs_for_pin(name, false);
        if (err)
                return err;
 
index da16e6a..0675d6a 100644 (file)
@@ -167,12 +167,12 @@ static int get_vendor_id(int ifindex)
        return strtol(buf, NULL, 0);
 }
 
-static int read_procfs(const char *path)
+static long read_procfs(const char *path)
 {
        char *endptr, *line = NULL;
        size_t len = 0;
        FILE *fd;
-       int res;
+       long res;
 
        fd = fopen(path, "r");
        if (!fd)
@@ -194,7 +194,7 @@ static int read_procfs(const char *path)
 
 static void probe_unprivileged_disabled(void)
 {
-       int res;
+       long res;
 
        /* No support for C-style ouptut */
 
@@ -216,14 +216,14 @@ static void probe_unprivileged_disabled(void)
                        printf("Unable to retrieve required privileges for bpf() syscall\n");
                        break;
                default:
-                       printf("bpf() syscall restriction has unknown value %d\n", res);
+                       printf("bpf() syscall restriction has unknown value %ld\n", res);
                }
        }
 }
 
 static void probe_jit_enable(void)
 {
-       int res;
+       long res;
 
        /* No support for C-style ouptut */
 
@@ -245,7 +245,7 @@ static void probe_jit_enable(void)
                        printf("Unable to retrieve JIT-compiler status\n");
                        break;
                default:
-                       printf("JIT-compiler status has unknown value %d\n",
+                       printf("JIT-compiler status has unknown value %ld\n",
                               res);
                }
        }
@@ -253,7 +253,7 @@ static void probe_jit_enable(void)
 
 static void probe_jit_harden(void)
 {
-       int res;
+       long res;
 
        /* No support for C-style ouptut */
 
@@ -275,7 +275,7 @@ static void probe_jit_harden(void)
                        printf("Unable to retrieve JIT hardening status\n");
                        break;
                default:
-                       printf("JIT hardening status has unknown value %d\n",
+                       printf("JIT hardening status has unknown value %ld\n",
                               res);
                }
        }
@@ -283,7 +283,7 @@ static void probe_jit_harden(void)
 
 static void probe_jit_kallsyms(void)
 {
-       int res;
+       long res;
 
        /* No support for C-style ouptut */
 
@@ -302,14 +302,14 @@ static void probe_jit_kallsyms(void)
                        printf("Unable to retrieve JIT kallsyms export status\n");
                        break;
                default:
-                       printf("JIT kallsyms exports status has unknown value %d\n", res);
+                       printf("JIT kallsyms exports status has unknown value %ld\n", res);
                }
        }
 }
 
 static void probe_jit_limit(void)
 {
-       int res;
+       long res;
 
        /* No support for C-style ouptut */
 
@@ -322,7 +322,7 @@ static void probe_jit_limit(void)
                        printf("Unable to retrieve global memory limit for JIT compiler for unprivileged users\n");
                        break;
                default:
-                       printf("Global memory limit for JIT compiler for unprivileged users is %d bytes\n", res);
+                       printf("Global memory limit for JIT compiler for unprivileged users is %ld bytes\n", res);
                }
        }
 }
index 9a1d236..6b0e520 100644 (file)
@@ -76,7 +76,7 @@ static int do_pin(int argc, char **argv)
                goto close_obj;
        }
 
-       err = mount_bpffs_for_pin(path);
+       err = mount_bpffs_for_pin(path, false);
        if (err)
                goto close_link;
 
index d98dbc5..2d78607 100644 (file)
@@ -195,6 +195,8 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 
                show_link_attach_type_json(info->tracing.attach_type,
                                           json_wtr);
+               jsonw_uint_field(json_wtr, "target_obj_id", info->tracing.target_obj_id);
+               jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id);
                break;
        case BPF_LINK_TYPE_CGROUP:
                jsonw_lluint_field(json_wtr, "cgroup_id",
@@ -212,7 +214,10 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
        case BPF_LINK_TYPE_NETFILTER:
                netfilter_dump_json(info, json_wtr);
                break;
-
+       case BPF_LINK_TYPE_STRUCT_OPS:
+               jsonw_uint_field(json_wtr, "map_id",
+                                info->struct_ops.map_id);
+               break;
        default:
                break;
        }
@@ -245,7 +250,10 @@ static void show_link_header_plain(struct bpf_link_info *info)
        else
                printf("type %u  ", info->type);
 
-       printf("prog %u  ", info->prog_id);
+       if (info->type == BPF_LINK_TYPE_STRUCT_OPS)
+               printf("map %u  ", info->struct_ops.map_id);
+       else
+               printf("prog %u  ", info->prog_id);
 }
 
 static void show_link_attach_type_plain(__u32 attach_type)
@@ -369,6 +377,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
                        printf("\n\tprog_type %u  ", prog_info.type);
 
                show_link_attach_type_plain(info->tracing.attach_type);
+               if (info->tracing.target_obj_id || info->tracing.target_btf_id)
+                       printf("\n\ttarget_obj_id %u  target_btf_id %u  ",
+                              info->tracing.target_obj_id,
+                              info->tracing.target_btf_id);
                break;
        case BPF_LINK_TYPE_CGROUP:
                printf("\n\tcgroup_id %zu  ", (size_t)info->cgroup.cgroup_id);
index a49534d..b8bb08d 100644 (file)
@@ -142,7 +142,7 @@ const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
 int open_obj_pinned(const char *path, bool quiet);
 int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
-int mount_bpffs_for_pin(const char *name);
+int mount_bpffs_for_pin(const char *name, bool is_dir);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
 int do_pin_fd(int fd, const char *name);
 
index aaeb893..f98f7bb 100644 (file)
@@ -139,6 +139,9 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
                print_hex_data_json(key, info->key_size);
                jsonw_name(json_wtr, "value");
                print_hex_data_json(value, info->value_size);
+               if (map_is_map_of_maps(info->type))
+                       jsonw_uint_field(json_wtr, "inner_map_id",
+                                        *(unsigned int *)value);
                if (btf) {
                        struct btf_dumper d = {
                                .btf = btf,
@@ -259,8 +262,13 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
                }
 
                if (info->value_size) {
-                       printf("value:%c", break_names ? '\n' : ' ');
-                       fprint_hex(stdout, value, info->value_size, " ");
+                       if (map_is_map_of_maps(info->type)) {
+                               printf("inner_map_id:%c", break_names ? '\n' : ' ');
+                               printf("%u ", *(unsigned int *)value);
+                       } else {
+                               printf("value:%c", break_names ? '\n' : ' ');
+                               fprint_hex(stdout, value, info->value_size, " ");
+                       }
                }
 
                printf("\n");
@@ -1279,6 +1287,11 @@ static int do_create(int argc, char **argv)
                                          "flags"))
                                goto exit;
                } else if (is_prefix(*argv, "dev")) {
+                       p_info("Warning: 'bpftool map create [...] dev <ifname>' syntax is deprecated.\n"
+                              "Going further, please use 'offload_dev <ifname>' to request hardware offload for the map.");
+                       goto offload_dev;
+               } else if (is_prefix(*argv, "offload_dev")) {
+offload_dev:
                        NEXT_ARG();
 
                        if (attr.map_ifindex) {
@@ -1423,7 +1436,7 @@ static int do_help(int argc, char **argv)
                "Usage: %1$s %2$s { show | list }   [MAP]\n"
                "       %1$s %2$s create     FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
                "                                  entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
-               "                                  [inner_map MAP] [dev NAME]\n"
+               "                                  [inner_map MAP] [offload_dev NAME]\n"
                "       %1$s %2$s dump       MAP\n"
                "       %1$s %2$s update     MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
                "       %1$s %2$s lookup     MAP [key DATA]\n"
index 91b6075..8443a14 100644 (file)
@@ -1517,12 +1517,13 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
        struct bpf_program *prog = NULL, *pos;
        unsigned int old_map_fds = 0;
        const char *pinmaps = NULL;
+       __u32 xdpmeta_ifindex = 0;
+       __u32 offload_ifindex = 0;
        bool auto_attach = false;
        struct bpf_object *obj;
        struct bpf_map *map;
        const char *pinfile;
        unsigned int i, j;
-       __u32 ifindex = 0;
        const char *file;
        int idx, err;
 
@@ -1614,17 +1615,46 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
                        map_replace[old_map_fds].fd = fd;
                        old_map_fds++;
                } else if (is_prefix(*argv, "dev")) {
+                       p_info("Warning: 'bpftool prog load [...] dev <ifname>' syntax is deprecated.\n"
+                              "Going further, please use 'offload_dev <ifname>' to offload program to device.\n"
+                              "For applications using XDP hints only, use 'xdpmeta_dev <ifname>'.");
+                       goto offload_dev;
+               } else if (is_prefix(*argv, "offload_dev")) {
+offload_dev:
                        NEXT_ARG();
 
-                       if (ifindex) {
-                               p_err("offload device already specified");
+                       if (offload_ifindex) {
+                               p_err("offload_dev already specified");
+                               goto err_free_reuse_maps;
+                       } else if (xdpmeta_ifindex) {
+                               p_err("xdpmeta_dev and offload_dev are mutually exclusive");
+                               goto err_free_reuse_maps;
+                       }
+                       if (!REQ_ARGS(1))
+                               goto err_free_reuse_maps;
+
+                       offload_ifindex = if_nametoindex(*argv);
+                       if (!offload_ifindex) {
+                               p_err("unrecognized netdevice '%s': %s",
+                                     *argv, strerror(errno));
+                               goto err_free_reuse_maps;
+                       }
+                       NEXT_ARG();
+               } else if (is_prefix(*argv, "xdpmeta_dev")) {
+                       NEXT_ARG();
+
+                       if (xdpmeta_ifindex) {
+                               p_err("xdpmeta_dev already specified");
+                               goto err_free_reuse_maps;
+                       } else if (offload_ifindex) {
+                               p_err("xdpmeta_dev and offload_dev are mutually exclusive");
                                goto err_free_reuse_maps;
                        }
                        if (!REQ_ARGS(1))
                                goto err_free_reuse_maps;
 
-                       ifindex = if_nametoindex(*argv);
-                       if (!ifindex) {
+                       xdpmeta_ifindex = if_nametoindex(*argv);
+                       if (!xdpmeta_ifindex) {
                                p_err("unrecognized netdevice '%s': %s",
                                      *argv, strerror(errno));
                                goto err_free_reuse_maps;
@@ -1671,7 +1701,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
                                goto err_close_obj;
                }
 
-               bpf_program__set_ifindex(pos, ifindex);
+               if (prog_type == BPF_PROG_TYPE_XDP && xdpmeta_ifindex) {
+                       bpf_program__set_flags(pos, BPF_F_XDP_DEV_BOUND_ONLY);
+                       bpf_program__set_ifindex(pos, xdpmeta_ifindex);
+               } else {
+                       bpf_program__set_ifindex(pos, offload_ifindex);
+               }
                if (bpf_program__type(pos) != prog_type)
                        bpf_program__set_type(pos, prog_type);
                bpf_program__set_expected_attach_type(pos, expected_attach_type);
@@ -1709,7 +1744,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
        idx = 0;
        bpf_object__for_each_map(map, obj) {
                if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
-                       bpf_map__set_ifindex(map, ifindex);
+                       bpf_map__set_ifindex(map, offload_ifindex);
 
                if (j < old_map_fds && idx == map_replace[j].idx) {
                        err = bpf_map__reuse_fd(map, map_replace[j++].fd);
@@ -1739,7 +1774,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
                goto err_close_obj;
        }
 
-       err = mount_bpffs_for_pin(pinfile);
+       err = mount_bpffs_for_pin(pinfile, !first_prog_only);
        if (err)
                goto err_close_obj;
 
@@ -2416,7 +2451,7 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s dump jited  PROG [{ file FILE | [opcodes] [linum] }]\n"
                "       %1$s %2$s pin   PROG FILE\n"
                "       %1$s %2$s { load | loadall } OBJ  PATH \\\n"
-               "                         [type TYPE] [dev NAME] \\\n"
+               "                         [type TYPE] [{ offload_dev | xdpmeta_dev } NAME] \\\n"
                "                         [map { idx IDX | name NAME } MAP]\\\n"
                "                         [pinmaps MAP_DIR]\n"
                "                         [autoattach]\n"
index 57c3da7..3ebc9fe 100644 (file)
@@ -509,7 +509,7 @@ static int do_register(int argc, char **argv)
        if (argc == 1)
                linkdir = GET_ARG();
 
-       if (linkdir && mount_bpffs_for_pin(linkdir)) {
+       if (linkdir && mount_bpffs_for_pin(linkdir, true)) {
                p_err("can't mount bpffs for pinning");
                return -1;
        }
index c994ff5..6961a7b 100644 (file)
@@ -1273,6 +1273,9 @@ enum {
 
 /* Create a map that will be registered/unregesitered by the backed bpf_link */
        BPF_F_LINK              = (1U << 13),
+
+/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
+       BPF_F_PATH_FD           = (1U << 14),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -1421,6 +1424,13 @@ union bpf_attr {
                __aligned_u64   pathname;
                __u32           bpf_fd;
                __u32           file_flags;
+               /* Same as dirfd in openat() syscall; see openat(2)
+                * manpage for details of path FD and pathname semantics;
+                * path_fd should accompanied by BPF_F_PATH_FD flag set in
+                * file_flags field, otherwise it should be set to zero;
+                * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed.
+                */
+               __s32           path_fd;
        };
 
        struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
index 128ac72..ed86b37 100644 (file)
@@ -572,20 +572,30 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co
                                    (void *)keys, (void *)values, count, opts);
 }
 
-int bpf_obj_pin(int fd, const char *pathname)
+int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts)
 {
-       const size_t attr_sz = offsetofend(union bpf_attr, file_flags);
+       const size_t attr_sz = offsetofend(union bpf_attr, path_fd);
        union bpf_attr attr;
        int ret;
 
+       if (!OPTS_VALID(opts, bpf_obj_pin_opts))
+               return libbpf_err(-EINVAL);
+
        memset(&attr, 0, attr_sz);
+       attr.path_fd = OPTS_GET(opts, path_fd, 0);
        attr.pathname = ptr_to_u64((void *)pathname);
+       attr.file_flags = OPTS_GET(opts, file_flags, 0);
        attr.bpf_fd = fd;
 
        ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz);
        return libbpf_err_errno(ret);
 }
 
+int bpf_obj_pin(int fd, const char *pathname)
+{
+       return bpf_obj_pin_opts(fd, pathname, NULL);
+}
+
 int bpf_obj_get(const char *pathname)
 {
        return bpf_obj_get_opts(pathname, NULL);
@@ -593,7 +603,7 @@ int bpf_obj_get(const char *pathname)
 
 int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
 {
-       const size_t attr_sz = offsetofend(union bpf_attr, file_flags);
+       const size_t attr_sz = offsetofend(union bpf_attr, path_fd);
        union bpf_attr attr;
        int fd;
 
@@ -601,6 +611,7 @@ int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
                return libbpf_err(-EINVAL);
 
        memset(&attr, 0, attr_sz);
+       attr.path_fd = OPTS_GET(opts, path_fd, 0);
        attr.pathname = ptr_to_u64((void *)pathname);
        attr.file_flags = OPTS_GET(opts, file_flags, 0);
 
index a2c0913..9aa0ee4 100644 (file)
@@ -284,16 +284,30 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values
                                    __u32 *count,
                                    const struct bpf_map_batch_opts *opts);
 
-struct bpf_obj_get_opts {
+struct bpf_obj_pin_opts {
        size_t sz; /* size of this struct for forward/backward compatibility */
 
        __u32 file_flags;
+       int path_fd;
 
        size_t :0;
 };
-#define bpf_obj_get_opts__last_field file_flags
+#define bpf_obj_pin_opts__last_field path_fd
 
 LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
+LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname,
+                               const struct bpf_obj_pin_opts *opts);
+
+struct bpf_obj_get_opts {
+       size_t sz; /* size of this struct for forward/backward compatibility */
+
+       __u32 file_flags;
+       int path_fd;
+
+       size_t :0;
+};
+#define bpf_obj_get_opts__last_field path_fd
+
 LIBBPF_API int bpf_obj_get(const char *pathname);
 LIBBPF_API int bpf_obj_get_opts(const char *pathname,
                                const struct bpf_obj_get_opts *opts);
index 929a3ba..bbab9ad 100644 (file)
 /*
  * Helper macros to manipulate data structures
  */
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
-#endif
-#ifndef container_of
+
+/* offsetof() definition that uses __builtin_offset() might not preserve field
+ * offset CO-RE relocation properly, so force-redefine offsetof() using
+ * old-school approach which works with CO-RE correctly
+ */
+#undef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+
+/* redefined container_of() to ensure we use the above offsetof() macro */
+#undef container_of
 #define container_of(ptr, type, member)                                \
        ({                                                      \
                void *__mptr = (void *)(ptr);                   \
                ((type *)(__mptr - offsetof(type, member)));    \
        })
-#endif
 
 /*
  * Compiler (optimization) barrier.
index 6fb3d0f..be076a4 100644 (file)
@@ -351,6 +351,7 @@ struct pt_regs___arm64 {
  * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions
  */
 
+/* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */
 #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
 #define __PT_PARM1_REG a0
 #define __PT_PARM2_REG a1
@@ -383,7 +384,7 @@ struct pt_regs___arm64 {
  * https://raw.githubusercontent.com/wiki/foss-for-synopsys-dwc-arc-processors/toolchain/files/ARCv2_ABI.pdf
  */
 
-/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */
+/* arc provides struct user_regs_struct instead of struct pt_regs to userspace */
 #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
 #define __PT_PARM1_REG scratch.r0
 #define __PT_PARM2_REG scratch.r1
index 0a2c079..8484b56 100644 (file)
@@ -1064,7 +1064,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
        int err = 0;
        long sz;
 
-       f = fopen(path, "rb");
+       f = fopen(path, "rbe");
        if (!f) {
                err = -errno;
                goto err_out;
index 580985e..4d9f30b 100644 (file)
@@ -2250,9 +2250,25 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d,
                                             const struct btf_type *t,
                                             __u32 id,
                                             const void *data,
-                                            __u8 bits_offset)
+                                            __u8 bits_offset,
+                                            __u8 bit_sz)
 {
-       __s64 size = btf__resolve_size(d->btf, id);
+       __s64 size;
+
+       if (bit_sz) {
+               /* bits_offset is at most 7. bit_sz is at most 128. */
+               __u8 nr_bytes = (bits_offset + bit_sz + 7) / 8;
+
+               /* When bit_sz is non zero, it is called from
+                * btf_dump_struct_data() where it only cares about
+                * negative error value.
+                * Return nr_bytes in success case to make it
+                * consistent as the regular integer case below.
+                */
+               return data + nr_bytes > d->typed_dump->data_end ? -E2BIG : nr_bytes;
+       }
+
+       size = btf__resolve_size(d->btf, id);
 
        if (size < 0 || size >= INT_MAX) {
                pr_warn("unexpected size [%zu] for id [%u]\n",
@@ -2407,7 +2423,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
 {
        int size, err = 0;
 
-       size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+       size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz);
        if (size < 0)
                return size;
        err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
index 83e8e3b..cf3323f 100644 (file)
@@ -703,17 +703,17 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo
        /* obtain fd in BPF_REG_9 */
        emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
        emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
-       /* jump to fd_array store if fd denotes module BTF */
-       emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2));
-       /* set the default value for off */
-       emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
-       /* skip BTF fd store for vmlinux BTF */
-       emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));
        /* load fd_array slot pointer */
        emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
                                         0, 0, 0, blob_fd_array_off(gen, btf_fd_idx)));
-       /* store BTF fd in slot */
+       /* store BTF fd in slot, 0 for vmlinux */
        emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0));
+       /* jump to insn[insn_idx].off store if fd denotes module BTF */
+       emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2));
+       /* set the default value for off */
+       emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
+       /* skip BTF fd store for vmlinux BTF */
+       emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1));
        /* store index into insn[insn_idx].off */
        emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));
 log:
index a27f6e9..214f828 100644 (file)
@@ -1501,16 +1501,36 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
        return map;
 }
 
-static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
 {
-       long page_sz = sysconf(_SC_PAGE_SIZE);
+       const long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;
 
-       map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
+       map_sz = (size_t)roundup(value_sz, 8) * max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
 }
 
+static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
+{
+       void *mmaped;
+
+       if (!map->mmaped)
+               return -EINVAL;
+
+       if (old_sz == new_sz)
+               return 0;
+
+       mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+       if (mmaped == MAP_FAILED)
+               return -errno;
+
+       memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
+       munmap(map->mmaped, old_sz);
+       map->mmaped = mmaped;
+       return 0;
+}
+
 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
 {
        char map_name[BPF_OBJ_NAME_LEN], *p;
@@ -1609,6 +1629,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 {
        struct bpf_map_def *def;
        struct bpf_map *map;
+       size_t mmap_sz;
        int err;
 
        map = bpf_object__add_map(obj);
@@ -1643,7 +1664,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
        pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
                 map->name, map->sec_idx, map->sec_offset, def->map_flags);
 
-       map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
+       mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+       map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (map->mmaped == MAP_FAILED) {
                err = -errno;
@@ -4330,7 +4352,7 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
        snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
        memset(info, 0, sizeof(*info));
 
-       fp = fopen(file, "r");
+       fp = fopen(file, "re");
        if (!fp) {
                err = -errno;
                pr_warn("failed to open %s: %d. No procfs support?\n", file,
@@ -4393,18 +4415,17 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
        if (!new_name)
                return libbpf_err(-errno);
 
-       new_fd = open("/", O_RDONLY | O_CLOEXEC);
+       /*
+        * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
+        * This is similar to what we do in ensure_good_fd(), but without
+        * closing original FD.
+        */
+       new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
        if (new_fd < 0) {
                err = -errno;
                goto err_free_new_name;
        }
 
-       new_fd = dup3(fd, new_fd, O_CLOEXEC);
-       if (new_fd < 0) {
-               err = -errno;
-               goto err_close_new_fd;
-       }
-
        err = zclose(map->fd);
        if (err) {
                err = -errno;
@@ -7434,7 +7455,7 @@ int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
        int ret, err = 0;
        FILE *f;
 
-       f = fopen("/proc/kallsyms", "r");
+       f = fopen("/proc/kallsyms", "re");
        if (!f) {
                err = -errno;
                pr_warn("failed to open /proc/kallsyms: %d\n", err);
@@ -8295,7 +8316,10 @@ static void bpf_map__destroy(struct bpf_map *map)
        map->init_slots_sz = 0;
 
        if (map->mmaped) {
-               munmap(map->mmaped, bpf_map_mmap_sz(map));
+               size_t mmap_sz;
+
+               mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+               munmap(map->mmaped, mmap_sz);
                map->mmaped = NULL;
        }
 
@@ -9413,10 +9437,103 @@ __u32 bpf_map__value_size(const struct bpf_map *map)
        return map->def.value_size;
 }
 
+static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
+{
+       struct btf *btf;
+       struct btf_type *datasec_type, *var_type;
+       struct btf_var_secinfo *var;
+       const struct btf_type *array_type;
+       const struct btf_array *array;
+       int vlen, element_sz, new_array_id;
+       __u32 nr_elements;
+
+       /* check btf existence */
+       btf = bpf_object__btf(map->obj);
+       if (!btf)
+               return -ENOENT;
+
+       /* verify map is datasec */
+       datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
+       if (!btf_is_datasec(datasec_type)) {
+               pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
+                       bpf_map__name(map));
+               return -EINVAL;
+       }
+
+       /* verify datasec has at least one var */
+       vlen = btf_vlen(datasec_type);
+       if (vlen == 0) {
+               pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
+                       bpf_map__name(map));
+               return -EINVAL;
+       }
+
+       /* verify last var in the datasec is an array */
+       var = &btf_var_secinfos(datasec_type)[vlen - 1];
+       var_type = btf_type_by_id(btf, var->type);
+       array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
+       if (!btf_is_array(array_type)) {
+               pr_warn("map '%s': cannot be resized, last var must be an array\n",
+                       bpf_map__name(map));
+               return -EINVAL;
+       }
+
+       /* verify request size aligns with array */
+       array = btf_array(array_type);
+       element_sz = btf__resolve_size(btf, array->type);
+       if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
+               pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
+                       bpf_map__name(map), element_sz, size);
+               return -EINVAL;
+       }
+
+       /* create a new array based on the existing array, but with new length */
+       nr_elements = (size - var->offset) / element_sz;
+       new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
+       if (new_array_id < 0)
+               return new_array_id;
+
+       /* adding a new btf type invalidates existing pointers to btf objects,
+        * so refresh pointers before proceeding
+        */
+       datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
+       var = &btf_var_secinfos(datasec_type)[vlen - 1];
+       var_type = btf_type_by_id(btf, var->type);
+
+       /* finally update btf info */
+       datasec_type->size = size;
+       var->size = size - var->offset;
+       var_type->type = new_array_id;
+
+       return 0;
+}
+
 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 {
        if (map->fd >= 0)
                return libbpf_err(-EBUSY);
+
+       if (map->mmaped) {
+               int err;
+               size_t mmap_old_sz, mmap_new_sz;
+
+               mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+               mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
+               err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
+               if (err) {
+                       pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
+                               bpf_map__name(map), err);
+                       return err;
+               }
+               err = map_btf_datasec_resize(map, size);
+               if (err && err != -ENOENT) {
+                       pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
+                               bpf_map__name(map), err);
+                       map->btf_value_type_id = 0;
+                       map->btf_key_type_id = 0;
+               }
+       }
+
        map->def.value_size = size;
        return 0;
 }
@@ -9442,7 +9559,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
        return 0;
 }
 
-const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
+void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
 {
        if (!map->mmaped)
                return NULL;
@@ -9958,7 +10075,7 @@ static int parse_uint_from_file(const char *file, const char *fmt)
        int err, ret;
        FILE *f;
 
-       f = fopen(file, "r");
+       f = fopen(file, "re");
        if (!f) {
                err = -errno;
                pr_debug("failed to open '%s': %s\n", file,
@@ -12694,7 +12811,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 
        for (i = 0; i < s->map_cnt; i++) {
                struct bpf_map *map = *s->maps[i].map;
-               size_t mmap_sz = bpf_map_mmap_sz(map);
+               size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
                int prot, map_fd = bpf_map__fd(map);
                void **mmaped = s->maps[i].mmaped;
 
@@ -12721,8 +12838,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
                 * as per normal clean up procedure, so we don't need to worry
                 * about it from skeleton's clean up perspective.
                 */
-               *mmaped = mmap(map->mmaped, mmap_sz, prot,
-                               MAP_SHARED | MAP_FIXED, map_fd, 0);
+               *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
                if (*mmaped == MAP_FAILED) {
                        err = -errno;
                        *mmaped = NULL;
index 0b73623..754da73 100644 (file)
@@ -869,8 +869,22 @@ LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
 /* get/set map key size */
 LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
-/* get/set map value size */
+/* get map value size */
 LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+/**
+ * @brief **bpf_map__set_value_size()** sets map value size.
+ * @param map the BPF map instance
+ * @return 0, on success; negative error, otherwise
+ *
+ * There is a special case for maps with associated memory-mapped regions, like
+ * the global data section maps (bss, data, rodata). When this function is used
+ * on such a map, the mapped region is resized. Afterward, an attempt is made to
+ * adjust the corresponding BTF info. This attempt is best-effort and can only
+ * succeed if the last variable of the data section map is an array. The array
+ * BTF type is replaced by a new BTF array type with a different length.
+ * Any previously existing pointers returned from bpf_map__initial_value() or
+ * corresponding data section skeleton pointer must be reinitialized.
+ */
 LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
 /* get map key/value BTF type IDs */
 LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
@@ -884,7 +898,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
 
 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
                                          const void *data, size_t size);
-LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
+LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
 
 /**
  * @brief **bpf_map__is_internal()** tells the caller whether or not the
index a5aa3a3..7521a2f 100644 (file)
@@ -391,3 +391,8 @@ LIBBPF_1.2.0 {
                bpf_map_get_info_by_fd;
                bpf_prog_get_info_by_fd;
 } LIBBPF_1.1.0;
+
+LIBBPF_1.3.0 {
+       global:
+               bpf_obj_pin_opts;
+} LIBBPF_1.2.0;
index b7d4431..9c4db90 100644 (file)
@@ -38,7 +38,7 @@ static __u32 get_ubuntu_kernel_version(void)
        if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0)
                return 0;
 
-       f = fopen(ubuntu_kver_file, "r");
+       f = fopen(ubuntu_kver_file, "re");
        if (!f)
                return 0;
 
index 1fd2eea..290411d 100644 (file)
@@ -4,6 +4,6 @@
 #define __LIBBPF_VERSION_H
 
 #define LIBBPF_MAJOR_VERSION 1
-#define LIBBPF_MINOR_VERSION 2
+#define LIBBPF_MINOR_VERSION 3
 
 #endif /* __LIBBPF_VERSION_H */
index 086eef3..f1a1415 100644 (file)
@@ -466,7 +466,7 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs,
 
 proceed:
        sprintf(line, "/proc/%d/maps", pid);
-       f = fopen(line, "r");
+       f = fopen(line, "re");
        if (!f) {
                err = -errno;
                pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n",
@@ -954,8 +954,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct
        spec_map_fd = bpf_map__fd(man->specs_map);
        ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map);
 
-       /* TODO: perform path resolution similar to uprobe's */
-       fd = open(path, O_RDONLY);
+       fd = open(path, O_RDONLY | O_CLOEXEC);
        if (fd < 0) {
                err = -errno;
                pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err);
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
new file mode 100644 (file)
index 0000000..d664b36
--- /dev/null
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+SUBDIRS = lib generated samples
+
+all: $(SUBDIRS)
+
+$(SUBDIRS):
+       @if [ -f "$@/Makefile" ] ; then \
+               $(MAKE) -C $@ ; \
+       fi
+
+clean hardclean:
+       @for dir in $(SUBDIRS) ; do \
+               if [ -f "$$dir/Makefile" ] ; then \
+                       $(MAKE) -C $$dir $@; \
+               fi \
+       done
+
+.PHONY: clean all $(SUBDIRS)
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
new file mode 100644 (file)
index 0000000..9167231
--- /dev/null
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CC=gcc
+CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \
+       -I../lib/
+ifeq ("$(DEBUG)","1")
+  CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+
+TOOL:=../ynl-gen-c.py
+
+GENS:=handshake fou netdev
+SRCS=$(patsubst %,%-user.c,${GENS})
+HDRS=$(patsubst %,%-user.h,${GENS})
+OBJS=$(patsubst %,%-user.o,${GENS})
+
+all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) regen
+
+protos.a: $(OBJS)
+       @echo -e "\tAR $@"
+       @ar rcs $@ $(OBJS)
+
+%-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+       @echo -e "\tGEN $@"
+       @$(TOOL) --mode user --header --spec $< > $@
+
+%-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+       @echo -e "\tGEN $@"
+       @$(TOOL) --mode user --source --spec $< > $@
+
+%-user.o: %-user.c %-user.h
+       @echo -e "\tCC $@"
+       @$(COMPILE.c) -c -o $@ $<
+
+clean:
+       rm -f *.o
+
+hardclean: clean
+       rm -f *.c *.h *.a
+
+regen:
+       @../ynl-regen.sh
+
+.PHONY: all clean hardclean regen
+.DEFAULT_GOAL: all
diff --git a/tools/net/ynl/generated/fou-user.c b/tools/net/ynl/generated/fou-user.c
new file mode 100644 (file)
index 0000000..c99b5d4
--- /dev/null
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/*     Documentation/netlink/specs/fou.yaml */
+/* YNL-GEN user source */
+
+#include <stdlib.h>
+#include "fou-user.h"
+#include "ynl.h"
+#include <linux/fou.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+/* Enums */
+static const char * const fou_op_strmap[] = {
+       [FOU_CMD_UNSPEC] = "unspec",
+       [FOU_CMD_ADD] = "add",
+       [FOU_CMD_DEL] = "del",
+       [FOU_CMD_GET] = "get",
+};
+
+const char *fou_op_str(int op)
+{
+       if (op < 0 || op >= (int)MNL_ARRAY_SIZE(fou_op_strmap))
+               return NULL;
+       return fou_op_strmap[op];
+}
+
+static const char * const fou_encap_type_strmap[] = {
+       [0] = "unspec",
+       [1] = "direct",
+       [2] = "gue",
+};
+
+const char *fou_encap_type_str(int value)
+{
+       if (value < 0 || value >= (int)MNL_ARRAY_SIZE(fou_encap_type_strmap))
+               return NULL;
+       return fou_encap_type_strmap[value];
+}
+
+/* Policies */
+extern struct ynl_policy_nest fou_nest;
+
+struct ynl_policy_attr fou_policy[FOU_ATTR_MAX + 1] = {
+       [FOU_ATTR_UNSPEC] = { .name = "unspec", .type = YNL_PT_REJECT, },
+       [FOU_ATTR_PORT] = { .name = "port", .type = YNL_PT_U16, },
+       [FOU_ATTR_AF] = { .name = "af", .type = YNL_PT_U8, },
+       [FOU_ATTR_IPPROTO] = { .name = "ipproto", .type = YNL_PT_U8, },
+       [FOU_ATTR_TYPE] = { .name = "type", .type = YNL_PT_U8, },
+       [FOU_ATTR_REMCSUM_NOPARTIAL] = { .name = "remcsum_nopartial", .type = YNL_PT_FLAG, },
+       [FOU_ATTR_LOCAL_V4] = { .name = "local_v4", .type = YNL_PT_U32, },
+       [FOU_ATTR_LOCAL_V6] = { .name = "local_v6", .type = YNL_PT_BINARY,},
+       [FOU_ATTR_PEER_V4] = { .name = "peer_v4", .type = YNL_PT_U32, },
+       [FOU_ATTR_PEER_V6] = { .name = "peer_v6", .type = YNL_PT_BINARY,},
+       [FOU_ATTR_PEER_PORT] = { .name = "peer_port", .type = YNL_PT_U16, },
+       [FOU_ATTR_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest fou_nest = {
+       .max_attr = FOU_ATTR_MAX,
+       .table = fou_policy,
+};
+
+/* Common nested types */
+/* ============== FOU_CMD_ADD ============== */
+/* FOU_CMD_ADD - do */
+void fou_add_req_free(struct fou_add_req *req)
+{
+       free(req->local_v6);
+       free(req->peer_v6);
+       free(req);
+}
+
+int fou_add(struct ynl_sock *ys, struct fou_add_req *req)
+{
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_ADD, 1);
+       ys->req_policy = &fou_nest;
+
+       if (req->_present.port)
+               mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port);
+       if (req->_present.ipproto)
+               mnl_attr_put_u8(nlh, FOU_ATTR_IPPROTO, req->ipproto);
+       if (req->_present.type)
+               mnl_attr_put_u8(nlh, FOU_ATTR_TYPE, req->type);
+       if (req->_present.remcsum_nopartial)
+               mnl_attr_put(nlh, FOU_ATTR_REMCSUM_NOPARTIAL, 0, NULL);
+       if (req->_present.local_v4)
+               mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4);
+       if (req->_present.peer_v4)
+               mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4);
+       if (req->_present.local_v6_len)
+               mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6);
+       if (req->_present.peer_v6_len)
+               mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6);
+       if (req->_present.peer_port)
+               mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port);
+       if (req->_present.ifindex)
+               mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex);
+
+       err = ynl_exec(ys, nlh, NULL);
+       if (err < 0)
+               return -1;
+
+       return 0;
+}
+
+/* ============== FOU_CMD_DEL ============== */
+/* FOU_CMD_DEL - do */
+void fou_del_req_free(struct fou_del_req *req)
+{
+       free(req->local_v6);
+       free(req->peer_v6);
+       free(req);
+}
+
+int fou_del(struct ynl_sock *ys, struct fou_del_req *req)
+{
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_DEL, 1);
+       ys->req_policy = &fou_nest;
+
+       if (req->_present.af)
+               mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af);
+       if (req->_present.ifindex)
+               mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex);
+       if (req->_present.port)
+               mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port);
+       if (req->_present.peer_port)
+               mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port);
+       if (req->_present.local_v4)
+               mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4);
+       if (req->_present.peer_v4)
+               mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4);
+       if (req->_present.local_v6_len)
+               mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6);
+       if (req->_present.peer_v6_len)
+               mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6);
+
+       err = ynl_exec(ys, nlh, NULL);
+       if (err < 0)
+               return -1;
+
+       return 0;
+}
+
+/* ============== FOU_CMD_GET ============== */
+/* FOU_CMD_GET - do */
+void fou_get_req_free(struct fou_get_req *req)
+{
+       free(req->local_v6);
+       free(req->peer_v6);
+       free(req);
+}
+
+void fou_get_rsp_free(struct fou_get_rsp *rsp)
+{
+       free(rsp->local_v6);
+       free(rsp->peer_v6);
+       free(rsp);
+}
+
+int fou_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+       struct ynl_parse_arg *yarg = data;
+       const struct nlattr *attr;
+       struct fou_get_rsp *dst;
+
+       dst = yarg->data;
+
+       mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+               if (mnl_attr_get_type(attr) == FOU_ATTR_PORT) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.port = 1;
+                       dst->port = mnl_attr_get_u16(attr);
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_IPPROTO) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.ipproto = 1;
+                       dst->ipproto = mnl_attr_get_u8(attr);
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_TYPE) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.type = 1;
+                       dst->type = mnl_attr_get_u8(attr);
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_REMCSUM_NOPARTIAL) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.remcsum_nopartial = 1;
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_LOCAL_V4) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.local_v4 = 1;
+                       dst->local_v4 = mnl_attr_get_u32(attr);
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_PEER_V4) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.peer_v4 = 1;
+                       dst->peer_v4 = mnl_attr_get_u32(attr);
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_LOCAL_V6) {
+                       unsigned int len;
+
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+
+                       len = mnl_attr_get_payload_len(attr);
+                       dst->_present.local_v6_len = len;
+                       dst->local_v6 = malloc(len);
+                       memcpy(dst->local_v6, mnl_attr_get_payload(attr), len);
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_PEER_V6) {
+                       unsigned int len;
+
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+
+                       len = mnl_attr_get_payload_len(attr);
+                       dst->_present.peer_v6_len = len;
+                       dst->peer_v6 = malloc(len);
+                       memcpy(dst->peer_v6, mnl_attr_get_payload(attr), len);
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_PEER_PORT) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.peer_port = 1;
+                       dst->peer_port = mnl_attr_get_u16(attr);
+               }
+               else if (mnl_attr_get_type(attr) == FOU_ATTR_IFINDEX) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.ifindex = 1;
+                       dst->ifindex = mnl_attr_get_u32(attr);
+               }
+       }
+
+       return MNL_CB_OK;
+}
+
+struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req)
+{
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+       struct fou_get_rsp *rsp;
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_GET, 1);
+       ys->req_policy = &fou_nest;
+       yrs.yarg.rsp_policy = &fou_nest;
+
+       if (req->_present.af)
+               mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af);
+       if (req->_present.ifindex)
+               mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex);
+       if (req->_present.port)
+               mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port);
+       if (req->_present.peer_port)
+               mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port);
+       if (req->_present.local_v4)
+               mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4);
+       if (req->_present.peer_v4)
+               mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4);
+       if (req->_present.local_v6_len)
+               mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6);
+       if (req->_present.peer_v6_len)
+               mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6);
+
+       rsp = calloc(1, sizeof(*rsp));
+       yrs.yarg.data = rsp;
+       yrs.cb = fou_get_rsp_parse;
+       yrs.rsp_cmd = FOU_CMD_GET;
+
+       err = ynl_exec(ys, nlh, &yrs);
+       if (err < 0)
+               goto err_free;
+
+       return rsp;
+
+err_free:
+       fou_get_rsp_free(rsp);
+       return NULL;
+}
+
+/* FOU_CMD_GET - dump */
+void fou_get_list_free(struct fou_get_list *rsp)
+{
+       struct fou_get_list *next = rsp;
+
+       while ((void *)next != YNL_LIST_END) {
+               rsp = next;
+               next = rsp->next;
+
+               free(rsp->obj.local_v6);
+               free(rsp->obj.peer_v6);
+               free(rsp);
+       }
+}
+
+struct fou_get_list *fou_get_dump(struct ynl_sock *ys)
+{
+       struct ynl_dump_state yds = {};
+       struct nlmsghdr *nlh;
+       int err;
+
+       yds.ys = ys;
+       yds.alloc_sz = sizeof(struct fou_get_list);
+       yds.cb = fou_get_rsp_parse;
+       yds.rsp_cmd = FOU_CMD_GET;
+       yds.rsp_policy = &fou_nest;
+
+       nlh = ynl_gemsg_start_dump(ys, ys->family_id, FOU_CMD_GET, 1);
+
+       err = ynl_exec_dump(ys, nlh, &yds);
+       if (err < 0)
+               goto free_list;
+
+       return yds.first;
+
+free_list:
+       fou_get_list_free(yds.first);
+       return NULL;
+}
+
+const struct ynl_family ynl_fou_family =  {
+       .name           = "fou",
+};
diff --git a/tools/net/ynl/generated/fou-user.h b/tools/net/ynl/generated/fou-user.h
new file mode 100644 (file)
index 0000000..d8ab505
--- /dev/null
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*     Documentation/netlink/specs/fou.yaml */
+/* YNL-GEN user header */
+
+#ifndef _LINUX_FOU_GEN_H
+#define _LINUX_FOU_GEN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>
+#include <linux/fou.h>
+
+struct ynl_sock;
+
+extern const struct ynl_family ynl_fou_family;
+
+/* Enums */
+const char *fou_op_str(int op);
+const char *fou_encap_type_str(int value);
+
+/* Common nested types */
+/* ============== FOU_CMD_ADD ============== */
+/* FOU_CMD_ADD - do */
+struct fou_add_req {
+       struct {
+               __u32 port:1;
+               __u32 ipproto:1;
+               __u32 type:1;
+               __u32 remcsum_nopartial:1;
+               __u32 local_v4:1;
+               __u32 peer_v4:1;
+               __u32 local_v6_len;
+               __u32 peer_v6_len;
+               __u32 peer_port:1;
+               __u32 ifindex:1;
+       } _present;
+
+       __u16 port /* big-endian */;
+       __u8 ipproto;
+       __u8 type;
+       __u32 local_v4;
+       __u32 peer_v4;
+       void *local_v6;
+       void *peer_v6;
+       __u16 peer_port /* big-endian */;
+       __s32 ifindex;
+};
+
+static inline struct fou_add_req *fou_add_req_alloc(void)
+{
+       return calloc(1, sizeof(struct fou_add_req));
+}
+void fou_add_req_free(struct fou_add_req *req);
+
+static inline void
+fou_add_req_set_port(struct fou_add_req *req, __u16 port /* big-endian */)
+{
+       req->_present.port = 1;
+       req->port = port;
+}
+static inline void
+fou_add_req_set_ipproto(struct fou_add_req *req, __u8 ipproto)
+{
+       req->_present.ipproto = 1;
+       req->ipproto = ipproto;
+}
+static inline void fou_add_req_set_type(struct fou_add_req *req, __u8 type)
+{
+       req->_present.type = 1;
+       req->type = type;
+}
+static inline void fou_add_req_set_remcsum_nopartial(struct fou_add_req *req)
+{
+       req->_present.remcsum_nopartial = 1;
+}
+static inline void
+fou_add_req_set_local_v4(struct fou_add_req *req, __u32 local_v4)
+{
+       req->_present.local_v4 = 1;
+       req->local_v4 = local_v4;
+}
+static inline void
+fou_add_req_set_peer_v4(struct fou_add_req *req, __u32 peer_v4)
+{
+       req->_present.peer_v4 = 1;
+       req->peer_v4 = peer_v4;
+}
+static inline void
+fou_add_req_set_local_v6(struct fou_add_req *req, const void *local_v6,
+                        size_t len)
+{
+       free(req->local_v6);
+       req->local_v6 = malloc(req->_present.local_v6_len);
+       memcpy(req->local_v6, local_v6, req->_present.local_v6_len);
+}
+static inline void
+fou_add_req_set_peer_v6(struct fou_add_req *req, const void *peer_v6,
+                       size_t len)
+{
+       free(req->peer_v6);
+       req->peer_v6 = malloc(req->_present.peer_v6_len);
+       memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len);
+}
+static inline void
+fou_add_req_set_peer_port(struct fou_add_req *req,
+                         __u16 peer_port /* big-endian */)
+{
+       req->_present.peer_port = 1;
+       req->peer_port = peer_port;
+}
+static inline void
+fou_add_req_set_ifindex(struct fou_add_req *req, __s32 ifindex)
+{
+       req->_present.ifindex = 1;
+       req->ifindex = ifindex;
+}
+
+/*
+ * Add port.
+ */
+int fou_add(struct ynl_sock *ys, struct fou_add_req *req);
+
+/* ============== FOU_CMD_DEL ============== */
+/* FOU_CMD_DEL - do */
+struct fou_del_req {
+       struct {
+               __u32 af:1;
+               __u32 ifindex:1;
+               __u32 port:1;
+               __u32 peer_port:1;
+               __u32 local_v4:1;
+               __u32 peer_v4:1;
+               __u32 local_v6_len;
+               __u32 peer_v6_len;
+       } _present;
+
+       __u8 af;
+       __s32 ifindex;
+       __u16 port /* big-endian */;
+       __u16 peer_port /* big-endian */;
+       __u32 local_v4;
+       __u32 peer_v4;
+       void *local_v6;
+       void *peer_v6;
+};
+
+static inline struct fou_del_req *fou_del_req_alloc(void)
+{
+       return calloc(1, sizeof(struct fou_del_req));
+}
+void fou_del_req_free(struct fou_del_req *req);
+
+static inline void fou_del_req_set_af(struct fou_del_req *req, __u8 af)
+{
+       req->_present.af = 1;
+       req->af = af;
+}
+static inline void
+fou_del_req_set_ifindex(struct fou_del_req *req, __s32 ifindex)
+{
+       req->_present.ifindex = 1;
+       req->ifindex = ifindex;
+}
+static inline void
+fou_del_req_set_port(struct fou_del_req *req, __u16 port /* big-endian */)
+{
+       req->_present.port = 1;
+       req->port = port;
+}
+static inline void
+fou_del_req_set_peer_port(struct fou_del_req *req,
+                         __u16 peer_port /* big-endian */)
+{
+       req->_present.peer_port = 1;
+       req->peer_port = peer_port;
+}
+static inline void
+fou_del_req_set_local_v4(struct fou_del_req *req, __u32 local_v4)
+{
+       req->_present.local_v4 = 1;
+       req->local_v4 = local_v4;
+}
+static inline void
+fou_del_req_set_peer_v4(struct fou_del_req *req, __u32 peer_v4)
+{
+       req->_present.peer_v4 = 1;
+       req->peer_v4 = peer_v4;
+}
+static inline void
+fou_del_req_set_local_v6(struct fou_del_req *req, const void *local_v6,
+                        size_t len)
+{
+       free(req->local_v6);
+       req->local_v6 = malloc(req->_present.local_v6_len);
+       memcpy(req->local_v6, local_v6, req->_present.local_v6_len);
+}
+static inline void
+fou_del_req_set_peer_v6(struct fou_del_req *req, const void *peer_v6,
+                       size_t len)
+{
+       free(req->peer_v6);
+       req->peer_v6 = malloc(req->_present.peer_v6_len);
+       memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len);
+}
+
+/*
+ * Delete port.
+ */
+int fou_del(struct ynl_sock *ys, struct fou_del_req *req);
+
+/* ============== FOU_CMD_GET ============== */
+/* FOU_CMD_GET - do */
+struct fou_get_req {
+       struct {
+               __u32 af:1;
+               __u32 ifindex:1;
+               __u32 port:1;
+               __u32 peer_port:1;
+               __u32 local_v4:1;
+               __u32 peer_v4:1;
+               __u32 local_v6_len;
+               __u32 peer_v6_len;
+       } _present;
+
+       __u8 af;
+       __s32 ifindex;
+       __u16 port /* big-endian */;
+       __u16 peer_port /* big-endian */;
+       __u32 local_v4;
+       __u32 peer_v4;
+       void *local_v6;
+       void *peer_v6;
+};
+
+static inline struct fou_get_req *fou_get_req_alloc(void)
+{
+       return calloc(1, sizeof(struct fou_get_req));
+}
+void fou_get_req_free(struct fou_get_req *req);
+
+static inline void fou_get_req_set_af(struct fou_get_req *req, __u8 af)
+{
+       req->_present.af = 1;
+       req->af = af;
+}
+static inline void
+fou_get_req_set_ifindex(struct fou_get_req *req, __s32 ifindex)
+{
+       req->_present.ifindex = 1;
+       req->ifindex = ifindex;
+}
+static inline void
+fou_get_req_set_port(struct fou_get_req *req, __u16 port /* big-endian */)
+{
+       req->_present.port = 1;
+       req->port = port;
+}
+static inline void
+fou_get_req_set_peer_port(struct fou_get_req *req,
+                         __u16 peer_port /* big-endian */)
+{
+       req->_present.peer_port = 1;
+       req->peer_port = peer_port;
+}
+static inline void
+fou_get_req_set_local_v4(struct fou_get_req *req, __u32 local_v4)
+{
+       req->_present.local_v4 = 1;
+       req->local_v4 = local_v4;
+}
+static inline void
+fou_get_req_set_peer_v4(struct fou_get_req *req, __u32 peer_v4)
+{
+       req->_present.peer_v4 = 1;
+       req->peer_v4 = peer_v4;
+}
+static inline void
+fou_get_req_set_local_v6(struct fou_get_req *req, const void *local_v6,
+                        size_t len)
+{
+       free(req->local_v6);
+       req->local_v6 = malloc(req->_present.local_v6_len);
+       memcpy(req->local_v6, local_v6, req->_present.local_v6_len);
+}
+static inline void
+fou_get_req_set_peer_v6(struct fou_get_req *req, const void *peer_v6,
+                       size_t len)
+{
+       free(req->peer_v6);
+       req->peer_v6 = malloc(req->_present.peer_v6_len);
+       memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len);
+}
+
+struct fou_get_rsp {
+       struct {
+               __u32 port:1;
+               __u32 ipproto:1;
+               __u32 type:1;
+               __u32 remcsum_nopartial:1;
+               __u32 local_v4:1;
+               __u32 peer_v4:1;
+               __u32 local_v6_len;
+               __u32 peer_v6_len;
+               __u32 peer_port:1;
+               __u32 ifindex:1;
+       } _present;
+
+       __u16 port /* big-endian */;
+       __u8 ipproto;
+       __u8 type;
+       __u32 local_v4;
+       __u32 peer_v4;
+       void *local_v6;
+       void *peer_v6;
+       __u16 peer_port /* big-endian */;
+       __s32 ifindex;
+};
+
+void fou_get_rsp_free(struct fou_get_rsp *rsp);
+
+/*
+ * Get tunnel info.
+ */
+struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req);
+
+/* FOU_CMD_GET - dump */
+struct fou_get_list {
+       struct fou_get_list *next;
+       struct fou_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void fou_get_list_free(struct fou_get_list *rsp);
+
+struct fou_get_list *fou_get_dump(struct ynl_sock *ys);
+
+#endif /* _LINUX_FOU_GEN_H */
diff --git a/tools/net/ynl/generated/handshake-user.c b/tools/net/ynl/generated/handshake-user.c
new file mode 100644 (file)
index 0000000..fe99c4e
--- /dev/null
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/*     Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN user source */
+
+#include <stdlib.h>
+#include "handshake-user.h"
+#include "ynl.h"
+#include <linux/handshake.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+/* Enums */
+static const char * const handshake_op_strmap[] = {
+       [HANDSHAKE_CMD_READY] = "ready",
+       [HANDSHAKE_CMD_ACCEPT] = "accept",
+       [HANDSHAKE_CMD_DONE] = "done",
+};
+
+/* Map a handshake genetlink command number to its spec name, or NULL
+ * when the value is outside the table.
+ */
+const char *handshake_op_str(int op)
+{
+       if (op >= 0 && op < (int)MNL_ARRAY_SIZE(handshake_op_strmap))
+               return handshake_op_strmap[op];
+       return NULL;
+}
+
+static const char * const handshake_handler_class_strmap[] = {
+       [0] = "none",
+       [1] = "tlshd",
+       [2] = "max",
+};
+
+/* Map an enum handshake_handler_class value to its spec name, or NULL
+ * when the value is outside the table.
+ */
+const char *handshake_handler_class_str(enum handshake_handler_class value)
+{
+       if (value >= 0 && value < (int)MNL_ARRAY_SIZE(handshake_handler_class_strmap))
+               return handshake_handler_class_strmap[value];
+       return NULL;
+}
+
+static const char * const handshake_msg_type_strmap[] = {
+       [0] = "unspec",
+       [1] = "clienthello",
+       [2] = "serverhello",
+};
+
+/* Map an enum handshake_msg_type value to its spec name, or NULL. */
+const char *handshake_msg_type_str(enum handshake_msg_type value)
+{
+       if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_msg_type_strmap))
+               return NULL;
+       return handshake_msg_type_strmap[value];
+}
+
+static const char * const handshake_auth_strmap[] = {
+       [0] = "unspec",
+       [1] = "unauth",
+       [2] = "psk",
+       [3] = "x509",
+};
+
+/* Map an enum handshake_auth value to its spec name, or NULL. */
+const char *handshake_auth_str(enum handshake_auth value)
+{
+       if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_auth_strmap))
+               return NULL;
+       return handshake_auth_strmap[value];
+}
+
+/* Policies */
+/* Attribute tables for each handshake attribute set; installed as
+ * req_policy / rsp_policy before sending and used by
+ * ynl_attr_validate() when parsing replies.
+ */
+extern struct ynl_policy_nest handshake_x509_nest;
+extern struct ynl_policy_nest handshake_accept_nest;
+extern struct ynl_policy_nest handshake_done_nest;
+
+struct ynl_policy_attr handshake_x509_policy[HANDSHAKE_A_X509_MAX + 1] = {
+       [HANDSHAKE_A_X509_CERT] = { .name = "cert", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_X509_PRIVKEY] = { .name = "privkey", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest handshake_x509_nest = {
+       .max_attr = HANDSHAKE_A_X509_MAX,
+       .table = handshake_x509_policy,
+};
+
+struct ynl_policy_attr handshake_accept_policy[HANDSHAKE_A_ACCEPT_MAX + 1] = {
+       [HANDSHAKE_A_ACCEPT_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = { .name = "handler-class", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_ACCEPT_MESSAGE_TYPE] = { .name = "message-type", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_ACCEPT_TIMEOUT] = { .name = "timeout", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_ACCEPT_AUTH_MODE] = { .name = "auth-mode", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_ACCEPT_PEER_IDENTITY] = { .name = "peer-identity", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_ACCEPT_CERTIFICATE] = { .name = "certificate", .type = YNL_PT_NEST, .nest = &handshake_x509_nest, },
+       [HANDSHAKE_A_ACCEPT_PEERNAME] = { .name = "peername", .type = YNL_PT_NUL_STR, },
+};
+
+struct ynl_policy_nest handshake_accept_nest = {
+       .max_attr = HANDSHAKE_A_ACCEPT_MAX,
+       .table = handshake_accept_policy,
+};
+
+struct ynl_policy_attr handshake_done_policy[HANDSHAKE_A_DONE_MAX + 1] = {
+       [HANDSHAKE_A_DONE_STATUS] = { .name = "status", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_DONE_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, },
+       [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .name = "remote-auth", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest handshake_done_nest = {
+       .max_attr = HANDSHAKE_A_DONE_MAX,
+       .table = handshake_done_policy,
+};
+
+/* Common nested types */
+/* Nothing to release: handshake_x509 holds only scalar members. */
+void handshake_x509_free(struct handshake_x509 *obj)
+{
+}
+
+/* Parse one nested x509 attribute set into the handshake_x509 pointed
+ * to by yarg->data.  Returns 0 on success or MNL_CB_ERROR when an
+ * attribute fails policy validation.
+ */
+int handshake_x509_parse(struct ynl_parse_arg *yarg,
+                        const struct nlattr *nested)
+{
+       struct handshake_x509 *dst = yarg->data;
+       const struct nlattr *attr;
+
+       mnl_attr_for_each_nested(attr, nested) {
+               if (mnl_attr_get_type(attr) == HANDSHAKE_A_X509_CERT) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.cert = 1;
+                       dst->cert = mnl_attr_get_u32(attr);
+               }
+               else if (mnl_attr_get_type(attr) == HANDSHAKE_A_X509_PRIVKEY) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.privkey = 1;
+                       dst->privkey = mnl_attr_get_u32(attr);
+               }
+       }
+
+       return 0;
+}
+
+/* ============== HANDSHAKE_CMD_ACCEPT ============== */
+/* HANDSHAKE_CMD_ACCEPT - do */
+/* Release an accept request; it owns no heap members beyond itself. */
+void handshake_accept_req_free(struct handshake_accept_req *req)
+{
+       free(req);
+}
+
+/* Release an accept response and everything it owns: the peer-identity
+ * array, each certificate entry plus the certificate array, and the
+ * peername string.
+ */
+void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp)
+{
+       unsigned int i;
+
+       free(rsp->peer_identity);
+       for (i = 0; i < rsp->n_certificate; i++)
+               handshake_x509_free(&rsp->certificate[i]);
+       free(rsp->certificate);
+       free(rsp->peername);
+       free(rsp);
+}
+
+/* Parse a HANDSHAKE_CMD_ACCEPT message into the handshake_accept_rsp
+ * supplied via yarg->data.  Multi-attr arrays (peer-identity,
+ * certificate) are counted in a first walk over the attributes,
+ * allocated, then filled in a second walk.  Returns MNL_CB_OK or
+ * MNL_CB_ERROR.
+ */
+int handshake_accept_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+       struct ynl_parse_arg *yarg = data;
+       struct handshake_accept_rsp *dst;
+       unsigned int n_peer_identity = 0;
+       unsigned int n_certificate = 0;
+       const struct nlattr *attr;
+       struct ynl_parse_arg parg;
+       int i;
+
+       dst = yarg->data;
+       parg.ys = yarg->ys;
+
+       /* Refuse to parse twice into the same struct - the arrays below
+        * would otherwise be overwritten and leak.
+        */
+       if (dst->certificate)
+               return ynl_error_parse(yarg, "attribute already present (accept.certificate)");
+       if (dst->peer_identity)
+               return ynl_error_parse(yarg, "attribute already present (accept.peer-identity)");
+
+       /* First pass: scalars are stored directly, array attrs are only
+        * counted here.
+        */
+       mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+               if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_SOCKFD) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.sockfd = 1;
+                       dst->sockfd = mnl_attr_get_u32(attr);
+               }
+               else if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_MESSAGE_TYPE) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.message_type = 1;
+                       dst->message_type = mnl_attr_get_u32(attr);
+               }
+               else if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_TIMEOUT) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.timeout = 1;
+                       dst->timeout = mnl_attr_get_u32(attr);
+               }
+               else if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_AUTH_MODE) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.auth_mode = 1;
+                       dst->auth_mode = mnl_attr_get_u32(attr);
+               }
+               else if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) {
+                       n_peer_identity++;
+               }
+               else if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_CERTIFICATE) {
+                       n_certificate++;
+               }
+               else if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_PEERNAME) {
+                       unsigned int len;
+
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+
+                       /* Bounded by the attr payload; copy and NUL-terminate.
+                        * NOTE(review): malloc result is not checked - OOM
+                        * here dereferences NULL.
+                        */
+                       len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+                       dst->_present.peername_len = len;
+                       dst->peername = malloc(len + 1);
+                       memcpy(dst->peername, mnl_attr_get_str(attr), len);
+                       dst->peername[len] = 0;
+               }
+       }
+
+       /* Second pass: allocate and fill the arrays counted above.
+        * NOTE(review): calloc results are not checked before use.
+        */
+       if (n_certificate) {
+               dst->certificate = calloc(n_certificate, sizeof(*dst->certificate));
+               dst->n_certificate = n_certificate;
+               i = 0;
+               parg.rsp_policy = &handshake_x509_nest;
+               mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+                       if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_CERTIFICATE) {
+                               parg.data = &dst->certificate[i];
+                               if (handshake_x509_parse(&parg, attr))
+                                       return MNL_CB_ERROR;
+                               i++;
+                       }
+               }
+       }
+       if (n_peer_identity) {
+               dst->peer_identity = calloc(n_peer_identity, sizeof(*dst->peer_identity));
+               dst->n_peer_identity = n_peer_identity;
+               i = 0;
+               mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+                       if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) {
+                               dst->peer_identity[i] = mnl_attr_get_u32(attr);
+                               i++;
+                       }
+               }
+       }
+
+       return MNL_CB_OK;
+}
+
+/* Issue HANDSHAKE_CMD_ACCEPT: send the (optional) handler-class attr,
+ * execute the request synchronously via ynl_exec() and return the
+ * parsed response, or NULL on error (error details in ys->err).
+ * Caller owns the returned struct; free with handshake_accept_rsp_free().
+ */
+struct handshake_accept_rsp *
+handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req)
+{
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+       struct handshake_accept_rsp *rsp;
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_ACCEPT, 1);
+       ys->req_policy = &handshake_accept_nest;
+       yrs.yarg.rsp_policy = &handshake_accept_nest;
+
+       if (req->_present.handler_class)
+               mnl_attr_put_u32(nlh, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, req->handler_class);
+
+       /* NOTE(review): calloc result is not checked before use. */
+       rsp = calloc(1, sizeof(*rsp));
+       yrs.yarg.data = rsp;
+       yrs.cb = handshake_accept_rsp_parse;
+       yrs.rsp_cmd = HANDSHAKE_CMD_ACCEPT;
+
+       err = ynl_exec(ys, nlh, &yrs);
+       if (err < 0)
+               goto err_free;
+
+       return rsp;
+
+err_free:
+       handshake_accept_rsp_free(rsp);
+       return NULL;
+}
+
+/* HANDSHAKE_CMD_ACCEPT - notify */
+/* Release a notification wrapper and the embedded accept response's
+ * owned members (mirrors handshake_accept_rsp_free() for rsp->obj).
+ */
+void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp)
+{
+       unsigned int i;
+
+       free(rsp->obj.peer_identity);
+       for (i = 0; i < rsp->obj.n_certificate; i++)
+               handshake_x509_free(&rsp->obj.certificate[i]);
+       free(rsp->obj.certificate);
+       free(rsp->obj.peername);
+       free(rsp);
+}
+
+/* ============== HANDSHAKE_CMD_DONE ============== */
+/* HANDSHAKE_CMD_DONE - do */
+/* Release a done request; it owns the remote_auth array. */
+void handshake_done_req_free(struct handshake_done_req *req)
+{
+       free(req->remote_auth);
+       free(req);
+}
+
+/* Issue HANDSHAKE_CMD_DONE with the attrs set in @req (remote-auth is
+ * a multi-attr array, one u32 attr per element).  Returns 0 on
+ * success, -1 on error (details in ys->err).
+ */
+int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req)
+{
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_DONE, 1);
+       ys->req_policy = &handshake_done_nest;
+
+       if (req->_present.status)
+               mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_STATUS, req->status);
+       if (req->_present.sockfd)
+               mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_SOCKFD, req->sockfd);
+       for (unsigned int i = 0; i < req->n_remote_auth; i++)
+               mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]);
+
+       err = ynl_exec(ys, nlh, NULL);
+       if (err < 0)
+               return -1;
+
+       return 0;
+}
+
+/* --------------- Common notification parsing --------------- */
+/* Receive one notification from the socket, allocate the matching
+ * type-specific wrapper and parse the payload into it.  Returns the
+ * base-type pointer (caller frees via rsp->free) or NULL on short
+ * read, unknown command, or parse failure.
+ */
+struct ynl_ntf_base_type *handshake_ntf_parse(struct ynl_sock *ys)
+{
+       struct ynl_parse_arg yarg = { .ys = ys, };
+       struct ynl_ntf_base_type *rsp;
+       struct genlmsghdr *genlh;
+       struct nlmsghdr *nlh;
+       mnl_cb_t parse;
+       int len, err;
+
+       /* Short reads (and recvfrom errors, len < 0) fail the size check. */
+       len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
+       if (len < (ssize_t)(sizeof(*nlh) + sizeof(*genlh)))
+               return NULL;
+
+       nlh = (struct nlmsghdr *)ys->rx_buf;
+       genlh = mnl_nlmsg_get_payload(nlh);
+
+       /* Only READY notifications are defined; they reuse the ACCEPT
+        * response layout and parser.
+        */
+       switch (genlh->cmd) {
+       case HANDSHAKE_CMD_READY:
+               rsp = calloc(1, sizeof(struct handshake_accept_ntf));
+               parse = handshake_accept_rsp_parse;
+               yarg.rsp_policy = &handshake_accept_nest;
+               rsp->free = (void *)handshake_accept_ntf_free;
+               break;
+       default:
+               ynl_error_unknown_notification(ys, genlh->cmd);
+               return NULL;
+       }
+
+       yarg.data = rsp->data;
+
+       err = mnl_cb_run2(ys->rx_buf, len, 0, 0, parse, &yarg,
+                        ynl_cb_array, NLMSG_MIN_TYPE);
+       if (err < 0)
+               goto err_free;
+
+       rsp->family = nlh->nlmsg_type;
+       rsp->cmd = genlh->cmd;
+       return rsp;
+
+err_free:
+       /* Plain free(): the partially-parsed payload's owned members are
+        * the parser's responsibility up to the failure point.
+        * NOTE(review): members parsed before the failure may leak here -
+        * confirm against the type-specific free helpers.
+        */
+       free(rsp);
+       return NULL;
+}
+
+/* Per-command notification dispatch table consumed through the family
+ * descriptor below (alloc size, parser, policy, destructor).
+ */
+static const struct ynl_ntf_info handshake_ntf_info[] =  {
+       [HANDSHAKE_CMD_READY] =  {
+               .alloc_sz       = sizeof(struct handshake_accept_ntf),
+               .cb             = handshake_accept_rsp_parse,
+               .policy         = &handshake_accept_nest,
+               .free           = (void *)handshake_accept_ntf_free,
+       },
+};
+
+const struct ynl_family ynl_handshake_family =  {
+       .name           = "handshake",
+       .ntf_info       = handshake_ntf_info,
+       .ntf_info_size  = MNL_ARRAY_SIZE(handshake_ntf_info),
+};
diff --git a/tools/net/ynl/generated/handshake-user.h b/tools/net/ynl/generated/handshake-user.h
new file mode 100644 (file)
index 0000000..38e0844
--- /dev/null
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*     Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN user header */
+
+#ifndef _LINUX_HANDSHAKE_GEN_H
+#define _LINUX_HANDSHAKE_GEN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>
+#include <linux/handshake.h>
+
+struct ynl_sock;
+
+extern const struct ynl_family ynl_handshake_family;
+
+/* Enums */
+const char *handshake_op_str(int op);
+const char *handshake_handler_class_str(enum handshake_handler_class value);
+const char *handshake_msg_type_str(enum handshake_msg_type value);
+const char *handshake_auth_str(enum handshake_auth value);
+
+/* Common nested types */
+struct handshake_x509 {
+       struct {
+               __u32 cert:1;
+               __u32 privkey:1;
+       } _present;
+
+       __u32 cert;
+       __u32 privkey;
+};
+
+/* ============== HANDSHAKE_CMD_ACCEPT ============== */
+/* HANDSHAKE_CMD_ACCEPT - do */
+/* Request for HANDSHAKE_CMD_ACCEPT; use the setter below so the
+ * matching _present bit is raised along with the value.
+ */
+struct handshake_accept_req {
+       struct {
+               __u32 handler_class:1;
+       } _present;
+
+       enum handshake_handler_class handler_class;
+};
+
+/* Zero-initialized allocation; may return NULL on OOM. */
+static inline struct handshake_accept_req *handshake_accept_req_alloc(void)
+{
+       return calloc(1, sizeof(struct handshake_accept_req));
+}
+void handshake_accept_req_free(struct handshake_accept_req *req);
+
+static inline void
+handshake_accept_req_set_handler_class(struct handshake_accept_req *req,
+                                      enum handshake_handler_class handler_class)
+{
+       req->_present.handler_class = 1;
+       req->handler_class = handler_class;
+}
+
+/* Parsed HANDSHAKE_CMD_ACCEPT response.  peer_identity and certificate
+ * are heap arrays sized by the n_* counters; peername is a
+ * NUL-terminated heap string (length also in _present.peername_len).
+ */
+struct handshake_accept_rsp {
+       struct {
+               __u32 sockfd:1;
+               __u32 message_type:1;
+               __u32 timeout:1;
+               __u32 auth_mode:1;
+               __u32 peername_len;
+       } _present;
+
+       __u32 sockfd;
+       enum handshake_msg_type message_type;
+       __u32 timeout;
+       enum handshake_auth auth_mode;
+       unsigned int n_peer_identity;
+       __u32 *peer_identity;
+       unsigned int n_certificate;
+       struct handshake_x509 *certificate;
+       char *peername;
+};
+
+void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp);
+
+/*
+ * Handler retrieves next queued handshake request
+ */
+struct handshake_accept_rsp *
+handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req);
+
+/* HANDSHAKE_CMD_ACCEPT - notify */
+struct handshake_accept_ntf {
+       __u16 family;
+       __u8 cmd;
+       struct ynl_ntf_base_type *next;
+       void (*free)(struct handshake_accept_ntf *ntf);
+       struct handshake_accept_rsp obj __attribute__ ((aligned (8)));
+};
+
+void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp);
+
+/* ============== HANDSHAKE_CMD_DONE ============== */
+/* HANDSHAKE_CMD_DONE - do */
+/* Request for HANDSHAKE_CMD_DONE.  remote_auth is a heap array owned
+ * by the request (freed by handshake_done_req_free()).
+ */
+struct handshake_done_req {
+       struct {
+               __u32 status:1;
+               __u32 sockfd:1;
+       } _present;
+
+       __u32 status;
+       __u32 sockfd;
+       unsigned int n_remote_auth;
+       __u32 *remote_auth;
+};
+
+/* Zero-initialized allocation; may return NULL on OOM. */
+static inline struct handshake_done_req *handshake_done_req_alloc(void)
+{
+       return calloc(1, sizeof(struct handshake_done_req));
+}
+void handshake_done_req_free(struct handshake_done_req *req);
+
+static inline void
+handshake_done_req_set_status(struct handshake_done_req *req, __u32 status)
+{
+       req->_present.status = 1;
+       req->status = status;
+}
+static inline void
+handshake_done_req_set_sockfd(struct handshake_done_req *req, __u32 sockfd)
+{
+       req->_present.sockfd = 1;
+       req->sockfd = sockfd;
+}
+/* Takes ownership of @remote_auth (the previous array is freed; the
+ * new one will be freed by handshake_done_req_free()).
+ */
+static inline void
+__handshake_done_req_set_remote_auth(struct handshake_done_req *req,
+                                    __u32 *remote_auth,
+                                    unsigned int n_remote_auth)
+{
+       free(req->remote_auth);
+       req->remote_auth = remote_auth;
+       req->n_remote_auth = n_remote_auth;
+}
+
+/*
+ * Handler reports handshake completion
+ */
+int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req);
+
+/* --------------- Common notification parsing --------------- */
+struct ynl_ntf_base_type *handshake_ntf_parse(struct ynl_sock *ys);
+
+#endif /* _LINUX_HANDSHAKE_GEN_H */
diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c
new file mode 100644 (file)
index 0000000..aea5c7c
--- /dev/null
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/*     Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN user source */
+
+#include <stdlib.h>
+#include "netdev-user.h"
+#include "ynl.h"
+#include <linux/netdev.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+/* Enums */
+static const char * const netdev_op_strmap[] = {
+       [NETDEV_CMD_DEV_GET] = "dev-get",
+       [NETDEV_CMD_DEV_ADD_NTF] = "dev-add-ntf",
+       [NETDEV_CMD_DEV_DEL_NTF] = "dev-del-ntf",
+       [NETDEV_CMD_DEV_CHANGE_NTF] = "dev-change-ntf",
+};
+
+/* Map a netdev genetlink command number to its spec name, or NULL
+ * when the value is outside the table.
+ */
+const char *netdev_op_str(int op)
+{
+       if (op >= 0 && op < (int)MNL_ARRAY_SIZE(netdev_op_strmap))
+               return netdev_op_strmap[op];
+       return NULL;
+}
+
+static const char * const netdev_xdp_act_strmap[] = {
+       [0] = "basic",
+       [1] = "redirect",
+       [2] = "ndo-xmit",
+       [3] = "xsk-zerocopy",
+       [4] = "hw-offload",
+       [5] = "rx-sg",
+       [6] = "ndo-xmit-sg",
+};
+
+/* Name the XDP feature flag in @value: ffs() converts the lowest set
+ * bit to a table index (0 maps to -1, i.e. NULL).  Only the lowest bit
+ * is reported if several are set.
+ * NOTE(review): if the enum's underlying type is unsigned, the < 0
+ * check cannot fire - confirm against the generated enum.
+ */
+const char *netdev_xdp_act_str(enum netdev_xdp_act value)
+{
+       value = ffs(value) - 1;
+       if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_act_strmap))
+               return NULL;
+       return netdev_xdp_act_strmap[value];
+}
+
+/* Policies */
+extern struct ynl_policy_nest netdev_dev_nest;
+
+struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = {
+       [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, },
+       [NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, },
+       [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, },
+};
+
+struct ynl_policy_nest netdev_dev_nest = {
+       .max_attr = NETDEV_A_DEV_MAX,
+       .table = netdev_dev_policy,
+};
+
+/* Common nested types */
+/* ============== NETDEV_CMD_DEV_GET ============== */
+/* NETDEV_CMD_DEV_GET - do */
+/* Neither struct owns heap members; freeing the struct suffices. */
+void netdev_dev_get_req_free(struct netdev_dev_get_req *req)
+{
+       free(req);
+}
+
+void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp)
+{
+       free(rsp);
+}
+
+/* Parse a NETDEV_CMD_DEV_GET message into the netdev_dev_get_rsp
+ * supplied via yarg->data.  Returns MNL_CB_OK or MNL_CB_ERROR on
+ * attribute validation failure.
+ */
+int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+       struct ynl_parse_arg *yarg = data;
+       struct netdev_dev_get_rsp *dst;
+       const struct nlattr *attr;
+
+       dst = yarg->data;
+
+       mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+               if (mnl_attr_get_type(attr) == NETDEV_A_DEV_IFINDEX) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.ifindex = 1;
+                       dst->ifindex = mnl_attr_get_u32(attr);
+               }
+               else if (mnl_attr_get_type(attr) == NETDEV_A_DEV_XDP_FEATURES) {
+                       if (ynl_attr_validate(yarg, attr))
+                               return MNL_CB_ERROR;
+                       dst->_present.xdp_features = 1;
+                       dst->xdp_features = mnl_attr_get_u64(attr);
+               }
+       }
+
+       return MNL_CB_OK;
+}
+
+/* Issue NETDEV_CMD_DEV_GET for one device (optional ifindex attr) and
+ * return the parsed response, or NULL on error (details in ys->err).
+ * Caller owns the returned struct; free with netdev_dev_get_rsp_free().
+ */
+struct netdev_dev_get_rsp *
+netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req)
+{
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+       struct netdev_dev_get_rsp *rsp;
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1);
+       ys->req_policy = &netdev_dev_nest;
+       yrs.yarg.rsp_policy = &netdev_dev_nest;
+
+       if (req->_present.ifindex)
+               mnl_attr_put_u32(nlh, NETDEV_A_DEV_IFINDEX, req->ifindex);
+
+       /* NOTE(review): calloc result is not checked before use. */
+       rsp = calloc(1, sizeof(*rsp));
+       yrs.yarg.data = rsp;
+       yrs.cb = netdev_dev_get_rsp_parse;
+       yrs.rsp_cmd = NETDEV_CMD_DEV_GET;
+
+       err = ynl_exec(ys, nlh, &yrs);
+       if (err < 0)
+               goto err_free;
+
+       return rsp;
+
+err_free:
+       netdev_dev_get_rsp_free(rsp);
+       return NULL;
+}
+
+/* NETDEV_CMD_DEV_GET - dump */
+/* Free every node of a dump result list.  The list is terminated by
+ * the YNL_LIST_END sentinel, not by NULL.
+ */
+void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp)
+{
+       struct netdev_dev_get_list *walk = rsp;
+
+       while ((void *)walk != YNL_LIST_END) {
+               struct netdev_dev_get_list *gone = walk;
+
+               walk = walk->next;
+               free(gone);
+       }
+}
+
+/* Dump all netdevs: run NETDEV_CMD_DEV_GET as a dump and collect one
+ * list node per device.  Returns the sentinel-terminated list (free
+ * with netdev_dev_get_list_free()) or NULL on error.
+ */
+struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys)
+{
+       struct ynl_dump_state yds = {};
+       struct nlmsghdr *nlh;
+       int err;
+
+       yds.ys = ys;
+       yds.alloc_sz = sizeof(struct netdev_dev_get_list);
+       yds.cb = netdev_dev_get_rsp_parse;
+       yds.rsp_cmd = NETDEV_CMD_DEV_GET;
+       yds.rsp_policy = &netdev_dev_nest;
+
+       nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1);
+
+       err = ynl_exec_dump(ys, nlh, &yds);
+       if (err < 0)
+               goto free_list;
+
+       return yds.first;
+
+free_list:
+       /* Release whatever portion of the dump arrived before the error. */
+       netdev_dev_get_list_free(yds.first);
+       return NULL;
+}
+
+/* NETDEV_CMD_DEV_GET - notify */
+void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp)
+{
+       free(rsp);
+}
+
+/* --------------- Common notification parsing --------------- */
+/* Receive one netdev notification from the socket, allocate the
+ * matching wrapper and parse the payload into it.  Returns the
+ * base-type pointer (caller frees via rsp->free) or NULL on short
+ * read, unknown command, or parse failure.
+ */
+struct ynl_ntf_base_type *netdev_ntf_parse(struct ynl_sock *ys)
+{
+       struct ynl_parse_arg yarg = { .ys = ys, };
+       struct ynl_ntf_base_type *rsp;
+       struct genlmsghdr *genlh;
+       struct nlmsghdr *nlh;
+       mnl_cb_t parse;
+       int len, err;
+
+       /* Short reads (and recvfrom errors, len < 0) fail the size check. */
+       len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
+       if (len < (ssize_t)(sizeof(*nlh) + sizeof(*genlh)))
+               return NULL;
+
+       nlh = (struct nlmsghdr *)ys->rx_buf;
+       genlh = mnl_nlmsg_get_payload(nlh);
+
+       /* All three notification kinds reuse the DEV_GET response layout. */
+       switch (genlh->cmd) {
+       case NETDEV_CMD_DEV_ADD_NTF:
+       case NETDEV_CMD_DEV_DEL_NTF:
+       case NETDEV_CMD_DEV_CHANGE_NTF:
+               rsp = calloc(1, sizeof(struct netdev_dev_get_ntf));
+               parse = netdev_dev_get_rsp_parse;
+               yarg.rsp_policy = &netdev_dev_nest;
+               rsp->free = (void *)netdev_dev_get_ntf_free;
+               break;
+       default:
+               ynl_error_unknown_notification(ys, genlh->cmd);
+               return NULL;
+       }
+
+       yarg.data = rsp->data;
+
+       err = mnl_cb_run2(ys->rx_buf, len, 0, 0, parse, &yarg,
+                        ynl_cb_array, NLMSG_MIN_TYPE);
+       if (err < 0)
+               goto err_free;
+
+       rsp->family = nlh->nlmsg_type;
+       rsp->cmd = genlh->cmd;
+       return rsp;
+
+err_free:
+       free(rsp);
+       return NULL;
+}
+
+/* Per-command notification dispatch table consumed through the family
+ * descriptor below; all three entries share the DEV_GET parse path.
+ */
+static const struct ynl_ntf_info netdev_ntf_info[] =  {
+       [NETDEV_CMD_DEV_ADD_NTF] =  {
+               .alloc_sz       = sizeof(struct netdev_dev_get_ntf),
+               .cb             = netdev_dev_get_rsp_parse,
+               .policy         = &netdev_dev_nest,
+               .free           = (void *)netdev_dev_get_ntf_free,
+       },
+       [NETDEV_CMD_DEV_DEL_NTF] =  {
+               .alloc_sz       = sizeof(struct netdev_dev_get_ntf),
+               .cb             = netdev_dev_get_rsp_parse,
+               .policy         = &netdev_dev_nest,
+               .free           = (void *)netdev_dev_get_ntf_free,
+       },
+       [NETDEV_CMD_DEV_CHANGE_NTF] =  {
+               .alloc_sz       = sizeof(struct netdev_dev_get_ntf),
+               .cb             = netdev_dev_get_rsp_parse,
+               .policy         = &netdev_dev_nest,
+               .free           = (void *)netdev_dev_get_ntf_free,
+       },
+};
+
+const struct ynl_family ynl_netdev_family =  {
+       .name           = "netdev",
+       .ntf_info       = netdev_ntf_info,
+       .ntf_info_size  = MNL_ARRAY_SIZE(netdev_ntf_info),
+};
diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h
new file mode 100644 (file)
index 0000000..d146bc4
--- /dev/null
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*     Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN user header */
+
+#ifndef _LINUX_NETDEV_GEN_H
+#define _LINUX_NETDEV_GEN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>
+#include <linux/netdev.h>
+
+struct ynl_sock;
+
+extern const struct ynl_family ynl_netdev_family;
+
+/* Enums */
+const char *netdev_op_str(int op);
+const char *netdev_xdp_act_str(enum netdev_xdp_act value);
+
+/* Common nested types */
+/* ============== NETDEV_CMD_DEV_GET ============== */
+/* NETDEV_CMD_DEV_GET - do */
+/* Request for NETDEV_CMD_DEV_GET; use the setter below so the
+ * matching _present bit is raised along with the value.
+ */
+struct netdev_dev_get_req {
+       struct {
+               __u32 ifindex:1;
+       } _present;
+
+       __u32 ifindex;
+};
+
+/* Zero-initialized allocation; may return NULL on OOM. */
+static inline struct netdev_dev_get_req *netdev_dev_get_req_alloc(void)
+{
+       return calloc(1, sizeof(struct netdev_dev_get_req));
+}
+void netdev_dev_get_req_free(struct netdev_dev_get_req *req);
+
+static inline void
+netdev_dev_get_req_set_ifindex(struct netdev_dev_get_req *req, __u32 ifindex)
+{
+       req->_present.ifindex = 1;
+       req->ifindex = ifindex;
+}
+
+/* Parsed NETDEV_CMD_DEV_GET response; scalar members only. */
+struct netdev_dev_get_rsp {
+       struct {
+               __u32 ifindex:1;
+               __u32 xdp_features:1;
+       } _present;
+
+       __u32 ifindex;
+       __u64 xdp_features;
+};
+
+void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp);
+
+/*
+ * Get / dump information about a netdev.
+ */
+struct netdev_dev_get_rsp *
+netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req);
+
+/* NETDEV_CMD_DEV_GET - dump: sentinel-terminated list of responses. */
+struct netdev_dev_get_list {
+       struct netdev_dev_get_list *next;
+       struct netdev_dev_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp);
+
+struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys);
+
+/* NETDEV_CMD_DEV_GET - notify: wrapper layout matches
+ * ynl_ntf_base_type with the response embedded at obj.
+ */
+struct netdev_dev_get_ntf {
+       __u16 family;
+       __u8 cmd;
+       struct ynl_ntf_base_type *next;
+       void (*free)(struct netdev_dev_get_ntf *ntf);
+       struct netdev_dev_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp);
+
+/* --------------- Common notification parsing --------------- */
+struct ynl_ntf_base_type *netdev_ntf_parse(struct ynl_sock *ys);
+
+#endif /* _LINUX_NETDEV_GEN_H */
diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile
new file mode 100644 (file)
index 0000000..d2e50fd
--- /dev/null
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CC=gcc
+CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+ifeq ("$(DEBUG)","1")
+  CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+
+SRCS=$(wildcard *.c)
+OBJS=$(patsubst %.c,%.o,${SRCS})
+
+include $(wildcard *.d)
+
+all: ynl.a
+
+ynl.a: $(OBJS)
+       ar rcs $@ $(OBJS)
+clean:
+       rm -f *.o *.d *~
+
+hardclean: clean
+       rm -f *.a
+
+%.o: %.c
+       $(COMPILE.c) -MMD -c -o $@ $<
+
+# Fix: hardclean is a pseudo-target like all/clean and must be declared
+# .PHONY so a stray file named "hardclean" cannot mask it.
+.PHONY: all clean hardclean
+.DEFAULT_GOAL=all
index a0241ad..ada22b0 100644 (file)
@@ -226,11 +226,15 @@ class SpecStructMember(SpecElement):
     Represents a single struct member attribute.
 
     Attributes:
-        type    string, type of the member attribute
+        type        string, type of the member attribute
+        byte_order  string or None for native byte order
+        enum        string, name of the enum definition
     """
     def __init__(self, family, yaml):
         super().__init__(family, yaml)
         self.type = yaml['type']
+        self.byte_order = yaml.get('byte-order')
+        self.enum = yaml.get('enum')
 
 
 class SpecStruct(SpecElement):
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
new file mode 100644 (file)
index 0000000..514e0d6
--- /dev/null
@@ -0,0 +1,901 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#include <errno.h>
+#include <poll.h>
+#include <string.h>
+#include <stdlib.h>
+#include <linux/types.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+#include "ynl.h"
+
+#define ARRAY_SIZE(arr)                (sizeof(arr) / sizeof(*arr))
+
+/* Error-reporting helpers.  The __yerr* variants store the code/message
+ * into a caller-supplied struct ynl_error (a NULL destination is silently
+ * ignored); the yerr*/perr variants target the socket's own ys->err.
+ * perr/__perr capture errno as the error code.
+ */
+#define __yerr_msg(yse, _msg...)                                       \
+       ({                                                              \
+               struct ynl_error *_yse = (yse);                         \
+                                                                       \
+               if (_yse) {                                             \
+                       snprintf(_yse->msg, sizeof(_yse->msg) - 1,  _msg); \
+                       _yse->msg[sizeof(_yse->msg) - 1] = 0;           \
+               }                                                       \
+       })
+
+#define __yerr_code(yse, _code...)             \
+       ({                                      \
+               struct ynl_error *_yse = (yse); \
+                                               \
+               if (_yse) {                     \
+                       _yse->code = _code;     \
+               }                               \
+       })
+
+#define __yerr(yse, _code, _msg...)            \
+       ({                                      \
+               __yerr_msg(yse, _msg);          \
+               __yerr_code(yse, _code);        \
+       })
+
+#define __perr(yse, _msg)              __yerr(yse, errno, _msg)
+
+#define yerr_msg(_ys, _msg...)         __yerr_msg(&(_ys)->err, _msg)
+#define yerr(_ys, _code, _msg...)      __yerr(&(_ys)->err, _code, _msg)
+#define perr(_ys, _msg)                        __yerr(&(_ys)->err, errno, _msg)
+
+/* -- Netlink boiler plate */
+/* Append ".<attr-name>" for @type to @str, advancing *n by the number of
+ * characters that would have been written.  Returns non-zero when the
+ * walk must stop: no policy, attribute type out of bounds, or the
+ * attribute has no name in the policy table.
+ * NOTE(review): the "!policy"/"!oob"/"!name" snprintf calls pass str_sz
+ * rather than str_sz - *n; callers appear to pass str already offset by
+ * n (or n == 0), so this looks intentional — confirm against callers.
+ */
+static int
+ynl_err_walk_report_one(struct ynl_policy_nest *policy, unsigned int type,
+                       char *str, int str_sz, int *n)
+{
+       if (!policy) {
+               if (*n < str_sz)
+                       *n += snprintf(str, str_sz, "!policy");
+               return 1;
+       }
+
+       if (type > policy->max_attr) {
+               if (*n < str_sz)
+                       *n += snprintf(str, str_sz, "!oob");
+               return 1;
+       }
+
+       if (!policy->table[type].name) {
+               if (*n < str_sz)
+                       *n += snprintf(str, str_sz, "!name");
+               return 1;
+       }
+
+       if (*n < str_sz)
+               *n += snprintf(str, str_sz - *n,
+                              ".%s", policy->table[type].name);
+       return 0;
+}
+
+/* Translate a byte offset reported in an extack (NLMSGERR_ATTR_OFFS /
+ * NLMSGERR_ATTR_MISS_NEST) into a human-readable dotted attribute path
+ * (e.g. ".a.b"), recursing into nests.  @start/@end bound the attribute
+ * payload of the request message; @off is relative to @start.  Writes at
+ * most @str_sz chars into @str and returns the number of characters that
+ * would have been written.  When the offset lands exactly on a nest
+ * header (@off becomes 0) and @nest_pol is non-NULL, the nest's policy
+ * is returned through it so the caller can name a missing inner attr.
+ */
+static int
+ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
+            struct ynl_policy_nest *policy, char *str, int str_sz,
+            struct ynl_policy_nest **nest_pol)
+{
+       unsigned int astart_off, aend_off;
+       const struct nlattr *attr;
+       unsigned int data_len;
+       unsigned int type;
+       bool found = false;
+       int n = 0;
+
+       if (!policy) {
+               if (n < str_sz)
+                       n += snprintf(str, str_sz, "!policy");
+               return n;
+       }
+
+       data_len = end - start;
+
+       /* Find the first attribute whose span reaches past @off. */
+       mnl_attr_for_each_payload(start, data_len) {
+               astart_off = (char *)attr - (char *)start;
+               aend_off = astart_off + mnl_attr_get_payload_len(attr);
+               if (aend_off <= off)
+                       continue;
+
+               found = true;
+               break;
+       }
+       if (!found)
+               return 0;
+
+       /* Make @off relative to the matched attribute. */
+       off -= astart_off;
+
+       type = mnl_attr_get_type(attr);
+
+       if (ynl_err_walk_report_one(policy, type, str, str_sz, &n))
+               return n;
+
+       if (!off) {
+               if (nest_pol)
+                       *nest_pol = policy->table[type].nest;
+               return n;
+       }
+
+       if (!policy->table[type].nest) {
+               if (n < str_sz)
+                       n += snprintf(str, str_sz, "!nest");
+               return n;
+       }
+
+       /* Descend into the nest: skip this attr's header, recurse. */
+       off -= sizeof(struct nlattr);
+       start =  mnl_attr_get_payload(attr);
+       end = start + mnl_attr_get_payload_len(attr);
+
+       return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest,
+                               &str[n], str_sz - n, nest_pol);
+}
+
+/* Compat shim for system headers that predate the MISS_TYPE/MISS_NEST
+ * extack attributes.  The self-referential NLMSGERR_ATTR_MAX define
+ * works because the preprocessor does not re-expand the macro's own
+ * name inside its replacement, so the inner token resolves to the
+ * pre-existing enum constant from <linux/netlink.h>.
+ */
+#define NLMSGERR_ATTR_MISS_TYPE (NLMSGERR_ATTR_POLICY + 1)
+#define NLMSGERR_ATTR_MISS_NEST (NLMSGERR_ATTR_POLICY + 2)
+#define NLMSGERR_ATTR_MAX (NLMSGERR_ATTR_MAX + 2)
+
+/* Parse the extended-ACK TLVs attached to an error/done message and
+ * format a human-readable message into ys->err.msg.  @hlen is the size
+ * of the fixed header preceding the TLVs.  Relies on ys->err.code having
+ * been set by the caller (used to pick "error" vs "warning" wording).
+ * Returns MNL_CB_OK, or MNL_CB_ERROR on malformed TLVs.
+ */
+static int
+ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
+                 unsigned int hlen)
+{
+       const struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {};
+       char miss_attr[sizeof(ys->err.msg)];
+       char bad_attr[sizeof(ys->err.msg)];
+       const struct nlattr *attr;
+       const char *str = NULL;
+
+       if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+               return MNL_CB_OK;
+
+       /* Collect and sanity-check the TLVs we care about. */
+       mnl_attr_for_each(attr, nlh, hlen) {
+               unsigned int len, type;
+
+               len = mnl_attr_get_payload_len(attr);
+               type = mnl_attr_get_type(attr);
+
+               if (type > NLMSGERR_ATTR_MAX)
+                       continue;
+
+               tb[type] = attr;
+
+               switch (type) {
+               case NLMSGERR_ATTR_OFFS:
+               case NLMSGERR_ATTR_MISS_TYPE:
+               case NLMSGERR_ATTR_MISS_NEST:
+                       if (len != sizeof(__u32))
+                               return MNL_CB_ERROR;
+                       break;
+               case NLMSGERR_ATTR_MSG:
+                       str = mnl_attr_get_payload(attr);
+                       /* kernel strings must be NUL-terminated */
+                       if (str[len - 1])
+                               return MNL_CB_ERROR;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       bad_attr[0] = '\0';
+       miss_attr[0] = '\0';
+
+       /* Bad attribute: walk the request we sent to name the offender. */
+       if (tb[NLMSGERR_ATTR_OFFS]) {
+               unsigned int n, off;
+               void *start, *end;
+
+               ys->err.attr_offs = mnl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]);
+
+               n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ",
+                            str ? " (" : "");
+
+               start = mnl_nlmsg_get_payload_offset(ys->nlh,
+                                                    sizeof(struct genlmsghdr));
+               end = mnl_nlmsg_get_payload_tail(ys->nlh);
+
+               off = ys->err.attr_offs;
+               off -= sizeof(struct nlmsghdr);
+               off -= sizeof(struct genlmsghdr);
+
+               n += ynl_err_walk(ys, start, end, off, ys->req_policy,
+                                 &bad_attr[n], sizeof(bad_attr) - n, NULL);
+
+               if (n >= sizeof(bad_attr))
+                       n = sizeof(bad_attr) - 1;
+               bad_attr[n] = '\0';
+       }
+       /* Missing attribute: name the attr type within the right nest. */
+       if (tb[NLMSGERR_ATTR_MISS_TYPE]) {
+               struct ynl_policy_nest *nest_pol = NULL;
+               unsigned int n, off, type;
+               void *start, *end;
+               int n2;
+
+               type = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]);
+
+               n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ",
+                            bad_attr[0] ? ", " : (str ? " (" : ""));
+
+               start = mnl_nlmsg_get_payload_offset(ys->nlh,
+                                                    sizeof(struct genlmsghdr));
+               end = mnl_nlmsg_get_payload_tail(ys->nlh);
+
+               nest_pol = ys->req_policy;
+               if (tb[NLMSGERR_ATTR_MISS_NEST]) {
+                       off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
+                       off -= sizeof(struct nlmsghdr);
+                       off -= sizeof(struct genlmsghdr);
+
+                       n += ynl_err_walk(ys, start, end, off, ys->req_policy,
+                                         &miss_attr[n], sizeof(miss_attr) - n,
+                                         &nest_pol);
+               }
+
+               n2 = 0;
+               ynl_err_walk_report_one(nest_pol, type, &miss_attr[n],
+                                       sizeof(miss_attr) - n, &n2);
+               n += n2;
+
+               if (n >= sizeof(miss_attr))
+                       n = sizeof(miss_attr) - 1;
+               miss_attr[n] = '\0';
+       }
+
+       /* Implicitly depend on ys->err.code already set */
+       if (str)
+               yerr_msg(ys, "Kernel %s: '%s'%s%s%s",
+                        ys->err.code ? "error" : "warning",
+                        str, bad_attr, miss_attr,
+                        bad_attr[0] || miss_attr[0] ? ")" : "");
+       else if (bad_attr[0] || miss_attr[0])
+               yerr_msg(ys, "Kernel %s: %s%s",
+                        ys->err.code ? "error" : "warning",
+                        bad_attr, miss_attr);
+
+       return MNL_CB_OK;
+}
+
+/* NLMSG_ERROR handler: record the (positive) error code in ys->err and
+ * errno, then parse any extack TLVs for a readable message.  An error
+ * code of 0 is an ACK, reported as MNL_CB_STOP rather than an error.
+ */
+static int ynl_cb_error(const struct nlmsghdr *nlh, void *data)
+{
+       const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh);
+       struct ynl_parse_arg *yarg = data;
+       unsigned int hlen;
+       int code;
+
+       /* kernel reports negative errnos; normalize to positive */
+       code = err->error >= 0 ? err->error : -err->error;
+       yarg->ys->err.code = code;
+       errno = code;
+
+       /* without NLM_F_CAPPED the original request follows the header */
+       hlen = sizeof(*err);
+       if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+               hlen += mnl_nlmsg_get_payload_len(&err->msg);
+
+       ynl_ext_ack_check(yarg->ys, nlh, hlen);
+
+       return code ? MNL_CB_ERROR : MNL_CB_STOP;
+}
+
+/* NLMSG_DONE handler: a dump terminator may carry an error code in its
+ * payload; surface it (plus extack) the same way NLMSG_ERROR would.
+ */
+static int ynl_cb_done(const struct nlmsghdr *nlh, void *data)
+{
+       struct ynl_parse_arg *yarg = data;
+       int err;
+
+       err = *(int *)NLMSG_DATA(nlh);
+       if (err < 0) {
+               yarg->ys->err.code = -err;
+               errno = -err;
+
+               ynl_ext_ack_check(yarg->ys, nlh, sizeof(int));
+
+               return MNL_CB_ERROR;
+       }
+       return MNL_CB_STOP;
+}
+
+/* NLMSG_NOOP / NLMSG_OVERRUN handler: ignore the message. */
+static int ynl_cb_noop(const struct nlmsghdr *nlh, void *data)
+{
+       return MNL_CB_OK;
+}
+
+/* Control-message dispatch table passed to mnl_cb_run2() everywhere. */
+mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE] = {
+       [NLMSG_NOOP]    = ynl_cb_noop,
+       [NLMSG_ERROR]   = ynl_cb_error,
+       [NLMSG_DONE]    = ynl_cb_done,
+       [NLMSG_OVERRUN] = ynl_cb_noop,
+};
+
+/* Attribute validation */
+
+/* Validate a received attribute against the response policy in @yarg:
+ * checks the type is in range and the payload length matches what the
+ * policy type requires.  Returns 0 when valid, -1 (with ys->err filled
+ * in) otherwise.  Used by generated parsing code.
+ */
+int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
+{
+       struct ynl_policy_attr *policy;
+       unsigned int type, len;
+       unsigned char *data;
+
+       data = mnl_attr_get_payload(attr);
+       len = mnl_attr_get_payload_len(attr);
+       type = mnl_attr_get_type(attr);
+       if (type > yarg->rsp_policy->max_attr) {
+               yerr(yarg->ys, YNL_ERROR_INTERNAL,
+                    "Internal error, validating unknown attribute");
+               return -1;
+       }
+
+       policy = &yarg->rsp_policy->table[type];
+
+       switch (policy->type) {
+       case YNL_PT_REJECT:
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Rejected attribute (%s)", policy->name);
+               return -1;
+       case YNL_PT_IGNORE:
+               break;
+       case YNL_PT_U8:
+               if (len == sizeof(__u8))
+                       break;
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Invalid attribute (u8 %s)", policy->name);
+               return -1;
+       case YNL_PT_U16:
+               if (len == sizeof(__u16))
+                       break;
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Invalid attribute (u16 %s)", policy->name);
+               return -1;
+       case YNL_PT_U32:
+               if (len == sizeof(__u32))
+                       break;
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Invalid attribute (u32 %s)", policy->name);
+               return -1;
+       case YNL_PT_U64:
+               if (len == sizeof(__u64))
+                       break;
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Invalid attribute (u64 %s)", policy->name);
+               return -1;
+       case YNL_PT_FLAG:
+               /* Let flags grow into real attrs, why not.. */
+               break;
+       case YNL_PT_NEST:
+               if (!len || len >= sizeof(*attr))
+                       break;
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Invalid attribute (nest %s)", policy->name);
+               return -1;
+       case YNL_PT_BINARY:
+               /* policy->len of 0 means "any length is fine" */
+               if (!policy->len || len == policy->len)
+                       break;
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Invalid attribute (binary %s)", policy->name);
+               return -1;
+       case YNL_PT_NUL_STR:
+               if ((!policy->len || len <= policy->len) && !data[len - 1])
+                       break;
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Invalid attribute (string %s)", policy->name);
+               return -1;
+       default:
+               yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+                    "Invalid attribute (unknown %s)", policy->name);
+               return -1;
+       }
+
+       return 0;
+}
+
+/* Generic code */
+
+/* Clear the per-request error state; called at the start of each request. */
+static void ynl_err_reset(struct ynl_sock *ys)
+{
+       ys->err.code = 0;
+       ys->err.attr_offs = 0;
+       ys->err.msg[0] = 0;
+}
+
+/* Start a new netlink message in the socket's tx buffer; bumps the
+ * sequence number and remembers the header in ys->nlh for later use
+ * (e.g. extack offset translation).
+ */
+struct nlmsghdr *ynl_msg_start(struct ynl_sock *ys, __u32 id, __u16 flags)
+{
+       struct nlmsghdr *nlh;
+
+       ynl_err_reset(ys);
+
+       nlh = ys->nlh = mnl_nlmsg_put_header(ys->tx_buf);
+       nlh->nlmsg_type = id;
+       nlh->nlmsg_flags = flags;
+       nlh->nlmsg_seq = ++ys->seq;
+
+       return nlh;
+}
+
+/* Like ynl_msg_start() but also appends a genetlink header with the
+ * given command and version.
+ */
+struct nlmsghdr *
+ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
+               __u8 cmd, __u8 version)
+{
+       struct genlmsghdr gehdr;
+       struct nlmsghdr *nlh;
+       void *data;
+
+       nlh = ynl_msg_start(ys, id, flags);
+
+       memset(&gehdr, 0, sizeof(gehdr));
+       gehdr.cmd = cmd;
+       gehdr.version = version;
+
+       data = mnl_nlmsg_put_extra_header(nlh, sizeof(gehdr));
+       memcpy(data, &gehdr, sizeof(gehdr));
+
+       return nlh;
+}
+
+/* Convenience wrappers with the flag combinations for do/dump requests. */
+void ynl_msg_start_req(struct ynl_sock *ys, __u32 id)
+{
+       ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK);
+}
+
+void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
+{
+       ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+}
+
+struct nlmsghdr *
+ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version)
+{
+       return ynl_gemsg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK, cmd, version);
+}
+
+struct nlmsghdr *
+ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version)
+{
+       return ynl_gemsg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
+                              cmd, version);
+}
+
+/* Receive and consume the ACK for a request.  @ret is the result of the
+ * preceding message-processing loop: 0 means processing stopped without
+ * an outstanding ACK, which is reported as an error.
+ */
+int ynl_recv_ack(struct ynl_sock *ys, int ret)
+{
+       if (!ret) {
+               yerr(ys, YNL_ERROR_EXPECT_ACK,
+                    "Expecting an ACK but nothing received");
+               return -1;
+       }
+
+       ret = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
+       if (ret < 0) {
+               perr(ys, "Socket receive failed");
+               return ret;
+       }
+       /* only control messages (ACK/error) are expected here */
+       return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid,
+                         ynl_cb_null, ys);
+}
+
+/* Data-message callback for flows where no payload is expected;
+ * any message reaching it is an error.
+ */
+int ynl_cb_null(const struct nlmsghdr *nlh, void *data)
+{
+       struct ynl_parse_arg *yarg = data;
+
+       yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG,
+            "Received a message when none were expected");
+
+       return MNL_CB_ERROR;
+}
+
+/* Init/fini and genetlink boiler plate */
+/* Parse the CTRL_ATTR_MCAST_GROUPS nest: count the multicast groups,
+ * allocate ys->mcast_groups, and record each group's id and name.
+ * Returns 0 on success, MNL_CB_ERROR on allocation failure.
+ */
+static int
+ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts)
+{
+       const struct nlattr *entry, *attr;
+       unsigned int i;
+
+       mnl_attr_for_each_nested(attr, mcasts)
+               ys->n_mcast_groups++;
+
+       if (!ys->n_mcast_groups)
+               return 0;
+
+       ys->mcast_groups = calloc(ys->n_mcast_groups,
+                                 sizeof(*ys->mcast_groups));
+       if (!ys->mcast_groups)
+               return MNL_CB_ERROR;
+
+       i = 0;
+       mnl_attr_for_each_nested(entry, mcasts) {
+               mnl_attr_for_each_nested(attr, entry) {
+                       if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_ID)
+                               ys->mcast_groups[i].id = mnl_attr_get_u32(attr);
+                       if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) {
+                               strncpy(ys->mcast_groups[i].name,
+                                       mnl_attr_get_str(attr),
+                                       GENL_NAMSIZ - 1);
+                               ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0;
+                       }
+               }
+               /* Fix: the index was never advanced, so every group
+                * overwrote slot 0 and the rest stayed zeroed.
+                */
+               i++;
+       }
+
+       return 0;
+}
+
+/* GETFAMILY response callback: extract the numeric family ID and the
+ * multicast group list.  Returns MNL_CB_OK, or MNL_CB_ERROR when the
+ * family ID attribute is malformed or absent.
+ */
+static int ynl_get_family_info_cb(const struct nlmsghdr *nlh, void *data)
+{
+       struct ynl_parse_arg *yarg = data;
+       struct ynl_sock *ys = yarg->ys;
+       const struct nlattr *attr;
+       bool found_id = false;  /* fix: was true, making the missing-ID
+                                * check below unreachable dead code */
+
+       mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+               if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GROUPS)
+                       if (ynl_get_family_info_mcast(ys, attr))
+                               return MNL_CB_ERROR;
+
+               if (mnl_attr_get_type(attr) != CTRL_ATTR_FAMILY_ID)
+                       continue;
+
+               if (mnl_attr_get_payload_len(attr) != sizeof(__u16)) {
+                       yerr(ys, YNL_ERROR_ATTR_INVALID, "Invalid family ID");
+                       return MNL_CB_ERROR;
+               }
+
+               ys->family_id = mnl_attr_get_u16(attr);
+               found_id = true;
+       }
+
+       if (!found_id) {
+               yerr(ys, YNL_ERROR_ATTR_MISSING, "Family ID missing");
+               return MNL_CB_ERROR;
+       }
+       return MNL_CB_OK;
+}
+
+/* Resolve @family_name to its numeric genetlink ID (and multicast
+ * groups) via a CTRL_CMD_GETFAMILY request.  Fills ys->family_id /
+ * ys->mcast_groups.  Returns 0 on success, negative on failure.
+ */
+static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name)
+{
+       struct ynl_parse_arg yarg = { .ys = ys, };
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1);
+       mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
+
+       err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);
+       if (err < 0) {
+               perr(ys, "failed to request socket family info");
+               return err;
+       }
+
+       err = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
+       if (err <= 0) {
+               perr(ys, "failed to receive the socket family info");
+               return err;
+       }
+       err = mnl_cb_run2(ys->rx_buf, err, ys->seq, ys->portid,
+                         ynl_get_family_info_cb, &yarg,
+                         ynl_cb_array, ARRAY_SIZE(ynl_cb_array));
+       if (err < 0) {
+               /* the callback may have allocated the group table already;
+                * clear the pointer so no caller can free it twice
+                */
+               free(ys->mcast_groups);
+               ys->mcast_groups = NULL;
+               perr(ys, "failed to receive the socket family info - no such family?");
+               return err;
+       }
+
+       return ynl_recv_ack(ys, err);
+}
+
+/* Create a YNL socket for family @yf: open a NETLINK_GENERIC socket,
+ * enable CAP_ACK and EXT_ACK, and resolve the family ID by name.
+ * On failure returns NULL; if @yse is non-NULL the error details are
+ * copied into it (the only place errors are reported without a socket).
+ */
+struct ynl_sock *
+ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
+{
+       struct ynl_sock *ys;
+       int one = 1;
+
+       /* single allocation: struct + tx buffer + rx buffer */
+       ys = malloc(sizeof(*ys) + 2 * MNL_SOCKET_BUFFER_SIZE);
+       if (!ys)
+               return NULL;
+       memset(ys, 0, sizeof(*ys));
+
+       ys->family = yf;
+       ys->tx_buf = &ys->raw_buf[0];
+       ys->rx_buf = &ys->raw_buf[MNL_SOCKET_BUFFER_SIZE];
+       ys->ntf_last_next = &ys->ntf_first;
+
+       ys->sock = mnl_socket_open(NETLINK_GENERIC);
+       if (!ys->sock) {
+               __perr(yse, "failed to create a netlink socket");
+               goto err_free_sock;
+       }
+
+       if (mnl_socket_setsockopt(ys->sock, NETLINK_CAP_ACK,
+                                 &one, sizeof(one))) {
+               __perr(yse, "failed to enable netlink ACK");
+               goto err_close_sock;
+       }
+       if (mnl_socket_setsockopt(ys->sock, NETLINK_EXT_ACK,
+                                 &one, sizeof(one))) {
+               __perr(yse, "failed to enable netlink ext ACK");
+               goto err_close_sock;
+       }
+
+       ys->seq = random();
+       ys->portid = mnl_socket_get_portid(ys->sock);
+
+       if (ynl_sock_read_family(ys, yf->name)) {
+               if (yse)
+                       memcpy(yse, &ys->err, sizeof(*yse));
+               goto err_close_sock;
+       }
+
+       return ys;
+
+err_close_sock:
+       mnl_socket_close(ys->sock);
+err_free_sock:
+       free(ys);
+       return NULL;
+}
+
+/* Tear down a YNL socket: close the netlink socket, free any queued
+ * (parsed but undelivered) notifications, the multicast group table,
+ * and the socket itself.
+ */
+void ynl_sock_destroy(struct ynl_sock *ys)
+{
+       struct ynl_ntf_base_type *ntf;
+
+       mnl_socket_close(ys->sock);
+       while ((ntf = ynl_ntf_dequeue(ys)))
+               ynl_ntf_free(ntf);
+       free(ys->mcast_groups);
+       free(ys);
+}
+
+/* YNL multicast handling */
+
+/* Free a notification via its type-specific destructor. */
+void ynl_ntf_free(struct ynl_ntf_base_type *ntf)
+{
+       ntf->free(ntf);
+}
+
+/* Join the multicast group named @grp_name (looked up in the table
+ * populated during family resolution).  Returns 0 on success, -1 with
+ * ys->err set on failure.
+ */
+int ynl_subscribe(struct ynl_sock *ys, const char *grp_name)
+{
+       unsigned int i;
+       int err;
+
+       for (i = 0; i < ys->n_mcast_groups; i++)
+               if (!strcmp(ys->mcast_groups[i].name, grp_name))
+                       break;
+       if (i == ys->n_mcast_groups) {
+               yerr(ys, ENOENT, "Multicast group '%s' not found", grp_name);
+               return -1;
+       }
+
+       err = mnl_socket_setsockopt(ys->sock, NETLINK_ADD_MEMBERSHIP,
+                                   &ys->mcast_groups[i].id,
+                                   sizeof(ys->mcast_groups[i].id));
+       if (err < 0) {
+               perr(ys, "Subscribing to multicast group failed");
+               return -1;
+       }
+
+       return 0;
+}
+
+/* Expose the raw fd so callers can poll()/select() on it. */
+int ynl_socket_get_fd(struct ynl_sock *ys)
+{
+       return mnl_socket_get_fd(ys->sock);
+}
+
+/* Pop the oldest parsed notification off the queue (NULL when empty).
+ * Ownership transfers to the caller, who must ynl_ntf_free() it.
+ */
+struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys)
+{
+       struct ynl_ntf_base_type *ntf;
+
+       if (!ynl_has_ntf(ys))
+               return NULL;
+
+       ntf = ys->ntf_first;
+       ys->ntf_first = ntf->next;
+       /* dequeued the last element — reset the tail pointer */
+       if (ys->ntf_last_next == &ntf->next)
+               ys->ntf_last_next = &ys->ntf_first;
+
+       return ntf;
+}
+
+/* Parse one notification message: look up the per-command info in the
+ * family's ntf_info table, allocate and parse the object, and append it
+ * to the socket's notification queue.
+ */
+static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
+{
+       struct ynl_parse_arg yarg = { .ys = ys, };
+       const struct ynl_ntf_info *info;
+       struct ynl_ntf_base_type *rsp;
+       struct genlmsghdr *gehdr;
+       int ret;
+
+       gehdr = mnl_nlmsg_get_payload(nlh);
+       if (gehdr->cmd >= ys->family->ntf_info_size)
+               return MNL_CB_ERROR;
+       info = &ys->family->ntf_info[gehdr->cmd];
+       if (!info->cb)
+               return MNL_CB_ERROR;
+
+       rsp = calloc(1, info->alloc_sz);
+       /* fix: calloc() result was dereferenced without a NULL check */
+       if (!rsp)
+               return MNL_CB_ERROR;
+       rsp->free = info->free;
+       yarg.data = rsp->data;
+       yarg.rsp_policy = info->policy;
+
+       ret = info->cb(nlh, &yarg);
+       if (ret <= MNL_CB_STOP)
+               goto err_free;
+
+       rsp->family = nlh->nlmsg_type;
+       rsp->cmd = gehdr->cmd;
+
+       /* append to the tail of the notification queue */
+       *ys->ntf_last_next = rsp;
+       ys->ntf_last_next = &rsp->next;
+
+       return MNL_CB_OK;
+
+err_free:
+       info->free(rsp);
+       return MNL_CB_ERROR;
+}
+
+/* mnl_cb_t adapter for ynl_ntf_parse(). */
+static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data)
+{
+       return ynl_ntf_parse((struct ynl_sock *)data, nlh);
+}
+
+/* Drain any notifications waiting on the socket without blocking,
+ * parsing them onto the queue readable via ynl_ntf_dequeue().
+ * Returns 0 when the socket is drained, negative on error.
+ */
+int ynl_ntf_check(struct ynl_sock *ys)
+{
+       ssize_t len;
+       int err;
+
+       do {
+               /* libmnl doesn't let us pass flags to the recv to make
+                * it non-blocking so we need to poll() or peek() :|
+                */
+               struct pollfd pfd = { };
+
+               pfd.fd = mnl_socket_get_fd(ys->sock);
+               pfd.events = POLLIN;
+               err = poll(&pfd, 1, 1);
+               if (err < 1)
+                       return err;
+
+               len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
+                                         MNL_SOCKET_BUFFER_SIZE);
+               if (len < 0)
+                       return len;
+
+               err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
+                                 ynl_ntf_trampoline, ys,
+                                 ynl_cb_array, NLMSG_MIN_TYPE);
+               if (err < 0)
+                       return err;
+       } while (err > 0);
+
+       return 0;
+}
+
+/* YNL specific helpers used by the auto-generated code */
+
+/* Sentinel terminating dump result lists; an arbitrary non-NULL value
+ * so that iteration helpers can distinguish "end" from "empty entry".
+ */
+struct ynl_dump_list_type *YNL_LIST_END = (void *)(0xb4d123);
+
+/* Record an "unknown notification" error on the socket. */
+void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd)
+{
+       yerr(ys, YNL_ERROR_UNKNOWN_NTF,
+            "Unknown notification message type '%d'", cmd);
+}
+
+/* Record a response-parsing error and abort message processing. */
+int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
+{
+       yerr(yarg->ys, YNL_ERROR_INV_RESP, "Error parsing response: %s", msg);
+       return MNL_CB_ERROR;
+}
+
+/* Check whether @nlh is the response we asked for (@rsp_cmd); if it is
+ * some other command it is a notification that raced with our request,
+ * so queue it via ynl_ntf_parse().  Returns 0 when it's our response,
+ * >0 when consumed as a notification, <0 on error.
+ */
+static int
+ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
+{
+       struct genlmsghdr *gehdr;
+
+       if (mnl_nlmsg_get_payload_len(nlh) < sizeof(*gehdr)) {
+               yerr(ys, YNL_ERROR_INV_RESP,
+                    "Kernel responded with truncated message");
+               return -1;
+       }
+
+       gehdr = mnl_nlmsg_get_payload(nlh);
+       if (gehdr->cmd != rsp_cmd)
+               return ynl_ntf_parse(ys, nlh);
+
+       return 0;
+}
+
+/* mnl_cb_t adapter: filter out alien messages, then hand the real
+ * response to the request's parsing callback.
+ */
+static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data)
+{
+       struct ynl_req_state *yrs = data;
+       int ret;
+
+       ret = ynl_check_alien(yrs->yarg.ys, nlh, yrs->rsp_cmd);
+       if (ret)
+               return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK;
+
+       return yrs->cb(nlh, &yrs->yarg);
+}
+
+/* Send a "do" request and process messages until the exchange completes.
+ * Returns 0 on success, negative on error (details in ys->err).
+ */
+int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
+            struct ynl_req_state *yrs)
+{
+       ssize_t len;
+       int err;
+
+       err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len);
+       if (err < 0)
+               return err;
+
+       do {
+               len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
+                                         MNL_SOCKET_BUFFER_SIZE);
+               if (len < 0)
+                       return len;
+
+               err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
+                                 ynl_req_trampoline, yrs,
+                                 ynl_cb_array, NLMSG_MIN_TYPE);
+               if (err < 0)
+                       return err;
+       } while (err > 0);
+
+       return 0;
+}
+
+/* mnl_cb_t adapter for dumps: allocate a list node for each response
+ * object, link it onto the dump state, and parse into it.  Alien
+ * (notification) messages are queued instead of treated as objects.
+ */
+static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data)
+{
+       struct ynl_dump_state *ds = data;
+       struct ynl_dump_list_type *obj;
+       struct ynl_parse_arg yarg = {};
+       int ret;
+
+       ret = ynl_check_alien(ds->ys, nlh, ds->rsp_cmd);
+       if (ret)
+               return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK;
+
+       obj = calloc(1, ds->alloc_sz);
+       if (!obj)
+               return MNL_CB_ERROR;
+
+       if (!ds->first)
+               ds->first = obj;
+       if (ds->last)
+               ds->last->next = obj;
+       ds->last = obj;
+
+       yarg.ys = ds->ys;
+       yarg.rsp_policy = ds->rsp_policy;
+       yarg.data = &obj->data;
+
+       return ds->cb(nlh, &yarg);
+}
+
+/* Terminate the dump list with the YNL_LIST_END sentinel and return
+ * its head (or the sentinel itself for an empty dump).
+ */
+static void *ynl_dump_end(struct ynl_dump_state *ds)
+{
+       if (!ds->first)
+               return YNL_LIST_END;
+
+       ds->last->next = YNL_LIST_END;
+       return ds->first;
+}
+
+/* Send a dump request and collect all response objects into a
+ * sentinel-terminated list in @yds->first.  Returns 0 on success;
+ * on error returns -1 but still terminates the (partial) list so the
+ * caller can free what was collected.
+ */
+int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
+                 struct ynl_dump_state *yds)
+{
+       ssize_t len;
+       int err;
+
+       err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len);
+       if (err < 0)
+               return err;
+
+       do {
+               len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
+                                         MNL_SOCKET_BUFFER_SIZE);
+               if (len < 0)
+                       goto err_close_list;
+
+               err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
+                                 ynl_dump_trampoline, yds,
+                                 ynl_cb_array, NLMSG_MIN_TYPE);
+               if (err < 0)
+                       goto err_close_list;
+       } while (err > 0);
+
+       yds->first = ynl_dump_end(yds);
+       return 0;
+
+err_close_list:
+       yds->first = ynl_dump_end(yds);
+       return -1;
+}
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
new file mode 100644 (file)
index 0000000..9eafa35
--- /dev/null
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#ifndef __YNL_C_H
+#define __YNL_C_H 1
+
+#include <stddef.h>
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+#include <linux/types.h>
+
+struct mnl_socket;
+struct nlmsghdr;
+
+/*
+ * User facing code
+ */
+
+struct ynl_ntf_base_type;
+struct ynl_ntf_info;
+struct ynl_sock;
+
+enum ynl_error_code {
+       YNL_ERROR_NONE = 0,
+       __YNL_ERRNO_END = 4096,
+       YNL_ERROR_INTERNAL,
+       YNL_ERROR_EXPECT_ACK,
+       YNL_ERROR_EXPECT_MSG,
+       YNL_ERROR_UNEXPECT_MSG,
+       YNL_ERROR_ATTR_MISSING,
+       YNL_ERROR_ATTR_INVALID,
+       YNL_ERROR_UNKNOWN_NTF,
+       YNL_ERROR_INV_RESP,
+};
+
+/**
+ * struct ynl_error - error encountered by YNL
+ * @code:      errno (low values) or YNL error code (enum ynl_error_code)
+ * @attr_offs: offset of bad attribute (for very advanced users)
+ * @msg:       error message
+ *
+ * Error information for when YNL operations fail.
+ * Users should interact with the err member of struct ynl_sock directly.
+ * The main exception to that rule is ynl_sock_create().
+ */
+struct ynl_error {
+       enum ynl_error_code code;
+       unsigned int attr_offs;
+       char msg[512];
+};
+
+/**
+ * struct ynl_family - YNL family info
+ * Family description generated by codegen. Pass to ynl_sock_create().
+ */
+struct ynl_family {
+/* private: */
+       const char *name;
+       const struct ynl_ntf_info *ntf_info;
+       unsigned int ntf_info_size;
+};
+
+/**
+ * struct ynl_sock - YNL wrapped netlink socket
+ * @err: YNL error descriptor, cleared on every request.
+ */
+struct ynl_sock {
+       struct ynl_error err;
+
+/* private: */
+       const struct ynl_family *family;
+       struct mnl_socket *sock;
+       __u32 seq;
+       __u32 portid;
+       __u16 family_id;
+
+       unsigned int n_mcast_groups;
+       struct {
+               unsigned int id;
+               char name[GENL_NAMSIZ];
+       } *mcast_groups;
+
+       struct ynl_ntf_base_type *ntf_first;
+       struct ynl_ntf_base_type **ntf_last_next;
+
+       struct nlmsghdr *nlh;
+       struct ynl_policy_nest *req_policy;
+       unsigned char *tx_buf;
+       unsigned char *rx_buf;
+       unsigned char raw_buf[];
+};
+
+struct ynl_sock *
+ynl_sock_create(const struct ynl_family *yf, struct ynl_error *e);
+void ynl_sock_destroy(struct ynl_sock *ys);
+
+#define ynl_dump_foreach(dump, iter)                                   \
+       for (typeof(dump->obj) *iter = &dump->obj;                      \
+            !ynl_dump_obj_is_last(iter);                               \
+            iter = ynl_dump_obj_next(iter))
+
+int ynl_subscribe(struct ynl_sock *ys, const char *grp_name);
+int ynl_socket_get_fd(struct ynl_sock *ys);
+int ynl_ntf_check(struct ynl_sock *ys);
+
+/**
+ * ynl_has_ntf() - check if socket has *parsed* notifications
+ * @ys: active YNL socket
+ *
+ * Note that this does not take into account notifications sitting
+ * in netlink socket, just the notifications which have already been
+ * read and parsed (e.g. during a ynl_ntf_check() call).
+ */
+static inline bool ynl_has_ntf(struct ynl_sock *ys)
+{
+       return ys->ntf_last_next != &ys->ntf_first;
+}
+struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys);
+
+void ynl_ntf_free(struct ynl_ntf_base_type *ntf);
+
+/*
+ * YNL internals / low level stuff
+ */
+
+/* Generic mnl helper code */
+
+enum ynl_policy_type {
+       YNL_PT_REJECT = 1,
+       YNL_PT_IGNORE,
+       YNL_PT_NEST,
+       YNL_PT_FLAG,
+       YNL_PT_BINARY,
+       YNL_PT_U8,
+       YNL_PT_U16,
+       YNL_PT_U32,
+       YNL_PT_U64,
+       YNL_PT_NUL_STR,
+};
+
+struct ynl_policy_attr {
+       enum ynl_policy_type type;
+       unsigned int len;
+       const char *name;
+       struct ynl_policy_nest *nest;
+};
+
+struct ynl_policy_nest {
+       unsigned int max_attr;
+       struct ynl_policy_attr *table;
+};
+
+struct ynl_parse_arg {
+       struct ynl_sock *ys;
+       struct ynl_policy_nest *rsp_policy;
+       void *data;
+};
+
+struct ynl_dump_list_type {
+       struct ynl_dump_list_type *next;
+       unsigned char data[] __attribute__ ((aligned (8)));
+};
+extern struct ynl_dump_list_type *YNL_LIST_END;
+
+/* Dump iteration helpers operate on pointers to the user-visible @data
+ * member; they step back by offsetof(..., data) to reach the enclosing
+ * list node, then compare against / follow the YNL_LIST_END sentinel.
+ */
+static inline bool ynl_dump_obj_is_last(void *obj)
+{
+       unsigned long uptr = (unsigned long)obj;
+
+       uptr -= offsetof(struct ynl_dump_list_type, data);
+       return uptr == (unsigned long)YNL_LIST_END;
+}
+
+/* Advance from one dump object's data pointer to the next node's. */
+static inline void *ynl_dump_obj_next(void *obj)
+{
+       unsigned long uptr = (unsigned long)obj;
+       struct ynl_dump_list_type *list;
+
+       uptr -= offsetof(struct ynl_dump_list_type, data);
+       list = (void *)uptr;
+       uptr = (unsigned long)list->next;
+       uptr += offsetof(struct ynl_dump_list_type, data);
+
+       return (void *)uptr;
+}
+
+struct ynl_ntf_base_type {
+       __u16 family;
+       __u8 cmd;
+       struct ynl_ntf_base_type *next;
+       void (*free)(struct ynl_ntf_base_type *ntf);
+       unsigned char data[] __attribute__ ((aligned (8)));
+};
+
+extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE];
+
+struct nlmsghdr *
+ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
+struct nlmsghdr *
+ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
+
+int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
+
+int ynl_recv_ack(struct ynl_sock *ys, int ret);
+int ynl_cb_null(const struct nlmsghdr *nlh, void *data);
+
+/* YNL specific helpers used by the auto-generated code */
+
+struct ynl_req_state {
+       struct ynl_parse_arg yarg;
+       mnl_cb_t cb;
+       __u32 rsp_cmd;
+};
+
+struct ynl_dump_state {
+       struct ynl_sock *ys;
+       struct ynl_policy_nest *rsp_policy;
+       void *first;
+       struct ynl_dump_list_type *last;
+       size_t alloc_sz;
+       mnl_cb_t cb;
+       __u32 rsp_cmd;
+};
+
+struct ynl_ntf_info {
+       struct ynl_policy_nest *policy;
+       mnl_cb_t cb;
+       size_t alloc_sz;
+       void (*free)(struct ynl_ntf_base_type *ntf);
+};
+
+int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
+            struct ynl_req_state *yrs);
+int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
+                 struct ynl_dump_state *yds);
+
+void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd);
+int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg);
+
+#endif
index 3144f33..3b343d6 100644 (file)
@@ -1,10 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
+from collections import namedtuple
 import functools
 import os
 import random
 import socket
 import struct
+from struct import Struct
 import yaml
 
 from .nlspec import SpecFamily
@@ -76,10 +78,17 @@ class NlError(Exception):
 
 
 class NlAttr:
-    type_formats = { 'u8' : ('B', 1), 's8' : ('b', 1),
-                     'u16': ('H', 2), 's16': ('h', 2),
-                     'u32': ('I', 4), 's32': ('i', 4),
-                     'u64': ('Q', 8), 's64': ('q', 8) }
+    ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little'])
+    type_formats = {
+        'u8' : ScalarFormat(Struct('B'), Struct("B"),  Struct("B")),
+        's8' : ScalarFormat(Struct('b'), Struct("b"),  Struct("b")),
+        'u16': ScalarFormat(Struct('H'), Struct(">H"), Struct("<H")),
+        's16': ScalarFormat(Struct('h'), Struct(">h"), Struct("<h")),
+        'u32': ScalarFormat(Struct('I'), Struct(">I"), Struct("<I")),
+        's32': ScalarFormat(Struct('i'), Struct(">i"), Struct("<i")),
+        'u64': ScalarFormat(Struct('Q'), Struct(">Q"), Struct("<Q")),
+        's64': ScalarFormat(Struct('q'), Struct(">q"), Struct("<q"))
+    }
 
     def __init__(self, raw, offset):
         self._len, self._type = struct.unpack("HH", raw[offset:offset + 4])
@@ -88,25 +97,17 @@ class NlAttr:
         self.full_len = (self.payload_len + 3) & ~3
         self.raw = raw[offset + 4:offset + self.payload_len]
 
-    def format_byte_order(byte_order):
+    @classmethod
+    def get_format(cls, attr_type, byte_order=None):
+        format = cls.type_formats[attr_type]
         if byte_order:
-            return ">" if byte_order == "big-endian" else "<"
-        return ""
+            return format.big if byte_order == "big-endian" \
+                else format.little
+        return format.native
 
-    def as_u8(self):
-        return struct.unpack("B", self.raw)[0]
-
-    def as_u16(self, byte_order=None):
-        endian = NlAttr.format_byte_order(byte_order)
-        return struct.unpack(f"{endian}H", self.raw)[0]
-
-    def as_u32(self, byte_order=None):
-        endian = NlAttr.format_byte_order(byte_order)
-        return struct.unpack(f"{endian}I", self.raw)[0]
-
-    def as_u64(self, byte_order=None):
-        endian = NlAttr.format_byte_order(byte_order)
-        return struct.unpack(f"{endian}Q", self.raw)[0]
+    def as_scalar(self, attr_type, byte_order=None):
+        format = self.get_format(attr_type, byte_order)
+        return format.unpack(self.raw)[0]
 
     def as_strz(self):
         return self.raw.decode('ascii')[:-1]
@@ -115,17 +116,17 @@ class NlAttr:
         return self.raw
 
     def as_c_array(self, type):
-        format, _ = self.type_formats[type]
-        return list({ x[0] for x in struct.iter_unpack(format, self.raw) })
+        format = self.get_format(type)
+        return [ x[0] for x in format.iter_unpack(self.raw) ]
 
     def as_struct(self, members):
         value = dict()
         offset = 0
         for m in members:
             # TODO: handle non-scalar members
-            format, size = self.type_formats[m.type]
-            decoded = struct.unpack_from(format, self.raw, offset)
-            offset += size
+            format = self.get_format(m.type, m.byte_order)
+            decoded = format.unpack_from(self.raw, offset)
+            offset += format.size
             value[m.name] = decoded[0]
         return value
 
@@ -184,11 +185,11 @@ class NlMsg:
                 if extack.type == Netlink.NLMSGERR_ATTR_MSG:
                     self.extack['msg'] = extack.as_strz()
                 elif extack.type == Netlink.NLMSGERR_ATTR_MISS_TYPE:
-                    self.extack['miss-type'] = extack.as_u32()
+                    self.extack['miss-type'] = extack.as_scalar('u32')
                 elif extack.type == Netlink.NLMSGERR_ATTR_MISS_NEST:
-                    self.extack['miss-nest'] = extack.as_u32()
+                    self.extack['miss-nest'] = extack.as_scalar('u32')
                 elif extack.type == Netlink.NLMSGERR_ATTR_OFFS:
-                    self.extack['bad-attr-offs'] = extack.as_u32()
+                    self.extack['bad-attr-offs'] = extack.as_scalar('u32')
                 else:
                     if 'unknown' not in self.extack:
                         self.extack['unknown'] = []
@@ -272,11 +273,11 @@ def _genl_load_families():
                 fam = dict()
                 for attr in gm.raw_attrs:
                     if attr.type == Netlink.CTRL_ATTR_FAMILY_ID:
-                        fam['id'] = attr.as_u16()
+                        fam['id'] = attr.as_scalar('u16')
                     elif attr.type == Netlink.CTRL_ATTR_FAMILY_NAME:
                         fam['name'] = attr.as_strz()
                     elif attr.type == Netlink.CTRL_ATTR_MAXATTR:
-                        fam['maxattr'] = attr.as_u32()
+                        fam['maxattr'] = attr.as_scalar('u32')
                     elif attr.type == Netlink.CTRL_ATTR_MCAST_GROUPS:
                         fam['mcast'] = dict()
                         for entry in NlAttrs(attr.raw):
@@ -286,7 +287,7 @@ def _genl_load_families():
                                 if entry_attr.type == Netlink.CTRL_ATTR_MCAST_GRP_NAME:
                                     mcast_name = entry_attr.as_strz()
                                 elif entry_attr.type == Netlink.CTRL_ATTR_MCAST_GRP_ID:
-                                    mcast_id = entry_attr.as_u32()
+                                    mcast_id = entry_attr.as_scalar('u32')
                             if mcast_name and mcast_id is not None:
                                 fam['mcast'][mcast_name] = mcast_id
                 if 'name' in fam and 'id' in fam:
@@ -304,9 +305,9 @@ class GenlMsg:
 
         self.fixed_header_attrs = dict()
         for m in fixed_header_members:
-            format, size = NlAttr.type_formats[m.type]
-            decoded = struct.unpack_from(format, nl_msg.raw, offset)
-            offset += size
+            format = NlAttr.get_format(m.type, m.byte_order)
+            decoded = format.unpack_from(nl_msg.raw, offset)
+            offset += format.size
             self.fixed_header_attrs[m.name] = decoded[0]
 
         self.raw = nl_msg.raw[offset:]
@@ -381,21 +382,13 @@ class YnlFamily(SpecFamily):
                 attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue)
         elif attr["type"] == 'flag':
             attr_payload = b''
-        elif attr["type"] == 'u8':
-            attr_payload = struct.pack("B", int(value))
-        elif attr["type"] == 'u16':
-            endian = NlAttr.format_byte_order(attr.byte_order)
-            attr_payload = struct.pack(f"{endian}H", int(value))
-        elif attr["type"] == 'u32':
-            endian = NlAttr.format_byte_order(attr.byte_order)
-            attr_payload = struct.pack(f"{endian}I", int(value))
-        elif attr["type"] == 'u64':
-            endian = NlAttr.format_byte_order(attr.byte_order)
-            attr_payload = struct.pack(f"{endian}Q", int(value))
         elif attr["type"] == 'string':
             attr_payload = str(value).encode('ascii') + b'\x00'
         elif attr["type"] == 'binary':
             attr_payload = value
+        elif attr['type'] in NlAttr.type_formats:
+            format = NlAttr.get_format(attr['type'], attr.byte_order)
+            attr_payload = format.pack(int(value))
         else:
             raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}')
 
@@ -419,7 +412,11 @@ class YnlFamily(SpecFamily):
 
     def _decode_binary(self, attr, attr_spec):
         if attr_spec.struct_name:
-            decoded = attr.as_struct(self.consts[attr_spec.struct_name])
+            members = self.consts[attr_spec.struct_name]
+            decoded = attr.as_struct(members)
+            for m in members:
+                if m.enum:
+                    self._decode_enum(decoded, m)
         elif attr_spec.sub_type:
             decoded = attr.as_c_array(attr_spec.sub_type)
         else:
@@ -434,22 +431,16 @@ class YnlFamily(SpecFamily):
             if attr_spec["type"] == 'nest':
                 subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'])
                 decoded = subdict
-            elif attr_spec['type'] == 'u8':
-                decoded = attr.as_u8()
-            elif attr_spec['type'] == 'u16':
-                decoded = attr.as_u16(attr_spec.byte_order)
-            elif attr_spec['type'] == 'u32':
-                decoded = attr.as_u32(attr_spec.byte_order)
-            elif attr_spec['type'] == 'u64':
-                decoded = attr.as_u64(attr_spec.byte_order)
             elif attr_spec["type"] == 'string':
                 decoded = attr.as_strz()
             elif attr_spec["type"] == 'binary':
                 decoded = self._decode_binary(attr, attr_spec)
             elif attr_spec["type"] == 'flag':
                 decoded = True
+            elif attr_spec["type"] in NlAttr.type_formats:
+                decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
             else:
-                raise Exception(f'Unknown {attr.type} {attr_spec["name"]} {attr_spec["type"]}')
+                raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
 
             if not attr_spec.is_multi:
                 rsp[attr_spec['name']] = decoded
@@ -554,9 +545,9 @@ class YnlFamily(SpecFamily):
         if op.fixed_header:
             fixed_header_members = self.consts[op.fixed_header].members
             for m in fixed_header_members:
-                value = vals.pop(m.name)
-                format, _ = NlAttr.type_formats[m.type]
-                msg += struct.pack(format, value)
+                value = vals.pop(m.name) if m.name in vals else 0
+                format = NlAttr.get_format(m.type, m.byte_order)
+                msg += format.pack(value)
         for name, value in vals.items():
             msg += self._add_attr(op.attr_set.name, name, value)
         msg = _genl_msg_finalize(msg)
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
new file mode 100644 (file)
index 0000000..7b1f517
--- /dev/null
@@ -0,0 +1 @@
+netdev
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
new file mode 100644 (file)
index 0000000..714316c
--- /dev/null
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CC=gcc
+CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \
+       -I../lib/ -I../generated/
+ifeq ("$(DEBUG)","1")
+  CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+
+LDLIBS=-lmnl ../lib/ynl.a ../generated/protos.a
+
+SRCS=$(wildcard *.c)
+BINS=$(patsubst %.c,%,${SRCS})
+
+include $(wildcard *.d)
+
+all: $(BINS)
+
+$(BINS): ../lib/ynl.a ../generated/protos.a
+
+clean:
+       rm -f *.o *.d *~
+
+hardclean: clean
+       rm -f $(BINS)
+
+.PHONY: all clean
+.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c
new file mode 100644 (file)
index 0000000..d31268a
--- /dev/null
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <net/if.h>
+
+#include "netdev-user.h"
+
+/* netdev genetlink family code sample
+ * This sample shows off basics of the netdev family but also notification
+ * handling, hence the somewhat odd UI. We subscribe to notifications first
+ * then wait for ifc selection, so the socket may already accumulate
+ * notifications as we wait. This allows us to test that YNL can handle
+ * requests and notifications getting interleaved.
+ */
+
+static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op)
+{
+       char ifname[IF_NAMESIZE];
+       const char *name;
+
+       if (!d->_present.ifindex)
+               return;
+
+       name = if_indextoname(d->ifindex, ifname);
+       if (name)
+               printf("%8s", name);
+       printf("[%d]\t", d->ifindex);
+
+       if (!d->_present.xdp_features)
+               return;
+
+       printf("%llx:", d->xdp_features);
+       for (int i = 0; d->xdp_features > 1U << i; i++) {
+               if (d->xdp_features & (1U << i))
+                       printf(" %s", netdev_xdp_act_str(1 << i));
+       }
+
+       name = netdev_op_str(op);
+       if (name)
+               printf(" (ntf: %s)", name);
+       printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+       struct netdev_dev_get_list *devs;
+       struct ynl_ntf_base_type *ntf;
+       struct ynl_error yerr;
+       struct ynl_sock *ys;
+       int ifindex = 0;
+
+       if (argc > 1)
+               ifindex = strtol(argv[1], NULL, 0);
+
+       ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+       if (!ys) {
+               fprintf(stderr, "YNL: %s\n", yerr.msg);
+               return 1;
+       }
+
+       if (ynl_subscribe(ys, "mgmt"))
+               goto err_close;
+
+       printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): ");
+       scanf("%d", &ifindex);
+
+       if (ifindex > 0) {
+               struct netdev_dev_get_req *req;
+               struct netdev_dev_get_rsp *d;
+
+               req = netdev_dev_get_req_alloc();
+               netdev_dev_get_req_set_ifindex(req, ifindex);
+
+               d = netdev_dev_get(ys, req);
+               netdev_dev_get_req_free(req);
+               if (!d)
+                       goto err_close;
+
+               netdev_print_device(d, 0);
+               netdev_dev_get_rsp_free(d);
+       } else if (!ifindex) {
+               devs = netdev_dev_get_dump(ys);
+               if (!devs)
+                       goto err_close;
+
+               ynl_dump_foreach(devs, d)
+                       netdev_print_device(d, 0);
+               netdev_dev_get_list_free(devs);
+       } else if (ifindex == -2) {
+               ynl_ntf_check(ys);
+       }
+       while ((ntf = ynl_ntf_dequeue(ys))) {
+               netdev_print_device((struct netdev_dev_get_rsp *)&ntf->data,
+                                   ntf->cmd);
+               ynl_ntf_free(ntf);
+       }
+
+       ynl_sock_destroy(ys);
+       return 0;
+
+err_close:
+       fprintf(stderr, "YNL: %s\n", ys->err.msg);
+       ynl_sock_destroy(ys);
+       return 2;
+}
index cc2f8c9..c073407 100755 (executable)
@@ -94,7 +94,10 @@ class Type(SpecAttr):
     def arg_member(self, ri):
         member = self._complex_member_type(ri)
         if member:
-            return [member + ' *' + self.c_name]
+            arg = [member + ' *' + self.c_name]
+            if self.presence_type() == 'count':
+                arg += ['unsigned int n_' + self.c_name]
+            return arg
         raise Exception(f"Struct member not implemented for class type {self.type}")
 
     def struct_member(self, ri):
@@ -170,6 +173,7 @@ class Type(SpecAttr):
         for line in lines:
             ri.cw.p(line)
         ri.cw.block_end()
+        return True
 
     def _setter_lines(self, ri, member, presence):
         raise Exception(f"Setter not implemented for class type {self.type}")
@@ -187,9 +191,12 @@ class Type(SpecAttr):
                 code.append(presence + ' = 1;')
         code += self._setter_lines(ri, member, presence)
 
-        ri.cw.write_func('static inline void',
-                         f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}",
-                         body=code,
+        func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
+        free = bool([x for x in code if 'free(' in x])
+        alloc = bool([x for x in code if 'alloc(' in x])
+        if free and not alloc:
+            func_name = '__' + func_name
+        ri.cw.write_func('static inline void', func_name, body=code,
                          args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri))
 
 
@@ -197,6 +204,12 @@ class TypeUnused(Type):
     def presence_type(self):
         return ''
 
+    def arg_member(self, ri):
+        return []
+
+    def _attr_get(self, ri, var):
+        return ['return MNL_CB_ERROR;'], None, None
+
     def _attr_typol(self):
         return '.type = YNL_PT_REJECT, '
 
@@ -208,8 +221,14 @@ class TypePad(Type):
     def presence_type(self):
         return ''
 
+    def arg_member(self, ri):
+        return []
+
     def _attr_typol(self):
-        return '.type = YNL_PT_REJECT, '
+        return '.type = YNL_PT_IGNORE, '
+
+    def attr_get(self, ri, var, first):
+        pass
 
     def attr_policy(self, cw):
         pass
@@ -411,7 +430,8 @@ class TypeNest(Type):
                             f"{self.enum_name}, &{var}->{self.c_name})")
 
     def _attr_get(self, ri, var):
-        get_lines = [f"{self.nested_render_name}_parse(&parg, attr);"]
+        get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))",
+                     "return MNL_CB_ERROR;"]
         init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
                       f"parg.data = &{var}->{self.c_name};"]
         return get_lines, init_lines, None
@@ -430,6 +450,13 @@ class TypeMultiAttr(Type):
     def presence_type(self):
         return 'count'
 
+    def _mnl_type(self):
+        t = self.type
+        # mnl does not have a helper for signed types
+        if t[0] == 's':
+            t = 'u' + t[1:]
+        return t
+
     def _complex_member_type(self, ri):
         if 'type' not in self.attr or self.attr['type'] == 'nest':
             return f"struct {self.nested_render_name}"
@@ -443,9 +470,14 @@ class TypeMultiAttr(Type):
         return 'type' not in self.attr or self.attr['type'] == 'nest'
 
     def free(self, ri, var, ref):
-        if 'type' not in self.attr or self.attr['type'] == 'nest':
+        if self.attr['type'] in scalars:
+            ri.cw.p(f"free({var}->{ref}{self.c_name});")
+        elif 'type' not in self.attr or self.attr['type'] == 'nest':
             ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)")
             ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);')
+            ri.cw.p(f"free({var}->{ref}{self.c_name});")
+        else:
+            raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet")
 
     def _attr_typol(self):
         if 'type' not in self.attr or self.attr['type'] == 'nest':
@@ -456,7 +488,26 @@ class TypeMultiAttr(Type):
             raise Exception(f"Sub-type {self.attr['type']} not supported yet")
 
     def _attr_get(self, ri, var):
-        return f'{var}->n_{self.c_name}++;', None, None
+        return f'n_{self.c_name}++;', None, None
+
+    def attr_put(self, ri, var):
+        if self.attr['type'] in scalars:
+            put_type = self._mnl_type()
+            ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
+            ri.cw.p(f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
+        elif 'type' not in self.attr or self.attr['type'] == 'nest':
+            ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
+            self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
+                                f"{self.enum_name}, &{var}->{self.c_name}[i])")
+        else:
+            raise Exception(f"Put of MultiAttr sub-type {self.attr['type']} not supported yet")
+
+    def _setter_lines(self, ri, member, presence):
+        # For multi-attr we have a count, not presence, hack up the presence
+        presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name
+        return [f"free({member});",
+                f"{member} = {self.c_name};",
+                f"{presence} = n_{self.c_name};"]
 
 
 class TypeArrayNest(Type):
@@ -812,7 +863,8 @@ class Family(SpecFamily):
                     inherit = set()
                     nested = spec['nested-attributes']
                     if nested not in self.root_sets:
-                        self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit)
+                        if nested not in self.pure_nested_structs:
+                            self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit)
                     if attr in rs_members['request']:
                         self.pure_nested_structs[nested].request = True
                     if attr in rs_members['reply']:
@@ -872,6 +924,12 @@ class Family(SpecFamily):
                     self.hooks[when][op_mode]['set'].add(name)
                     self.hooks[when][op_mode]['list'].append(name)
 
+    def has_notifications(self):
+        for op in self.ops.values():
+            if 'notify' in op or 'event' in op:
+                return True
+        return False
+
 
 class RenderInfo:
     def __init__(self, cw, family, ku_space, op, op_name, op_mode, attr_set=None):
@@ -883,11 +941,12 @@ class RenderInfo:
         self.op_mode = op_mode
 
         # 'do' and 'dump' response parsing is identical
-        if op_mode != 'do' and 'dump' in op and 'do' in op and 'reply' in op['do'] and \
-           op["do"]["reply"] == op["dump"]["reply"]:
-            self.type_consistent = True
-        else:
-            self.type_consistent = op_mode == 'event'
+        self.type_consistent = True
+        if op_mode != 'do' and 'dump' in op and 'do' in op:
+            if ('reply' in op['do']) != ('reply' in op["dump"]):
+                self.type_consistent = False
+            elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]:
+                self.type_consistent = False
 
         self.attr_set = attr_set
         if not self.attr_set:
@@ -922,9 +981,10 @@ class CodeWriter:
     def _is_cond(cls, line):
         return line.startswith('if') or line.startswith('while') or line.startswith('for')
 
-    def p(self, line, add_ind=0):
+    def p(self, line, add_ind=0, eat_nl=False):
         if self._nl:
-            self._out.write('\n')
+            if not eat_nl:
+                self._out.write('\n')
             self._nl = False
         ind = self._ind
         if line[-1] == ':':
@@ -949,7 +1009,7 @@ class CodeWriter:
         if line and line[0] not in {';', ','}:
             line = ' ' + line
         self._ind -= 1
-        self.p('}' + line)
+        self.p('}' + line, eat_nl=True)
 
     def write_doc_line(self, doc, indent=True):
         words = doc.split()
@@ -1152,6 +1212,56 @@ def put_typol(cw, struct):
     cw.nl()
 
 
+def put_op_name_fwd(family, cw):
+    cw.write_func_prot('const char *', f'{family.name}_op_str', ['int op'], suffix=';')
+
+
+def put_op_name(family, cw):
+    map_name = f'{family.name}_op_strmap'
+    cw.block_start(line=f"static const char * const {map_name}[] =")
+    for op_name, op in family.msgs.items():
+        cw.p(f'[{op.enum_name}] = "{op_name}",')
+    cw.block_end(line=';')
+    cw.nl()
+
+    cw.write_func_prot('const char *', f'{family.name}_op_str', ['int op'])
+    cw.block_start()
+    cw.p(f'if (op < 0 || op >= (int)MNL_ARRAY_SIZE({map_name}))')
+    cw.p('return NULL;')
+    cw.p(f'return {map_name}[op];')
+    cw.block_end()
+    cw.nl()
+
+
+def put_enum_to_str_fwd(family, cw, enum):
+    args = [f'enum {enum.render_name} value']
+    if 'enum-name' in enum and not enum['enum-name']:
+        args = ['int value']
+    cw.write_func_prot('const char *', f'{enum.render_name}_str', args, suffix=';')
+
+
+def put_enum_to_str(family, cw, enum):
+    map_name = f'{enum.render_name}_strmap'
+    cw.block_start(line=f"static const char * const {map_name}[] =")
+    for entry in enum.entries.values():
+        cw.p(f'[{entry.value}] = "{entry.name}",')
+    cw.block_end(line=';')
+    cw.nl()
+
+    args = [f'enum {enum.render_name} value']
+    if 'enum-name' in enum and not enum['enum-name']:
+        args = ['int value']
+    cw.write_func_prot('const char *', f'{enum.render_name}_str', args)
+    cw.block_start()
+    if enum.type == 'flags':
+        cw.p('value = ffs(value) - 1;')
+    cw.p(f'if (value < 0 || value >= (int)MNL_ARRAY_SIZE({map_name}))')
+    cw.p('return NULL;')
+    cw.p(f'return {map_name}[value];')
+    cw.block_end()
+    cw.nl()
+
+
 def put_req_nested(ri, struct):
     func_args = ['struct nlmsghdr *nlh',
                  'unsigned int attr_type',
@@ -1196,6 +1306,11 @@ def _multi_parse(ri, struct, init_lines, local_vars):
         local_vars.append('struct ynl_parse_arg parg;')
         init_lines.append('parg.ys = yarg->ys;')
 
+    all_multi = array_nests | multi_attrs
+
+    for anest in sorted(all_multi):
+        local_vars.append(f"unsigned int n_{struct[anest].c_name} = 0;")
+
     ri.cw.block_start()
     ri.cw.write_func_lvar(local_vars)
 
@@ -1206,13 +1321,19 @@ def _multi_parse(ri, struct, init_lines, local_vars):
     for arg in struct.inherited:
         ri.cw.p(f'dst->{arg} = {arg};')
 
+    for anest in sorted(all_multi):
+        aspec = struct[anest]
+        ri.cw.p(f"if (dst->{aspec.c_name})")
+        ri.cw.p(f'return ynl_error_parse(yarg, "attribute already present ({struct.attr_set.name}.{aspec.name})");')
+
     ri.cw.nl()
     ri.cw.block_start(line=iter_line)
 
     first = True
     for _, arg in struct.member_list():
-        arg.attr_get(ri, 'dst', first=first)
-        first = False
+        good = arg.attr_get(ri, 'dst', first=first)
+        # First may be 'unused' or 'pad', ignore those
+        first &= not good
 
     ri.cw.block_end()
     ri.cw.nl()
@@ -1220,8 +1341,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
     for anest in sorted(array_nests):
         aspec = struct[anest]
 
-        ri.cw.block_start(line=f"if (dst->n_{aspec.c_name})")
-        ri.cw.p(f"dst->{aspec.c_name} = calloc(dst->n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+        ri.cw.block_start(line=f"if (n_{aspec.c_name})")
+        ri.cw.p(f"dst->{aspec.c_name} = calloc({aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+        ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
         ri.cw.p('i = 0;')
         ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
         ri.cw.block_start(line=f"mnl_attr_for_each_nested(attr, attr_{aspec.c_name})")
@@ -1235,8 +1357,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
 
     for anest in sorted(multi_attrs):
         aspec = struct[anest]
-        ri.cw.block_start(line=f"if (dst->n_{aspec.c_name})")
-        ri.cw.p(f"dst->{aspec.c_name} = calloc(dst->n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+        ri.cw.block_start(line=f"if (n_{aspec.c_name})")
+        ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+        ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
         ri.cw.p('i = 0;')
         if 'nested-attributes' in aspec:
             ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
@@ -1304,13 +1427,13 @@ def print_req(ri):
     ret_err = '-1'
     direction = "request"
     local_vars = ['struct nlmsghdr *nlh;',
-                  'int len, err;']
+                  'int err;']
 
     if 'reply' in ri.op[ri.op_mode]:
         ret_ok = 'rsp'
         ret_err = 'NULL'
         local_vars += [f'{type_name(ri, rdir(direction))} *rsp;',
-                       'struct ynl_parse_arg yarg = { .ys = ys, };']
+                       'struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };']
 
     print_prototype(ri, direction, terminate=False)
     ri.cw.block_start()
@@ -1320,41 +1443,39 @@ def print_req(ri):
 
     ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
     if 'reply' in ri.op[ri.op_mode]:
-        ri.cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
+        ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
     ri.cw.nl()
     for _, attr in ri.struct["request"].member_list():
         attr.attr_put(ri, "req")
     ri.cw.nl()
 
-    ri.cw.p('err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);')
-    ri.cw.p('if (err < 0)')
-    ri.cw.p(f"return {ret_err};")
-    ri.cw.nl()
-    ri.cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);')
-    ri.cw.p('if (len < 0)')
-    ri.cw.p(f"return {ret_err};")
-    ri.cw.nl()
-
+    parse_arg = "NULL"
     if 'reply' in ri.op[ri.op_mode]:
         ri.cw.p('rsp = calloc(1, sizeof(*rsp));')
-        ri.cw.p('yarg.data = rsp;')
+        ri.cw.p('yrs.yarg.data = rsp;')
+        ri.cw.p(f"yrs.cb = {op_prefix(ri, 'reply')}_parse;")
+        if ri.op.value is not None:
+            ri.cw.p(f'yrs.rsp_cmd = {ri.op.enum_name};')
+        else:
+            ri.cw.p(f'yrs.rsp_cmd = {ri.op.rsp_value};')
         ri.cw.nl()
-        ri.cw.p(f"err = {ri.nl.parse_cb_run(op_prefix(ri, 'reply') + '_parse', '&yarg', False)};")
-        ri.cw.p('if (err < 0)')
+        parse_arg = '&yrs'
+    ri.cw.p(f"err = ynl_exec(ys, nlh, {parse_arg});")
+    ri.cw.p('if (err < 0)')
+    if 'reply' in ri.op[ri.op_mode]:
         ri.cw.p('goto err_free;')
-        ri.cw.nl()
-
-    ri.cw.p('err = ynl_recv_ack(ys, err);')
-    ri.cw.p('if (err)')
-    ri.cw.p('goto err_free;')
+    else:
+        ri.cw.p('return -1;')
     ri.cw.nl()
+
     ri.cw.p(f"return {ret_ok};")
     ri.cw.nl()
-    ri.cw.p('err_free:')
 
     if 'reply' in ri.op[ri.op_mode]:
+        ri.cw.p('err_free:')
         ri.cw.p(f"{call_free(ri, rdir(direction), 'rsp')}")
-    ri.cw.p(f"return {ret_err};")
+        ri.cw.p(f"return {ret_err};")
+
     ri.cw.block_end()
 
 
@@ -1364,7 +1485,7 @@ def print_dump(ri):
     ri.cw.block_start()
     local_vars = ['struct ynl_dump_state yds = {};',
                   'struct nlmsghdr *nlh;',
-                  'int len, err;']
+                  'int err;']
 
     for var in local_vars:
         ri.cw.p(f'{var}')
@@ -1373,6 +1494,10 @@ def print_dump(ri):
     ri.cw.p('yds.ys = ys;')
     ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});")
     ri.cw.p(f"yds.cb = {op_prefix(ri, 'reply', deref=True)}_parse;")
+    if ri.op.value is not None:
+        ri.cw.p(f'yds.rsp_cmd = {ri.op.enum_name};')
+    else:
+        ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
     ri.cw.p(f"yds.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
     ri.cw.nl()
     ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
@@ -1384,21 +1509,10 @@ def print_dump(ri):
             attr.attr_put(ri, "req")
     ri.cw.nl()
 
-    ri.cw.p('err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);')
+    ri.cw.p('err = ynl_exec_dump(ys, nlh, &yds);')
     ri.cw.p('if (err < 0)')
-    ri.cw.p('return NULL;')
-    ri.cw.nl()
-
-    ri.cw.block_start(line='do')
-    ri.cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);')
-    ri.cw.p('if (len < 0)')
     ri.cw.p('goto free_list;')
     ri.cw.nl()
-    ri.cw.p(f"err = {ri.nl.parse_cb_run('ynl_dump_trampoline', '&yds', False, indent=2)};")
-    ri.cw.p('if (err < 0)')
-    ri.cw.p('goto free_list;')
-    ri.cw.block_end(line='while (err > 0);')
-    ri.cw.nl()
 
     ri.cw.p('return yds.first;')
     ri.cw.nl()
@@ -1418,6 +1532,14 @@ def free_arg_name(direction):
     return 'obj'
 
 
+def print_alloc_wrapper(ri, direction):
+    name = op_prefix(ri, direction)
+    ri.cw.write_func_prot(f'static inline struct {name} *', f"{name}_alloc", [f"void"])
+    ri.cw.block_start()
+    ri.cw.p(f'return calloc(1, sizeof(struct {name}));')
+    ri.cw.block_end()
+
+
 def print_free_prototype(ri, direction, suffix=';'):
     name = op_prefix(ri, direction)
     arg = free_arg_name(direction)
@@ -1465,6 +1587,7 @@ def print_type_full(ri, struct):
 
 def print_type_helpers(ri, direction, deref=False):
     print_free_prototype(ri, direction)
+    ri.cw.nl()
 
     if ri.ku_space == 'user' and direction == 'request':
         for _, attr in ri.struct[direction].member_list():
@@ -1473,6 +1596,7 @@ def print_type_helpers(ri, direction, deref=False):
 
 
 def print_req_type_helpers(ri):
+    print_alloc_wrapper(ri, "request")
     print_type_helpers(ri, "request")
 
 
@@ -1496,6 +1620,12 @@ def print_req_type(ri):
     print_type(ri, "request")
 
 
+def print_req_free(ri):
+    if 'request' not in ri.op[ri.op_mode]:
+        return
+    _free_type(ri, 'request', ri.struct['request'])
+
+
 def print_rsp_type(ri):
     if (ri.op_mode == 'do' or ri.op_mode == 'dump') and 'reply' in ri.op[ri.op_mode]:
         direction = 'reply'
@@ -1513,6 +1643,7 @@ def print_wrapped_type(ri):
     elif ri.op_mode == 'notify' or ri.op_mode == 'event':
         ri.cw.p('__u16 family;')
         ri.cw.p('__u8 cmd;')
+        ri.cw.p('struct ynl_ntf_base_type *next;')
         ri.cw.p(f"void (*free)({type_name(ri, 'reply')} *ntf);")
     ri.cw.p(f"{type_name(ri, 'reply', deref=True)} obj __attribute__ ((aligned (8)));")
     ri.cw.block_end(line=';')
@@ -1564,7 +1695,7 @@ def print_dump_type_free(ri):
     ri.cw.block_start()
     ri.cw.p(f"{sub_type} *next = rsp;")
     ri.cw.nl()
-    ri.cw.block_start(line='while (next)')
+    ri.cw.block_start(line='while ((void *)next != YNL_LIST_END)')
     _free_type_members_iter(ri, ri.struct['reply'])
     ri.cw.p('rsp = next;')
     ri.cw.p('next = rsp->next;')
@@ -2035,6 +2166,45 @@ def render_uapi(family, cw):
     cw.p(f'#endif /* {hdr_prot} */')
 
 
+def _render_user_ntf_entry(ri, op):
+    ri.cw.block_start(line=f"[{op.enum_name}] = ")
+    ri.cw.p(f".alloc_sz\t= sizeof({type_name(ri, 'event')}),")
+    ri.cw.p(f".cb\t\t= {op_prefix(ri, 'reply', deref=True)}_parse,")
+    ri.cw.p(f".policy\t\t= &{ri.struct['reply'].render_name}_nest,")
+    ri.cw.p(f".free\t\t= (void *){op_prefix(ri, 'notify')}_free,")
+    ri.cw.block_end(line=',')
+
+
+def render_user_family(family, cw, prototype):
+    symbol = f'const struct ynl_family ynl_{family.c_name}_family'
+    if prototype:
+        cw.p(f'extern {symbol};')
+        return
+
+    ntf = family.has_notifications()
+    if ntf:
+        cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ")
+        for ntf_op in sorted(family.all_notify.keys()):
+            op = family.ops[ntf_op]
+            ri = RenderInfo(cw, family, "user", op, ntf_op, "notify")
+            for ntf in op['notify']['cmds']:
+                _render_user_ntf_entry(ri, ntf)
+        for op_name, op in family.ops.items():
+            if 'event' not in op:
+                continue
+            ri = RenderInfo(cw, family, "user", op, op_name, "event")
+            _render_user_ntf_entry(ri, op)
+        cw.block_end(line=";")
+        cw.nl()
+
+    cw.block_start(f'{symbol} = ')
+    cw.p(f'.name\t\t= "{family.name}",')
+    if ntf:
+        cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
+        cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),")
+    cw.block_end(line=';')
+
+
 def find_kernel_root(full_path):
     sub_path = ''
     while True:
@@ -2101,7 +2271,16 @@ def main():
             if args.out_file:
                 cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"')
             cw.nl()
-    headers = [parsed.uapi_header]
+        headers = ['uapi/' + parsed.uapi_header]
+    else:
+        cw.p('#include <stdlib.h>')
+        if args.header:
+            cw.p('#include <string.h>')
+            cw.p('#include <linux/types.h>')
+        else:
+            cw.p(f'#include "{parsed.name}-user.h"')
+            cw.p('#include "ynl.h"')
+        headers = [parsed.uapi_header]
     for definition in parsed['definitions']:
         if 'header' in definition:
             headers.append(definition['header'])
@@ -2121,6 +2300,8 @@ def main():
                 cw.p(f'#include "{one}"')
         else:
             cw.p('struct ynl_sock;')
+            cw.nl()
+            render_user_family(parsed, cw, True)
         cw.nl()
 
     if args.mode == "kernel":
@@ -2182,8 +2363,15 @@ def main():
             print_kernel_family_struct_src(parsed, cw)
 
     if args.mode == "user":
-        has_ntf = False
         if args.header:
+            cw.p('/* Enums */')
+            put_op_name_fwd(parsed, cw)
+
+            for name, const in parsed.consts.items():
+                if isinstance(const, EnumSet):
+                    put_enum_to_str_fwd(parsed, cw, const)
+            cw.nl()
+
             cw.p('/* Common nested types */')
             for attr_set, struct in sorted(parsed.pure_nested_structs.items()):
                 ri = RenderInfo(cw, parsed, args.mode, "", "", "", attr_set)
@@ -2219,9 +2407,8 @@ def main():
                 if 'notify' in op:
                     cw.p(f"/* {op.enum_name} - notify */")
                     ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'notify')
-                    has_ntf = True
                     if not ri.type_consistent:
-                        raise Exception('Only notifications with consistent types supported')
+                        raise Exception(f'Only notifications with consistent types supported ({op.name})')
                     print_wrapped_type(ri)
 
                 if 'event' in op:
@@ -2231,11 +2418,19 @@ def main():
                     cw.nl()
                     print_wrapped_type(ri)
 
-            if has_ntf:
+            if parsed.has_notifications():
                 cw.p('/* --------------- Common notification parsing --------------- */')
                 print_ntf_parse_prototype(parsed, cw)
             cw.nl()
         else:
+            cw.p('/* Enums */')
+            put_op_name(parsed, cw)
+
+            for name, const in parsed.consts.items():
+                if isinstance(const, EnumSet):
+                    put_enum_to_str(parsed, cw, const)
+            cw.nl()
+
             cw.p('/* Policies */')
             for name, _ in parsed.attr_sets.items():
                 struct = Struct(parsed, name)
@@ -2261,6 +2456,7 @@ def main():
                 if 'do' in op and 'event' not in op:
                     cw.p(f"/* {op.enum_name} - do */")
                     ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do")
+                    print_req_free(ri)
                     print_rsp_free(ri)
                     parse_rsp_msg(ri)
                     print_req(ri)
@@ -2278,14 +2474,12 @@ def main():
                 if 'notify' in op:
                     cw.p(f"/* {op.enum_name} - notify */")
                     ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'notify')
-                    has_ntf = True
                     if not ri.type_consistent:
-                        raise Exception('Only notifications with consistent types supported')
+                        raise Exception(f'Only notifications with consistent types supported ({op.name})')
                     print_ntf_type_free(ri)
 
                 if 'event' in op:
                     cw.p(f"/* {op.enum_name} - event */")
-                    has_ntf = True
 
                     ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do")
                     parse_rsp_msg(ri)
@@ -2293,10 +2487,13 @@ def main():
                     ri = RenderInfo(cw, parsed, args.mode, op, op_name, "event")
                     print_ntf_type_free(ri)
 
-            if has_ntf:
+            if parsed.has_notifications():
                 cw.p('/* --------------- Common notification parsing --------------- */')
                 print_ntf_type_parse(parsed, cw, args.mode)
 
+            cw.nl()
+            render_user_family(parsed, cw, False)
+
     if args.header:
         cw.p(f'#endif /* {hdr_prot} */')
 
index 74f5de1..2a4525e 100755 (executable)
@@ -14,7 +14,7 @@ done
 
 KDIR=$(dirname $(dirname $(dirname $(dirname $(realpath $0)))))
 
-files=$(git grep --files-with-matches '^/\* YNL-GEN \(kernel\|uapi\)')
+files=$(git grep --files-with-matches '^/\* YNL-GEN \(kernel\|uapi\|user\)')
 for f in $files; do
     # params:     0       1      2     3
     #         $YAML YNL-GEN kernel $mode
index 0a6837f..08adc80 100644 (file)
@@ -1,33 +1,6 @@
-bloom_filter_map                                 # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22
-bpf_cookie/lsm
-bpf_cookie/multi_kprobe_attach_api
-bpf_cookie/multi_kprobe_link_api
-bpf_cookie/trampoline
-bpf_loop/check_callback_fn_stop                  # link unexpected error: -524
-bpf_loop/check_invalid_flags
-bpf_loop/check_nested_calls
-bpf_loop/check_non_constant_callback
-bpf_loop/check_nr_loops
-bpf_loop/check_null_callback_ctx
-bpf_loop/check_stack
-bpf_mod_race                                     # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524)
-bpf_tcp_ca/dctcp_fallback
-btf_dump/btf_dump: var_data                      # find type id unexpected find type id: actual -2 < expected 0
-cgroup_hierarchical_stats                        # attach unexpected error: -524 (errno 524)
-d_path/basic                                     # setup attach failed: -524
-deny_namespace                                   # attach unexpected error: -524 (errno 524)
-fentry_fexit                                     # fentry_attach unexpected error: -1 (errno 524)
-fentry_test                                      # fentry_attach unexpected error: -1 (errno 524)
-fexit_sleep                                      # fexit_attach fexit attach failed: -1
-fexit_stress                                     # fexit attach unexpected fexit attach: actual -524 < expected 0
-fexit_test                                       # fexit_attach unexpected error: -1 (errno 524)
-get_func_args_test                               # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline)
-get_func_ip_test                                 # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline)
-htab_update/reenter_update
-kfree_skb                                        # attach fentry unexpected error: -524 (trampoline)
-kfunc_call/subprog                               # extern (var ksym) 'bpf_prog_active': not found in kernel BTF
-kfunc_call/subprog_lskel                         # skel unexpected error: -2
-kfunc_dynptr_param/dynptr_data_null              # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22
+bpf_cookie/multi_kprobe_attach_api               # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+bpf_cookie/multi_kprobe_link_api                 # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+fexit_sleep                                      # The test never returns. The remaining tests cannot start.
 kprobe_multi_bench_attach                        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 kprobe_multi_test/attach_api_addrs               # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 kprobe_multi_test/attach_api_pattern             # bpf_program__attach_kprobe_multi_opts unexpected error: -95
@@ -35,51 +8,5 @@ kprobe_multi_test/attach_api_syms                # bpf_program__attach_kprobe_mu
 kprobe_multi_test/bench_attach                   # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 kprobe_multi_test/link_api_addrs                 # link_fd unexpected link_fd: actual -95 < expected 0
 kprobe_multi_test/link_api_syms                  # link_fd unexpected link_fd: actual -95 < expected 0
-kprobe_multi_test/skel_api                       # kprobe_multi__attach unexpected error: -524 (errno 524)
-ksyms_module/libbpf                              # 'bpf_testmod_ksym_percpu': not found in kernel BTF
-ksyms_module/lskel                               # test_ksyms_module_lskel__open_and_load unexpected error: -2
-libbpf_get_fd_by_id_opts                         # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524)
-linked_list
-lookup_key                                       # test_lookup_key__attach unexpected error: -524 (errno 524)
-lru_bug                                          # lru_bug__attach unexpected error: -524 (errno 524)
-modify_return                                    # modify_return__attach failed unexpected error: -524 (errno 524)
-module_attach                                    # skel_attach skeleton attach failed: -524
-module_fentry_shadow                             # bpf_link_create unexpected bpf_link_create: actual -524 < expected 0
-mptcp/base                                       # run_test mptcp unexpected error: -524 (errno 524)
-netcnt                                           # packets unexpected packets: actual 10001 != expected 10000
-rcu_read_lock                                    # failed to attach: ERROR: strerror_r(-524)=22
-recursion                                        # skel_attach unexpected error: -524 (errno 524)
-ringbuf                                          # skel_attach skeleton attachment failed: -1
-setget_sockopt                                   # attach_cgroup unexpected error: -524
-sk_storage_tracing                               # test_sk_storage_tracing__attach unexpected error: -524 (errno 524)
-skc_to_unix_sock                                 # could not attach BPF object unexpected error: -524 (errno 524)
-socket_cookie                                    # prog_attach unexpected error: -524
-stacktrace_build_id                              # compare_stack_ips stackmap vs. stack_amap err -1 errno 2
-task_local_storage/exit_creds                    # skel_attach unexpected error: -524 (errno 524)
-task_local_storage/recursion                     # skel_attach unexpected error: -524 (errno 524)
-test_bprm_opts                                   # attach attach failed: -524
-test_ima                                         # attach attach failed: -524
-test_local_storage                               # attach lsm attach failed: -524
-test_lsm                                         # test_lsm_first_attach unexpected error: -524 (errno 524)
-test_overhead                                    # attach_fentry unexpected error: -524
-timer                                            # timer unexpected error: -524 (errno 524)
-timer_crash                                      # timer_crash__attach unexpected error: -524 (errno 524)
-timer_mim                                        # timer_mim unexpected error: -524 (errno 524)
-trace_printk                                     # trace_printk__attach unexpected error: -1 (errno 524)
-trace_vprintk                                    # trace_vprintk__attach unexpected error: -1 (errno 524)
-tracing_struct                                   # tracing_struct__attach unexpected error: -524 (errno 524)
-trampoline_count                                 # attach_prog unexpected error: -524
-unpriv_bpf_disabled                              # skel_attach unexpected error: -524 (errno 524)
-user_ringbuf/test_user_ringbuf_post_misaligned   # misaligned_skel unexpected error: -524 (errno 524)
-user_ringbuf/test_user_ringbuf_post_producer_wrong_offset
-user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz
-user_ringbuf/test_user_ringbuf_basic             # ringbuf_basic_skel unexpected error: -524 (errno 524)
-user_ringbuf/test_user_ringbuf_sample_full_ring_buffer
-user_ringbuf/test_user_ringbuf_post_alignment_autoadjust
-user_ringbuf/test_user_ringbuf_overfill
-user_ringbuf/test_user_ringbuf_discards_properly_ignored
-user_ringbuf/test_user_ringbuf_loop
-user_ringbuf/test_user_ringbuf_msg_protocol
-user_ringbuf/test_user_ringbuf_blocking_reserve
-verify_pkcs7_sig                                 # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524)
-vmlinux                                          # skel_attach skeleton attach failed: -524
+kprobe_multi_test/skel_api                       # libbpf: failed to load BPF skeleton 'kprobe_multi': -3
+module_attach                                    # prog 'kprobe_multi': failed to auto-attach: -95
index c7463f3..5061d9e 100644 (file)
@@ -26,3 +26,4 @@ user_ringbuf                             # failed to find kernel BTF type ID of
 verif_stats                              # trace_vprintk__open_and_load unexpected error: -9                           (?)
 xdp_bonding                              # failed to auto-attach program 'trace_on_entry': -524                        (trampoline)
 xdp_metadata                             # JIT does not support calling kernel function                                (kfunc)
+test_task_under_cgroup                   # JIT does not support calling kernel function                                (kfunc)
index 28d2c77..538df8f 100644 (file)
@@ -88,8 +88,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
        xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
        xdp_features
 
-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file
-TEST_GEN_FILES += liburandom_read.so
+TEST_GEN_FILES += liburandom_read.so urandom_read sign-file
 
 # Emit succinct information message describing current building step
 # $1 - generic step name (e.g., CC, LINK, etc);
index 8c993ec..642dda0 100644 (file)
@@ -35,4 +35,10 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
 extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
                              void *buffer, __u32 buffer__szk) __ksym;
 
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym;
+extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym;
+extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym;
+extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym;
+extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym;
+
 #endif
index 52785ba..cf21604 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/sysfs.h>
 #include <linux/tracepoint.h>
 #include "bpf_testmod.h"
+#include "bpf_testmod_kfunc.h"
 
 #define CREATE_TRACE_POINTS
 #include "bpf_testmod-events.h"
@@ -289,8 +290,171 @@ static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
        .set   = &bpf_testmod_common_kfunc_ids,
 };
 
+__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+{
+       return a + b + c + d;
+}
+
+__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+{
+       return a + b;
+}
+
+__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
+{
+       return sk;
+}
+
+__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
+{
+       /* Provoke the compiler to assume that the caller has sign-extended a,
+        * b and c on platforms where this is required (e.g. s390x).
+        */
+       return (long)a + (long)b + (long)c + d;
+}
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+       .a = 42,
+       .b = 108,
+       .next = &prog_test_struct,
+       .cnt = REFCOUNT_INIT(1),
+};
+
+__bpf_kfunc struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+       refcount_inc(&prog_test_struct.cnt);
+       return &prog_test_struct;
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
+{
+       WARN_ON_ONCE(1);
+}
+
+__bpf_kfunc struct prog_test_member *
+bpf_kfunc_call_memb_acquire(void)
+{
+       WARN_ON_ONCE(1);
+       return NULL;
+}
+
+__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+{
+       WARN_ON_ONCE(1);
+}
+
+static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size)
+{
+       if (size > 2 * sizeof(int))
+               return NULL;
+
+       return (int *)p;
+}
+
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
+                                                 const int rdwr_buf_size)
+{
+       return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
+}
+
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
+                                                   const int rdonly_buf_size)
+{
+       return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+/* the next 2 ones can't be really used for testing expect to ensure
+ * that the verifier rejects the call.
+ * Acquire functions must return struct pointers, so these ones are
+ * failing.
+ */
+__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
+                                                   const int rdonly_buf_size)
+{
+       return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+{
+       /* p != NULL, but p->cnt could be 0 */
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
+{
+}
+
+__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
+{
+       return arg;
+}
+
 BTF_SET8_START(bpf_testmod_check_kfunc_ids)
 BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
 BTF_SET8_END(bpf_testmod_check_kfunc_ids)
 
 static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
@@ -312,6 +476,8 @@ static int bpf_testmod_init(void)
 
        ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+       ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set);
+       ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
        if (ret < 0)
                return ret;
        if (bpf_fentry_test1(0) < 0)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
new file mode 100644 (file)
index 0000000..9693c62
--- /dev/null
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_TESTMOD_KFUNC_H
+#define _BPF_TESTMOD_KFUNC_H
+
+#ifndef __KERNEL__
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#else
+#define __ksym
+struct prog_test_member1 {
+       int a;
+};
+
+struct prog_test_member {
+       struct prog_test_member1 m;
+       int c;
+};
+
+struct prog_test_ref_kfunc {
+       int a;
+       int b;
+       struct prog_test_member memb;
+       struct prog_test_ref_kfunc *next;
+       refcount_t cnt;
+};
+#endif
+
+struct prog_test_pass1 {
+       int x0;
+       struct {
+               int x1;
+               struct {
+                       int x2;
+                       struct {
+                               int x3;
+                       };
+               };
+       };
+};
+
+struct prog_test_pass2 {
+       int len;
+       short arr1[4];
+       struct {
+               char arr2[4];
+               unsigned long arr3[8];
+       } x;
+};
+
+struct prog_test_fail1 {
+       void *p;
+       int x;
+};
+
+struct prog_test_fail2 {
+       int x8;
+       struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+       int len;
+       char arr1[2];
+       char arr2[];
+};
+
+struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
+void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym;
+
+void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
+int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+void bpf_kfunc_call_int_mem_release(int *p) __ksym;
+
+/* The bpf_kfunc_call_test_static_unused_arg is defined as static,
+ * but bpf program compilation needs to see it as global symbol.
+ */
+#ifndef __KERNEL__
+u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym;
+#endif
+
+void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+                               __u32 c, __u64 d) __ksym;
+int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
+struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
+long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
+
+void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
+void bpf_kfunc_call_test_destructive(void) __ksym;
+
+#endif /* _BPF_TESTMOD_KFUNC_H */
index 596caa1..a105c0c 100644 (file)
@@ -427,3 +427,26 @@ void close_netns(struct nstoken *token)
        close(token->orig_netns_fd);
        free(token);
 }
+
+int get_socket_local_port(int sock_fd)
+{
+       struct sockaddr_storage addr;
+       socklen_t addrlen = sizeof(addr);
+       int err;
+
+       err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen);
+       if (err < 0)
+               return err;
+
+       if (addr.ss_family == AF_INET) {
+               struct sockaddr_in *sin = (struct sockaddr_in *)&addr;
+
+               return sin->sin_port;
+       } else if (addr.ss_family == AF_INET6) {
+               struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr;
+
+               return sin->sin6_port;
+       }
+
+       return -1;
+}
index f882c69..6941856 100644 (file)
@@ -56,6 +56,7 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
 int make_sockaddr(int family, const char *addr_str, __u16 port,
                  struct sockaddr_storage *addr, socklen_t *len);
 char *ping_command(int family);
+int get_socket_local_port(int sock_fd);
 
 struct nstoken;
 /**
index b17bfa0..bb143de 100644 (file)
@@ -96,12 +96,80 @@ static void test_parse_test_list(void)
                goto error;
        ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name");
        ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name");
+       free_test_filter_set(&set);
+
+       ASSERT_OK(parse_test_list("t/subtest1,t/subtest2", &set, true),
+                 "parsing");
+       if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+               goto error;
+       if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+               goto error;
+       if (!ASSERT_EQ(set.tests[0].subtest_cnt, 2, "subtest filters count"))
+               goto error;
+       ASSERT_OK(strcmp("t", set.tests[0].name), "test name");
+       ASSERT_OK(strcmp("subtest1", set.tests[0].subtests[0]), "subtest name");
+       ASSERT_OK(strcmp("subtest2", set.tests[0].subtests[1]), "subtest name");
 error:
        free_test_filter_set(&set);
 }
 
+static void test_parse_test_list_file(void)
+{
+       struct test_filter_set set;
+       char tmpfile[80];
+       FILE *fp;
+       int fd;
+
+       snprintf(tmpfile, sizeof(tmpfile), "/tmp/bpf_arg_parsing_test.XXXXXX");
+       fd = mkstemp(tmpfile);
+       if (!ASSERT_GE(fd, 0, "create tmp"))
+               return;
+
+       fp = fdopen(fd, "w");
+       if (!ASSERT_NEQ(fp, NULL, "fdopen tmp")) {
+               close(fd);
+               goto out_remove;
+       }
+
+       fprintf(fp, "# comment\n");
+       fprintf(fp, "  test_with_spaces    \n");
+       fprintf(fp, "testA/subtest    # comment\n");
+       fprintf(fp, "testB#comment with no space\n");
+       fprintf(fp, "testB # duplicate\n");
+       fprintf(fp, "testA/subtest # subtest duplicate\n");
+       fprintf(fp, "testA/subtest2\n");
+       fprintf(fp, "testC_no_eof_newline");
+       fflush(fp);
+
+       if (!ASSERT_OK(ferror(fp), "prepare tmp"))
+               goto out_fclose;
+
+       init_test_filter_set(&set);
+
+       ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+
+       ASSERT_EQ(set.cnt, 4, "test  count");
+       ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
+       ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
+       ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
+       ASSERT_EQ(set.tests[1].subtest_cnt, 2, "test 1 subtest count");
+       ASSERT_OK(strcmp("subtest", set.tests[1].subtests[0]), "test 1 subtest 0");
+       ASSERT_OK(strcmp("subtest2", set.tests[1].subtests[1]), "test 1 subtest 1");
+       ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
+       ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
+
+       free_test_filter_set(&set);
+
+out_fclose:
+       fclose(fp);
+out_remove:
+       remove(tmpfile);
+}
+
 void test_arg_parsing(void)
 {
        if (test__start_subtest("test_parse_test_list"))
                test_parse_test_list();
+       if (test__start_subtest("test_parse_test_list_file"))
+               test_parse_test_list_file();
 }
index a4d0cc9..fe2c502 100644 (file)
@@ -11,6 +11,7 @@
 #include "ksym_race.skel.h"
 #include "bpf_mod_race.skel.h"
 #include "kfunc_call_race.skel.h"
+#include "testing_helpers.h"
 
 /* This test crafts a race between btf_try_get_module and do_init_module, and
  * checks whether btf_try_get_module handles the invocation for a well-formed
@@ -44,35 +45,10 @@ enum bpf_test_state {
 
 static _Atomic enum bpf_test_state state = _TS_INVALID;
 
-static int sys_finit_module(int fd, const char *param_values, int flags)
-{
-       return syscall(__NR_finit_module, fd, param_values, flags);
-}
-
-static int sys_delete_module(const char *name, unsigned int flags)
-{
-       return syscall(__NR_delete_module, name, flags);
-}
-
-static int load_module(const char *mod)
-{
-       int ret, fd;
-
-       fd = open("bpf_testmod.ko", O_RDONLY);
-       if (fd < 0)
-               return fd;
-
-       ret = sys_finit_module(fd, "", 0);
-       close(fd);
-       if (ret < 0)
-               return ret;
-       return 0;
-}
-
 static void *load_module_thread(void *p)
 {
 
-       if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+       if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail"))
                atomic_store(&state, TS_MODULE_LOAD);
        else
                atomic_store(&state, TS_MODULE_LOAD_FAIL);
@@ -124,7 +100,7 @@ static void test_bpf_mod_race_config(const struct test_config *config)
        if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
                return;
 
-       if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+       if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod"))
                goto end_mmap;
 
        skel = bpf_mod_race__open();
@@ -202,8 +178,8 @@ end_destroy:
        bpf_mod_race__destroy(skel);
        ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
 end_module:
-       sys_delete_module("bpf_testmod", 0);
-       ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+       unload_bpf_testmod(false);
+       ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod");
 end_mmap:
        munmap(fault_addr, 4096);
        atomic_store(&state, _TS_INVALID);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
new file mode 100644 (file)
index 0000000..31f1e81
--- /dev/null
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+       return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+       return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+       return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+__attribute__((unused))
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+                                int to_dfd, const char *to_path,
+                                unsigned int ms_flags)
+{
+       return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags);
+}
+
+static void bpf_obj_pinning_detached(void)
+{
+       LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts);
+       LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+       int fs_fd = -1, mnt_fd = -1;
+       int map_fd = -1, map_fd2 = -1;
+       int zero = 0, src_value, dst_value, err;
+       const char *map_name = "fsmount_map";
+
+       /* A bunch of below UAPI calls are constructed based on reading:
+        * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html
+        */
+
+       /* create VFS context */
+       fs_fd = sys_fsopen("bpf", 0);
+       if (!ASSERT_GE(fs_fd, 0, "fs_fd"))
+               goto cleanup;
+
+       /* instantiate FS object */
+       err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+       if (!ASSERT_OK(err, "fs_create"))
+               goto cleanup;
+
+       /* create O_PATH fd for detached mount */
+       mnt_fd = sys_fsmount(fs_fd, 0, 0);
+       if (!ASSERT_GE(mnt_fd, 0, "mnt_fd"))
+               goto cleanup;
+
+       /* If we wanted to expose detached mount in the file system, we'd do
+        * something like below. But the whole point is that we actually don't
+        * even have to expose BPF FS in the file system to be able to work
+        * (pin/get objects) with it.
+        *
+        * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH);
+        * if (!ASSERT_OK(err, "move_mount"))
+        *      goto cleanup;
+        */
+
+       /* create BPF map to pin */
+       map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL);
+       if (!ASSERT_GE(map_fd, 0, "map_fd"))
+               goto cleanup;
+
+       /* pin BPF map into detached BPF FS through mnt_fd */
+       pin_opts.file_flags = BPF_F_PATH_FD;
+       pin_opts.path_fd = mnt_fd;
+       err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts);
+       if (!ASSERT_OK(err, "map_pin"))
+               goto cleanup;
+
+       /* get BPF map from detached BPF FS through mnt_fd */
+       get_opts.file_flags = BPF_F_PATH_FD;
+       get_opts.path_fd = mnt_fd;
+       map_fd2 = bpf_obj_get_opts(map_name, &get_opts);
+       if (!ASSERT_GE(map_fd2, 0, "map_get"))
+               goto cleanup;
+
+       /* update map through one FD */
+       src_value = 0xcafebeef;
+       err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+       ASSERT_OK(err, "map_update");
+
+       /* check values written/read through different FDs do match */
+       dst_value = 0;
+       err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+       ASSERT_OK(err, "map_lookup");
+       ASSERT_EQ(dst_value, src_value, "map_value_eq1");
+       ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2");
+
+cleanup:
+       if (map_fd >= 0)
+               ASSERT_OK(close(map_fd), "close_map_fd");
+       if (map_fd2 >= 0)
+               ASSERT_OK(close(map_fd2), "close_map_fd2");
+       if (fs_fd >= 0)
+               ASSERT_OK(close(fs_fd), "close_fs_fd");
+       if (mnt_fd >= 0)
+               ASSERT_OK(close(mnt_fd), "close_mnt_fd");
+}
+
+enum path_kind
+{
+       PATH_STR_ABS,
+       PATH_STR_REL,
+       PATH_FD_REL,
+};
+
+static void validate_pin(int map_fd, const char *map_name, int src_value,
+                        enum path_kind path_kind)
+{
+       LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts);
+       char abs_path[PATH_MAX], old_cwd[PATH_MAX];
+       const char *pin_path = NULL;
+       int zero = 0, dst_value, map_fd2, err;
+
+       snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name);
+       old_cwd[0] = '\0';
+
+       switch (path_kind) {
+       case PATH_STR_ABS:
+               /* absolute path */
+               pin_path = abs_path;
+               break;
+       case PATH_STR_REL:
+               /* cwd + relative path */
+               ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd");
+               ASSERT_OK(chdir("/sys/fs/bpf"), "chdir");
+               pin_path = map_name;
+               break;
+       case PATH_FD_REL:
+               /* dir fd + relative path */
+               pin_opts.file_flags = BPF_F_PATH_FD;
+               pin_opts.path_fd = open("/sys/fs/bpf", O_PATH);
+               ASSERT_GE(pin_opts.path_fd, 0, "path_fd");
+               pin_path = map_name;
+               break;
+       }
+
+       /* pin BPF map using specified path definition */
+       err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts);
+       ASSERT_OK(err, "obj_pin");
+
+       /* cleanup */
+       if (pin_opts.path_fd >= 0)
+               close(pin_opts.path_fd);
+       if (old_cwd[0])
+               ASSERT_OK(chdir(old_cwd), "restore_cwd");
+
+       map_fd2 = bpf_obj_get(abs_path);
+       if (!ASSERT_GE(map_fd2, 0, "map_get"))
+               goto cleanup;
+
+       /* update map through one FD */
+       err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+       ASSERT_OK(err, "map_update");
+
+       /* check values written/read through different FDs do match */
+       dst_value = 0;
+       err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+       ASSERT_OK(err, "map_lookup");
+       ASSERT_EQ(dst_value, src_value, "map_value_eq");
+cleanup:
+       if (map_fd2 >= 0)
+               ASSERT_OK(close(map_fd2), "close_map_fd2");
+       unlink(abs_path);
+}
+
+static void validate_get(int map_fd, const char *map_name, int src_value,
+                        enum path_kind path_kind)
+{
+       LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+       char abs_path[PATH_MAX], old_cwd[PATH_MAX];
+       const char *pin_path = NULL;
+       int zero = 0, dst_value, map_fd2, err;
+
+       snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name);
+       /* pin BPF map using specified path definition */
+       err = bpf_obj_pin(map_fd, abs_path);
+       if (!ASSERT_OK(err, "pin_map"))
+               return;
+
+       old_cwd[0] = '\0';
+
+       switch (path_kind) {
+       case PATH_STR_ABS:
+               /* absolute path */
+               pin_path = abs_path;
+               break;
+       case PATH_STR_REL:
+               /* cwd + relative path */
+               ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd");
+               ASSERT_OK(chdir("/sys/fs/bpf"), "chdir");
+               pin_path = map_name;
+               break;
+       case PATH_FD_REL:
+               /* dir fd + relative path */
+               get_opts.file_flags = BPF_F_PATH_FD;
+               get_opts.path_fd = open("/sys/fs/bpf", O_PATH);
+               ASSERT_GE(get_opts.path_fd, 0, "path_fd");
+               pin_path = map_name;
+               break;
+       }
+
+       map_fd2 = bpf_obj_get_opts(pin_path, &get_opts);
+       if (!ASSERT_GE(map_fd2, 0, "map_get"))
+               goto cleanup;
+
+       /* cleanup */
+       if (get_opts.path_fd >= 0)
+               close(get_opts.path_fd);
+       if (old_cwd[0])
+               ASSERT_OK(chdir(old_cwd), "restore_cwd");
+
+       /* update map through one FD */
+       err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+       ASSERT_OK(err, "map_update");
+
+       /* check values written/read through different FDs do match */
+       dst_value = 0;
+       err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+       ASSERT_OK(err, "map_lookup");
+       ASSERT_EQ(dst_value, src_value, "map_value_eq");
+cleanup:
+       if (map_fd2 >= 0)
+               ASSERT_OK(close(map_fd2), "close_map_fd2");
+       unlink(abs_path);
+}
+
+static void bpf_obj_pinning_mounted(enum path_kind path_kind)
+{
+       const char *map_name = "mounted_map";
+       int map_fd;
+
+       /* create BPF map to pin */
+       map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL);
+       if (!ASSERT_GE(map_fd, 0, "map_fd"))
+               return;
+
+       validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind);
+       validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind);
+       ASSERT_OK(close(map_fd), "close_map_fd");
+}
+
+void test_bpf_obj_pinning()
+{
+       if (test__start_subtest("detached"))
+               bpf_obj_pinning_detached();
+       if (test__start_subtest("mounted-str-abs"))
+               bpf_obj_pinning_mounted(PATH_STR_ABS);
+       if (test__start_subtest("mounted-str-rel"))
+               bpf_obj_pinning_mounted(PATH_STR_REL);
+       if (test__start_subtest("mounted-fd-rel"))
+               bpf_obj_pinning_mounted(PATH_FD_REL);
+}
index 4d2fa99..2bb5773 100644 (file)
@@ -25,6 +25,8 @@ static void test_setsockopt_set(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach setsockopt that sets EUNATCH, assert that
         * we actually get that error when we run setsockopt()
         */
@@ -59,6 +61,8 @@ static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach setsockopt that sets EUNATCH, and one that gets the
         * previously set errno. Assert that we get the same errno back.
         */
@@ -100,6 +104,8 @@ static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach setsockopt that gets the previously set errno.
         * Assert that, without anything setting one, we get 0.
         */
@@ -134,6 +140,8 @@ static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach setsockopt that gets the previously set errno, and then
         * one that sets the errno to EUNATCH. Assert that the get does not
         * see EUNATCH set later, and does not prevent EUNATCH from being set.
@@ -177,6 +185,8 @@ static void test_setsockopt_override(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
         * and then one that gets the exported errno. Assert both the syscall
         * and the helper sees the last set errno.
@@ -224,6 +234,8 @@ static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach setsockopt that return a reject without setting errno
         * (legacy reject), and one that gets the errno. Assert that for
         * backward compatibility the syscall result in EPERM, and this
@@ -268,6 +280,8 @@ static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach setsockopt that sets EUNATCH, then one that return a reject
         * without setting errno, and then one that gets the exported errno.
         * Assert both the syscall and the helper's errno are unaffected by
@@ -319,6 +333,8 @@ static void test_getsockopt_get(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach getsockopt that gets previously set errno. Assert that the
         * error from kernel is in both ctx_retval_value and retval_value.
         */
@@ -359,6 +375,8 @@ static void test_getsockopt_override(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach getsockopt that sets retval to -EISCONN. Assert that this
         * overrides the value from kernel.
         */
@@ -396,6 +414,8 @@ static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
        if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
        /* Attach getsockopt that sets retval to -EISCONN, and one that clears
         * ctx retval. Assert that the clearing ctx retval is synced to helper
         * and clears any errors both from kernel and BPF.
index d176c34..7cfac53 100644 (file)
@@ -20,6 +20,14 @@ static struct {
        {"test_ringbuf", SETUP_SYSCALL_SLEEP},
        {"test_skb_readonly", SETUP_SKB_PROG},
        {"test_dynptr_skb_data", SETUP_SKB_PROG},
+       {"test_adjust", SETUP_SYSCALL_SLEEP},
+       {"test_adjust_err", SETUP_SYSCALL_SLEEP},
+       {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
+       {"test_dynptr_is_null", SETUP_SYSCALL_SLEEP},
+       {"test_dynptr_is_rdonly", SETUP_SKB_PROG},
+       {"test_dynptr_clone", SETUP_SKB_PROG},
+       {"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
+       {"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
 };
 
 static void verify_success(const char *prog_name, enum test_setup_type setup_type)
diff --git a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
new file mode 100644 (file)
index 0000000..fd41425
--- /dev/null
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <errno.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include "test_global_map_resize.skel.h"
+#include "test_progs.h"
+
+static void run_prog_bss_array_sum(void)
+{
+       (void)syscall(__NR_getpid);
+}
+
+static void run_prog_data_array_sum(void)
+{
+       (void)syscall(__NR_getuid);
+}
+
+static void global_map_resize_bss_subtest(void)
+{
+       int err;
+       struct test_global_map_resize *skel;
+       struct bpf_map *map;
+       const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2;
+       size_t array_len, actual_sz;
+
+       skel = test_global_map_resize__open();
+       if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+               goto teardown;
+
+       /* set some initial value before resizing.
+        * it is expected this non-zero value will be preserved
+        * while resizing.
+        */
+       skel->bss->array[0] = 1;
+
+       /* resize map value and verify the new size */
+       map = skel->maps.bss;
+       err = bpf_map__set_value_size(map, desired_sz);
+       if (!ASSERT_OK(err, "bpf_map__set_value_size"))
+               goto teardown;
+       if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
+               goto teardown;
+
+       /* set the expected number of elements based on the resized array */
+       array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]);
+       if (!ASSERT_GT(array_len, 1, "array_len"))
+               goto teardown;
+
+       skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz);
+       if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)"))
+               goto teardown;
+       if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)"))
+               goto teardown;
+
+       /* fill the newly resized array with ones,
+        * skipping the first element which was previously set
+        */
+       for (int i = 1; i < array_len; i++)
+               skel->bss->array[i] = 1;
+
+       /* set global const values before loading */
+       skel->rodata->pid = getpid();
+       skel->rodata->bss_array_len = array_len;
+       skel->rodata->data_array_len = 1;
+
+       err = test_global_map_resize__load(skel);
+       if (!ASSERT_OK(err, "test_global_map_resize__load"))
+               goto teardown;
+       err = test_global_map_resize__attach(skel);
+       if (!ASSERT_OK(err, "test_global_map_resize__attach"))
+               goto teardown;
+
+       /* run the bpf program which will sum the contents of the array.
+        * since the array was filled with ones, verify the sum equals array_len
+        */
+       run_prog_bss_array_sum();
+       if (!ASSERT_EQ(skel->bss->sum, array_len, "sum"))
+               goto teardown;
+
+teardown:
+       test_global_map_resize__destroy(skel);
+}
+
+static void global_map_resize_data_subtest(void)
+{
+       int err;
+       struct test_global_map_resize *skel;
+       struct bpf_map *map;
+       const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2;
+       size_t array_len, actual_sz;
+
+       skel = test_global_map_resize__open();
+       if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+               goto teardown;
+
+       /* set some initial value before resizing.
+        * it is expected this non-zero value will be preserved
+        * while resizing.
+        */
+       skel->data_custom->my_array[0] = 1;
+
+       /* resize map value and verify the new size */
+       map = skel->maps.data_custom;
+       err = bpf_map__set_value_size(map, desired_sz);
+       if (!ASSERT_OK(err, "bpf_map__set_value_size"))
+               goto teardown;
+       if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
+               goto teardown;
+
+       /* set the expected number of elements based on the resized array */
+       array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]);
+       if (!ASSERT_GT(array_len, 1, "array_len"))
+               goto teardown;
+
+       skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz);
+       if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)"))
+               goto teardown;
+       if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)"))
+               goto teardown;
+
+       /* fill the newly resized array with ones,
+        * skipping the first element which was previously set
+        */
+       for (int i = 1; i < array_len; i++)
+               skel->data_custom->my_array[i] = 1;
+
+       /* set global const values before loading */
+       skel->rodata->pid = getpid();
+       skel->rodata->bss_array_len = 1;
+       skel->rodata->data_array_len = array_len;
+
+       err = test_global_map_resize__load(skel);
+       if (!ASSERT_OK(err, "test_global_map_resize__load"))
+               goto teardown;
+       err = test_global_map_resize__attach(skel);
+       if (!ASSERT_OK(err, "test_global_map_resize__attach"))
+               goto teardown;
+
+       /* run the bpf program which will sum the contents of the array.
+        * since the array was filled with ones, verify the sum equals array_len
+        */
+       run_prog_data_array_sum();
+       if (!ASSERT_EQ(skel->bss->sum, array_len, "sum"))
+               goto teardown;
+
+teardown:
+       test_global_map_resize__destroy(skel);
+}
+
+static void global_map_resize_invalid_subtest(void)
+{
+       int err;
+       struct test_global_map_resize *skel;
+       struct bpf_map *map;
+       __u32 element_sz, desired_sz;
+
+       skel = test_global_map_resize__open();
+       if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+               return;
+
+        /* attempt to resize a global datasec map to size
+         * which does NOT align with array
+         */
+       map = skel->maps.data_custom;
+       if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf"))
+               goto teardown;
+       /* set desired size a fraction of element size beyond an aligned size */
+       element_sz = sizeof(skel->data_custom->my_array[0]);
+       desired_sz = element_sz + element_sz / 2;
+       /* confirm desired size does NOT align with array */
+       if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment"))
+               goto teardown;
+       err = bpf_map__set_value_size(map, desired_sz);
+       /* confirm resize is OK but BTF info is cleared */
+       if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") ||
+           !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") ||
+           !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val"))
+               goto teardown;
+
+       /* attempt to resize a global datasec map whose only var is NOT an array */
+       map = skel->maps.data_non_array;
+       if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf"))
+               goto teardown;
+       /* set desired size to arbitrary value */
+       desired_sz = 1024;
+       err = bpf_map__set_value_size(map, desired_sz);
+       /* confirm resize is OK but BTF info is cleared */
+       if (!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") ||
+           !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") ||
+           !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val"))
+               goto teardown;
+
+       /* attempt to resize a global datasec map
+        * whose last var is NOT an array
+        */
+       map = skel->maps.data_array_not_last;
+       if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf"))
+               goto teardown;
+       /* set desired size to a multiple of element size */
+       element_sz = sizeof(skel->data_array_not_last->my_array_first[0]);
+       desired_sz = element_sz * 8;
+       /* confirm desired size aligns with array */
+       if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment"))
+               goto teardown;
+       err = bpf_map__set_value_size(map, desired_sz);
+       /* confirm resize is OK but BTF info is cleared */
+       if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") ||
+           !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") ||
+           !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val"))
+               goto teardown;
+
+teardown:
+       test_global_map_resize__destroy(skel);
+}
+
+void test_global_map_resize(void)
+{
+       if (test__start_subtest("global_map_resize_bss"))
+               global_map_resize_bss_subtest();
+
+       if (test__start_subtest("global_map_resize_data"))
+               global_map_resize_data_subtest();
+
+       if (test__start_subtest("global_map_resize_invalid"))
+               global_map_resize_invalid_subtest();
+}
index 7fc01ff..f53d658 100644 (file)
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 #include <stdbool.h>
 #include "test_module_attach.skel.h"
+#include "testing_helpers.h"
 
 static int duration;
 
@@ -32,11 +33,6 @@ static int trigger_module_test_writable(int *val)
        return 0;
 }
 
-static int delete_module(const char *name, int flags)
-{
-       return syscall(__NR_delete_module, name, flags);
-}
-
 void test_module_attach(void)
 {
        const int READ_SZ = 456;
@@ -93,21 +89,21 @@ void test_module_attach(void)
        if (!ASSERT_OK_PTR(link, "attach_fentry"))
                goto cleanup;
 
-       ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+       ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
        bpf_link__destroy(link);
 
        link = bpf_program__attach(skel->progs.handle_fexit);
        if (!ASSERT_OK_PTR(link, "attach_fexit"))
                goto cleanup;
 
-       ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+       ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
        bpf_link__destroy(link);
 
        link = bpf_program__attach(skel->progs.kprobe_multi);
        if (!ASSERT_OK_PTR(link, "attach_kprobe_multi"))
                goto cleanup;
 
-       ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+       ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
        bpf_link__destroy(link);
 
 cleanup:
index d3915c5..c3333ed 100644 (file)
@@ -67,12 +67,12 @@ void serial_test_netcnt(void)
        }
 
        /* No packets should be lost */
-       ASSERT_EQ(packets, 10000, "packets");
+       ASSERT_GE(packets, 10000, "packets");
 
        /* Let's check that bytes counter matches the number of packets
         * multiplied by the size of ipv6 ICMP packet.
         */
-       ASSERT_EQ(bytes, packets * 104, "bytes");
+       ASSERT_GE(bytes, packets * 104, "bytes");
 
 err:
        if (cg_fd != -1)
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
new file mode 100644 (file)
index 0000000..b058330
--- /dev/null
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <bpf/bpf_endian.h>
+
+#include "sock_destroy_prog.skel.h"
+#include "sock_destroy_prog_fail.skel.h"
+#include "network_helpers.h"
+
+#define TEST_NS "sock_destroy_netns"
+
+static void start_iter_sockets(struct bpf_program *prog)
+{
+       struct bpf_link *link;
+       char buf[50] = {};
+       int iter_fd, len;
+
+       link = bpf_program__attach_iter(prog, NULL);
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
+               return;
+
+       iter_fd = bpf_iter_create(bpf_link__fd(link));
+       if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+               goto free_link;
+
+       while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+               ;
+       ASSERT_GE(len, 0, "read");
+
+       close(iter_fd);
+
+free_link:
+       bpf_link__destroy(link);
+}
+
+static void test_tcp_client(struct sock_destroy_prog *skel)
+{
+       int serv = -1, clien = -1, accept_serv = -1, n;
+
+       serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+       if (!ASSERT_GE(serv, 0, "start_server"))
+               goto cleanup;
+
+       clien = connect_to_fd(serv, 0);
+       if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+               goto cleanup;
+
+       accept_serv = accept(serv, NULL, NULL);
+       if (!ASSERT_GE(accept_serv, 0, "serv accept"))
+               goto cleanup;
+
+       n = send(clien, "t", 1, 0);
+       if (!ASSERT_EQ(n, 1, "client send"))
+               goto cleanup;
+
+       /* Run iterator program that destroys connected client sockets. */
+       start_iter_sockets(skel->progs.iter_tcp6_client);
+
+       n = send(clien, "t", 1, 0);
+       if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+               goto cleanup;
+       ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket");
+
+cleanup:
+       if (clien != -1)
+               close(clien);
+       if (accept_serv != -1)
+               close(accept_serv);
+       if (serv != -1)
+               close(serv);
+}
+
+static void test_tcp_server(struct sock_destroy_prog *skel)
+{
+       int serv = -1, clien = -1, accept_serv = -1, n, serv_port;
+
+       serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+       if (!ASSERT_GE(serv, 0, "start_server"))
+               goto cleanup;
+       serv_port = get_socket_local_port(serv);
+       if (!ASSERT_GE(serv_port, 0, "get_sock_local_port"))
+               goto cleanup;
+       skel->bss->serv_port = (__be16) serv_port;
+
+       clien = connect_to_fd(serv, 0);
+       if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+               goto cleanup;
+
+       accept_serv = accept(serv, NULL, NULL);
+       if (!ASSERT_GE(accept_serv, 0, "serv accept"))
+               goto cleanup;
+
+       n = send(clien, "t", 1, 0);
+       if (!ASSERT_EQ(n, 1, "client send"))
+               goto cleanup;
+
+       /* Run iterator program that destroys server sockets. */
+       start_iter_sockets(skel->progs.iter_tcp6_server);
+
+       n = send(clien, "t", 1, 0);
+       if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+               goto cleanup;
+       ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket");
+
+cleanup:
+       if (clien != -1)
+               close(clien);
+       if (accept_serv != -1)
+               close(accept_serv);
+       if (serv != -1)
+               close(serv);
+}
+
+static void test_udp_client(struct sock_destroy_prog *skel)
+{
+       int serv = -1, clien = -1, n = 0;
+
+       serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0);
+       if (!ASSERT_GE(serv, 0, "start_server"))
+               goto cleanup;
+
+       clien = connect_to_fd(serv, 0);
+       if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+               goto cleanup;
+
+       n = send(clien, "t", 1, 0);
+       if (!ASSERT_EQ(n, 1, "client send"))
+               goto cleanup;
+
+       /* Run iterator program that destroys sockets. */
+       start_iter_sockets(skel->progs.iter_udp6_client);
+
+       n = send(clien, "t", 1, 0);
+       if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+               goto cleanup;
+       /* UDP sockets have an overriding error code after they are disconnected,
+        * so we don't check for ECONNABORTED error code.
+        */
+
+cleanup:
+       if (clien != -1)
+               close(clien);
+       if (serv != -1)
+               close(serv);
+}
+
+static void test_udp_server(struct sock_destroy_prog *skel)
+{
+       int *listen_fds = NULL, n, i, serv_port;
+       unsigned int num_listens = 5;
+       char buf[1];
+
+       /* Start reuseport servers. */
+       listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM,
+                                           "::1", 0, 0, num_listens);
+       if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server"))
+               goto cleanup;
+       serv_port = get_socket_local_port(listen_fds[0]);
+       if (!ASSERT_GE(serv_port, 0, "get_sock_local_port"))
+               goto cleanup;
+       skel->bss->serv_port = (__be16) serv_port;
+
+       /* Run iterator program that destroys server sockets. */
+       start_iter_sockets(skel->progs.iter_udp6_server);
+
+       for (i = 0; i < num_listens; ++i) {
+               n = read(listen_fds[i], buf, sizeof(buf));
+               if (!ASSERT_EQ(n, -1, "read") ||
+                   !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"))
+                       break;
+       }
+       ASSERT_EQ(i, num_listens, "server socket");
+
+cleanup:
+       free_fds(listen_fds, num_listens);
+}
+
+void test_sock_destroy(void)
+{
+       struct sock_destroy_prog *skel;
+       struct nstoken *nstoken = NULL;
+       int cgroup_fd;
+
+       skel = sock_destroy_prog__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel_open"))
+               return;
+
+       cgroup_fd = test__join_cgroup("/sock_destroy");
+       if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+               goto cleanup;
+
+       skel->links.sock_connect = bpf_program__attach_cgroup(
+               skel->progs.sock_connect, cgroup_fd);
+       if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach"))
+               goto cleanup;
+
+       SYS(cleanup, "ip netns add %s", TEST_NS);
+       SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS);
+
+       nstoken = open_netns(TEST_NS);
+       if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+               goto cleanup;
+
+       if (test__start_subtest("tcp_client"))
+               test_tcp_client(skel);
+       if (test__start_subtest("tcp_server"))
+               test_tcp_server(skel);
+       if (test__start_subtest("udp_client"))
+               test_udp_client(skel);
+       if (test__start_subtest("udp_server"))
+               test_udp_server(skel);
+
+       RUN_TESTS(sock_destroy_prog_fail);
+
+cleanup:
+       if (nstoken)
+               close_netns(nstoken);
+       SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null");
+       if (cgroup_fd >= 0)
+               close(cgroup_fd);
+       sock_destroy_prog__destroy(skel);
+}
index aa4debf..9e6a5e3 100644 (file)
@@ -5,10 +5,15 @@
 static char bpf_log_buf[4096];
 static bool verbose;
 
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
 enum sockopt_test_error {
        OK = 0,
        DENY_LOAD,
        DENY_ATTACH,
+       EOPNOTSUPP_GETSOCKOPT,
        EPERM_GETSOCKOPT,
        EFAULT_GETSOCKOPT,
        EPERM_SETSOCKOPT,
@@ -273,10 +278,31 @@ static struct sockopt_test {
                .error = EFAULT_GETSOCKOPT,
        },
        {
-               .descr = "getsockopt: deny arbitrary ctx->retval",
+               .descr = "getsockopt: ignore >PAGE_SIZE optlen",
                .insns = {
-                       /* ctx->retval = 123 */
-                       BPF_MOV64_IMM(BPF_REG_0, 123),
+                       /* write 0xFF to the first optval byte */
+
+                       /* r6 = ctx->optval */
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct bpf_sockopt, optval)),
+                       /* r2 = ctx->optval */
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+                       /* r6 = ctx->optval + 1 */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+                       /* r7 = ctx->optval_end */
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+                                   offsetof(struct bpf_sockopt, optval_end)),
+
+                       /* if (ctx->optval + 1 <= ctx->optval_end) { */
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+                       /* ctx->optval[0] = 0xF0 */
+                       BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xFF),
+                       /* } */
+
+                       /* retval changes are ignored */
+                       /* ctx->retval = 5 */
+                       BPF_MOV64_IMM(BPF_REG_0, 5),
                        BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
                                    offsetof(struct bpf_sockopt, retval)),
 
@@ -287,9 +313,11 @@ static struct sockopt_test {
                .attach_type = BPF_CGROUP_GETSOCKOPT,
                .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
 
-               .get_optlen = 64,
-
-               .error = EFAULT_GETSOCKOPT,
+               .get_level = 1234,
+               .get_optname = 5678,
+               .get_optval = {}, /* the changes are ignored */
+               .get_optlen = PAGE_SIZE + 1,
+               .error = EOPNOTSUPP_GETSOCKOPT,
        },
        {
                .descr = "getsockopt: support smaller ctx->optlen",
@@ -649,6 +677,45 @@ static struct sockopt_test {
                .error = EFAULT_SETSOCKOPT,
        },
        {
+               .descr = "setsockopt: ignore >PAGE_SIZE optlen",
+               .insns = {
+                       /* write 0xFF to the first optval byte */
+
+                       /* r6 = ctx->optval */
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct bpf_sockopt, optval)),
+                       /* r2 = ctx->optval */
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+                       /* r6 = ctx->optval + 1 */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+                       /* r7 = ctx->optval_end */
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+                                   offsetof(struct bpf_sockopt, optval_end)),
+
+                       /* if (ctx->optval + 1 <= ctx->optval_end) { */
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+                       /* ctx->optval[0] = 0xF0 */
+                       BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0),
+                       /* } */
+
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .attach_type = BPF_CGROUP_SETSOCKOPT,
+               .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+               .set_level = SOL_IP,
+               .set_optname = IP_TOS,
+               .set_optval = {},
+               .set_optlen = PAGE_SIZE + 1,
+
+               .get_level = SOL_IP,
+               .get_optname = IP_TOS,
+               .get_optval = {}, /* the changes are ignored */
+               .get_optlen = 4,
+       },
+       {
                .descr = "setsockopt: allow changing ctx->optlen within bounds",
                .insns = {
                        /* r6 = ctx->optval */
@@ -906,6 +973,13 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
        }
 
        if (test->set_optlen) {
+               if (test->set_optlen >= PAGE_SIZE) {
+                       int num_pages = test->set_optlen / PAGE_SIZE;
+                       int remainder = test->set_optlen % PAGE_SIZE;
+
+                       test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
+               }
+
                err = setsockopt(sock_fd, test->set_level, test->set_optname,
                                 test->set_optval, test->set_optlen);
                if (err) {
@@ -921,7 +995,15 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
        }
 
        if (test->get_optlen) {
+               if (test->get_optlen >= PAGE_SIZE) {
+                       int num_pages = test->get_optlen / PAGE_SIZE;
+                       int remainder = test->get_optlen % PAGE_SIZE;
+
+                       test->get_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
+               }
+
                optval = malloc(test->get_optlen);
+               memset(optval, 0, test->get_optlen);
                socklen_t optlen = test->get_optlen;
                socklen_t expected_get_optlen = test->get_optlen_ret ?:
                        test->get_optlen;
@@ -929,6 +1011,8 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
                err = getsockopt(sock_fd, test->get_level, test->get_optname,
                                 optval, &optlen);
                if (err) {
+                       if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT)
+                               goto free_optval;
                        if (errno == EPERM && test->error == EPERM_GETSOCKOPT)
                                goto free_optval;
                        if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT)
@@ -976,7 +1060,9 @@ void test_sockopt(void)
                return;
 
        for (i = 0; i < ARRAY_SIZE(tests); i++) {
-               test__start_subtest(tests[i].descr);
+               if (!test__start_subtest(tests[i].descr))
+                       continue;
+
                ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr);
        }
 
index 60c17a8..917f486 100644 (file)
@@ -2,6 +2,8 @@
 #include <test_progs.h>
 #include "cgroup_helpers.h"
 
+#include "sockopt_inherit.skel.h"
+
 #define SOL_CUSTOM                     0xdeadbeef
 #define CUSTOM_INHERIT1                        0
 #define CUSTOM_INHERIT2                        1
@@ -132,58 +134,30 @@ static int start_server(void)
        return fd;
 }
 
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title,
-                      const char *prog_name)
-{
-       enum bpf_attach_type attach_type;
-       enum bpf_prog_type prog_type;
-       struct bpf_program *prog;
-       int err;
-
-       err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
-       if (err) {
-               log_err("Failed to deduct types for %s BPF program", prog_name);
-               return -1;
-       }
-
-       prog = bpf_object__find_program_by_name(obj, prog_name);
-       if (!prog) {
-               log_err("Failed to find %s BPF program", prog_name);
-               return -1;
-       }
-
-       err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
-                             attach_type, 0);
-       if (err) {
-               log_err("Failed to attach %s BPF program", prog_name);
-               return -1;
-       }
-
-       return 0;
-}
-
 static void run_test(int cgroup_fd)
 {
+       struct bpf_link *link_getsockopt = NULL;
+       struct bpf_link *link_setsockopt = NULL;
        int server_fd = -1, client_fd;
-       struct bpf_object *obj;
+       struct sockopt_inherit *obj;
        void *server_err;
        pthread_t tid;
        int err;
 
-       obj = bpf_object__open_file("sockopt_inherit.bpf.o", NULL);
-       if (!ASSERT_OK_PTR(obj, "obj_open"))
+       obj = sockopt_inherit__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
                return;
 
-       err = bpf_object__load(obj);
-       if (!ASSERT_OK(err, "obj_load"))
-               goto close_bpf_object;
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
 
-       err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt");
-       if (!ASSERT_OK(err, "prog_attach _getsockopt"))
+       link_getsockopt = bpf_program__attach_cgroup(obj->progs._getsockopt,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_getsockopt, "cg-attach-getsockopt"))
                goto close_bpf_object;
 
-       err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt");
-       if (!ASSERT_OK(err, "prog_attach _setsockopt"))
+       link_setsockopt = bpf_program__attach_cgroup(obj->progs._setsockopt,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt"))
                goto close_bpf_object;
 
        server_fd = start_server();
@@ -217,7 +191,10 @@ static void run_test(int cgroup_fd)
 close_server_fd:
        close(server_fd);
 close_bpf_object:
-       bpf_object__close(obj);
+       bpf_link__destroy(link_getsockopt);
+       bpf_link__destroy(link_setsockopt);
+
+       sockopt_inherit__destroy(obj);
 }
 
 void test_sockopt_inherit(void)
index 7f56593..759bbb6 100644 (file)
@@ -2,61 +2,13 @@
 #include <test_progs.h>
 #include "cgroup_helpers.h"
 
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name)
-{
-       enum bpf_attach_type attach_type;
-       enum bpf_prog_type prog_type;
-       struct bpf_program *prog;
-       int err;
-
-       err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
-       if (err) {
-               log_err("Failed to deduct types for %s BPF program", title);
-               return -1;
-       }
-
-       prog = bpf_object__find_program_by_name(obj, name);
-       if (!prog) {
-               log_err("Failed to find %s BPF program", name);
-               return -1;
-       }
-
-       err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
-                             attach_type, BPF_F_ALLOW_MULTI);
-       if (err) {
-               log_err("Failed to attach %s BPF program", name);
-               return -1;
-       }
-
-       return 0;
-}
+#include "sockopt_multi.skel.h"
 
-static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name)
-{
-       enum bpf_attach_type attach_type;
-       enum bpf_prog_type prog_type;
-       struct bpf_program *prog;
-       int err;
-
-       err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
-       if (err)
-               return -1;
-
-       prog = bpf_object__find_program_by_name(obj, name);
-       if (!prog)
-               return -1;
-
-       err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd,
-                              attach_type);
-       if (err)
-               return -1;
-
-       return 0;
-}
-
-static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
+static int run_getsockopt_test(struct sockopt_multi *obj, int cg_parent,
                               int cg_child, int sock_fd)
 {
+       struct bpf_link *link_parent = NULL;
+       struct bpf_link *link_child = NULL;
        socklen_t optlen;
        __u8 buf;
        int err;
@@ -89,8 +41,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
         * - child:  0x80 -> 0x90
         */
 
-       err = prog_attach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
-       if (err)
+       link_child = bpf_program__attach_cgroup(obj->progs._getsockopt_child,
+                                               cg_child);
+       if (!ASSERT_OK_PTR(link_child, "cg-attach-getsockopt_child"))
                goto detach;
 
        buf = 0x00;
@@ -113,8 +66,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
         * - parent: 0x90 -> 0xA0
         */
 
-       err = prog_attach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent");
-       if (err)
+       link_parent = bpf_program__attach_cgroup(obj->progs._getsockopt_parent,
+                                                cg_parent);
+       if (!ASSERT_OK_PTR(link_parent, "cg-attach-getsockopt_parent"))
                goto detach;
 
        buf = 0x00;
@@ -157,11 +111,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
         * - parent: unexpected 0x40, EPERM
         */
 
-       err = prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
-       if (err) {
-               log_err("Failed to detach child program");
-               goto detach;
-       }
+       bpf_link__destroy(link_child);
+       link_child = NULL;
 
        buf = 0x00;
        optlen = 1;
@@ -198,15 +149,17 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
        }
 
 detach:
-       prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
-       prog_detach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent");
+       bpf_link__destroy(link_child);
+       bpf_link__destroy(link_parent);
 
        return err;
 }
 
-static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
+static int run_setsockopt_test(struct sockopt_multi *obj, int cg_parent,
                               int cg_child, int sock_fd)
 {
+       struct bpf_link *link_parent = NULL;
+       struct bpf_link *link_child = NULL;
        socklen_t optlen;
        __u8 buf;
        int err;
@@ -236,8 +189,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
 
        /* Attach child program and make sure it adds 0x10. */
 
-       err = prog_attach(obj, cg_child, "cgroup/setsockopt", "_setsockopt");
-       if (err)
+       link_child = bpf_program__attach_cgroup(obj->progs._setsockopt,
+                                               cg_child);
+       if (!ASSERT_OK_PTR(link_child, "cg-attach-setsockopt_child"))
                goto detach;
 
        buf = 0x80;
@@ -263,8 +217,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
 
        /* Attach parent program and make sure it adds another 0x10. */
 
-       err = prog_attach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt");
-       if (err)
+       link_parent = bpf_program__attach_cgroup(obj->progs._setsockopt,
+                                                cg_parent);
+       if (!ASSERT_OK_PTR(link_parent, "cg-attach-setsockopt_parent"))
                goto detach;
 
        buf = 0x80;
@@ -289,8 +244,8 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
        }
 
 detach:
-       prog_detach(obj, cg_child, "cgroup/setsockopt", "_setsockopt");
-       prog_detach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt");
+       bpf_link__destroy(link_child);
+       bpf_link__destroy(link_parent);
 
        return err;
 }
@@ -298,9 +253,8 @@ detach:
 void test_sockopt_multi(void)
 {
        int cg_parent = -1, cg_child = -1;
-       struct bpf_object *obj = NULL;
+       struct sockopt_multi *obj = NULL;
        int sock_fd = -1;
-       int err = -1;
 
        cg_parent = test__join_cgroup("/parent");
        if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
@@ -310,13 +264,11 @@ void test_sockopt_multi(void)
        if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
                goto out;
 
-       obj = bpf_object__open_file("sockopt_multi.bpf.o", NULL);
-       if (!ASSERT_OK_PTR(obj, "obj_load"))
+       obj = sockopt_multi__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
                goto out;
 
-       err = bpf_object__load(obj);
-       if (!ASSERT_OK(err, "obj_load"))
-               goto out;
+       obj->bss->page_size = sysconf(_SC_PAGESIZE);
 
        sock_fd = socket(AF_INET, SOCK_STREAM, 0);
        if (!ASSERT_GE(sock_fd, 0, "socket"))
@@ -327,7 +279,7 @@ void test_sockopt_multi(void)
 
 out:
        close(sock_fd);
-       bpf_object__close(obj);
+       sockopt_multi__destroy(obj);
        close(cg_child);
        close(cg_parent);
 }
index 6b53b3c..6b2d300 100644 (file)
@@ -42,6 +42,8 @@ void test_sockopt_qos_to_cc(void)
        if (!ASSERT_OK_PTR(skel, "skel"))
                goto done;
 
+       skel->bss->page_size = sysconf(_SC_PAGESIZE);
+
        sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
        if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
                goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
new file mode 100644 (file)
index 0000000..4224727
--- /dev/null
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include "test_task_under_cgroup.skel.h"
+
+#define FOO    "/foo"
+
+void test_task_under_cgroup(void)
+{
+       struct test_task_under_cgroup *skel;
+       int ret, foo;
+       pid_t pid;
+
+       foo = test__join_cgroup(FOO);
+       if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
+               return;
+
+       skel = test_task_under_cgroup__open();
+       if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
+               goto cleanup;
+
+       skel->rodata->local_pid = getpid();
+       skel->bss->remote_pid = getpid();
+       skel->rodata->cgid = get_cgroup_id(FOO);
+
+       ret = test_task_under_cgroup__load(skel);
+       if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
+               goto cleanup;
+
+       ret = test_task_under_cgroup__attach(skel);
+       if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
+               goto cleanup;
+
+       pid = fork();
+       if (pid == 0)
+               exit(0);
+
+       ret = (pid == -1);
+       if (ASSERT_OK(ret, "fork process"))
+               wait(NULL);
+
+       test_task_under_cgroup__detach(skel);
+
+       ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid,
+                  "test task_under_cgroup");
+
+cleanup:
+       test_task_under_cgroup__destroy(skel);
+       close(foo);
+}
index 2497716..531621a 100644 (file)
@@ -55,6 +55,7 @@
 #include "verifier_spill_fill.skel.h"
 #include "verifier_spin_lock.skel.h"
 #include "verifier_stack_ptr.skel.h"
+#include "verifier_subprog_precision.skel.h"
 #include "verifier_subreg.skel.h"
 #include "verifier_uninit.skel.h"
 #include "verifier_unpriv.skel.h"
@@ -154,6 +155,7 @@ void test_verifier_sock(void)                 { RUN(verifier_sock); }
 void test_verifier_spill_fill(void)           { RUN(verifier_spill_fill); }
 void test_verifier_spin_lock(void)            { RUN(verifier_spin_lock); }
 void test_verifier_stack_ptr(void)            { RUN(verifier_stack_ptr); }
+void test_verifier_subprog_precision(void)    { RUN(verifier_subprog_precision); }
 void test_verifier_subreg(void)               { RUN(verifier_subreg); }
 void test_verifier_uninit(void)               { RUN(verifier_uninit); }
 void test_verifier_unpriv(void)               { RUN(verifier_unpriv); }
index d19f790..c3b4574 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/if_bonding.h>
 #include <linux/limits.h>
 #include <linux/udp.h>
+#include <uapi/linux/netdev.h>
 
 #include "xdp_dummy.skel.h"
 #include "xdp_redirect_multi_kern.skel.h"
@@ -492,6 +493,123 @@ out:
        system("ip link del bond_nest2");
 }
 
+static void test_xdp_bonding_features(struct skeletons *skeletons)
+{
+       LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+       int bond_idx, veth1_idx, err;
+       struct bpf_link *link = NULL;
+
+       if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+               goto out;
+
+       bond_idx = if_nametoindex("bond");
+       if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond"))
+               goto out;
+
+       /* query default xdp-feature for bond device */
+       err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+       if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+               goto out;
+
+       if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK,
+                      "bond query_opts.feature_flags"))
+               goto out;
+
+       if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"),
+                      "add veth{0,1} pair"))
+               goto out;
+
+       if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"),
+                      "add veth{2,3} pair"))
+               goto out;
+
+       if (!ASSERT_OK(system("ip link set veth0 master bond"),
+                      "add veth0 to master bond"))
+               goto out;
+
+       /* xdp-feature for bond device should be obtained from the single slave
+        * device (veth0)
+        */
+       err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+       if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+               goto out;
+
+       if (!ASSERT_EQ(query_opts.feature_flags,
+                      NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+                      NETDEV_XDP_ACT_RX_SG,
+                      "bond query_opts.feature_flags"))
+               goto out;
+
+       veth1_idx = if_nametoindex("veth1");
+       if (!ASSERT_GE(veth1_idx, 0, "if_nametoindex veth1"))
+               goto out;
+
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog,
+                                      veth1_idx);
+       if (!ASSERT_OK_PTR(link, "attach program to veth1"))
+               goto out;
+
+       /* xdp-feature for veth0 are changed */
+       err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+       if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+               goto out;
+
+       if (!ASSERT_EQ(query_opts.feature_flags,
+                      NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+                      NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT |
+                      NETDEV_XDP_ACT_NDO_XMIT_SG,
+                      "bond query_opts.feature_flags"))
+               goto out;
+
+       if (!ASSERT_OK(system("ip link set veth2 master bond"),
+                      "add veth2 to master bond"))
+               goto out;
+
+       err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+       if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+               goto out;
+
+       /* xdp-feature for bond device should be set to the most restrict
+        * value obtained from attached slave devices (veth0 and veth2)
+        */
+       if (!ASSERT_EQ(query_opts.feature_flags,
+                      NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+                      NETDEV_XDP_ACT_RX_SG,
+                      "bond query_opts.feature_flags"))
+               goto out;
+
+       if (!ASSERT_OK(system("ip link set veth2 nomaster"),
+                      "del veth2 to master bond"))
+               goto out;
+
+       err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+       if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+               goto out;
+
+       if (!ASSERT_EQ(query_opts.feature_flags,
+                      NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+                      NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT |
+                      NETDEV_XDP_ACT_NDO_XMIT_SG,
+                      "bond query_opts.feature_flags"))
+               goto out;
+
+       if (!ASSERT_OK(system("ip link set veth0 nomaster"),
+                      "del veth0 to master bond"))
+               goto out;
+
+       err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+       if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+               goto out;
+
+       ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK,
+                 "bond query_opts.feature_flags");
+out:
+       bpf_link__destroy(link);
+       system("ip link del veth0");
+       system("ip link del veth2");
+       system("ip link del bond");
+}
+
 static int libbpf_debug_print(enum libbpf_print_level level,
                              const char *format, va_list args)
 {
@@ -546,6 +664,9 @@ void serial_test_xdp_bonding(void)
        if (test__start_subtest("xdp_bonding_nested"))
                test_xdp_bonding_nested(&skeletons);
 
+       if (test__start_subtest("xdp_bonding_features"))
+               test_xdp_bonding_features(&skeletons);
+
        for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
                struct bond_test_case *test_case = &bond_test_cases[i];
 
index d3c1217..38a57a2 100644 (file)
 #define POINTER_VALUE  0xcafe4all
 #define TEST_DATA_LEN  64
 
+#ifndef __used
+#define __used __attribute__((used))
+#endif
+
 #if defined(__TARGET_ARCH_x86)
 #define SYSCALL_WRAPPER 1
 #define SYS_PREFIX "__x64_"
index 50f95ec..76d661b 100644 (file)
@@ -2,6 +2,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 struct map_value {
        struct prog_test_ref_kfunc __kptr *ptr;
@@ -14,9 +15,6 @@ struct {
        __uint(max_entries, 16);
 } array_map SEC(".maps");
 
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-
 static __noinline int cb1(void *map, void *key, void *value, void *ctx)
 {
        void *p = *(void **)ctx;
index b2a409e..932b8ec 100644 (file)
@@ -12,6 +12,7 @@ __u32 invocations = 0;
 __u32 assertion_error = 0;
 __u32 retval_value = 0;
 __u32 ctx_retval_value = 0;
+__u32 page_size = 0;
 
 SEC("cgroup/getsockopt")
 int get_retval(struct bpf_sockopt *ctx)
@@ -20,6 +21,10 @@ int get_retval(struct bpf_sockopt *ctx)
        ctx_retval_value = ctx->retval;
        __sync_fetch_and_add(&invocations, 1);
 
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+
        return 1;
 }
 
@@ -31,6 +36,10 @@ int set_eisconn(struct bpf_sockopt *ctx)
        if (bpf_set_retval(-EISCONN))
                assertion_error = 1;
 
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+
        return 1;
 }
 
@@ -41,5 +50,9 @@ int clear_retval(struct bpf_sockopt *ctx)
 
        ctx->retval = 0;
 
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+
        return 1;
 }
index d6e5903..b7fa880 100644 (file)
@@ -11,6 +11,7 @@
 __u32 invocations = 0;
 __u32 assertion_error = 0;
 __u32 retval_value = 0;
+__u32 page_size = 0;
 
 SEC("cgroup/setsockopt")
 int get_retval(struct bpf_sockopt *ctx)
@@ -18,6 +19,10 @@ int get_retval(struct bpf_sockopt *ctx)
        retval_value = bpf_get_retval();
        __sync_fetch_and_add(&invocations, 1);
 
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+
        return 1;
 }
 
@@ -29,6 +34,10 @@ int set_eunatch(struct bpf_sockopt *ctx)
        if (bpf_set_retval(-EUNATCH))
                assertion_error = 1;
 
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+
        return 0;
 }
 
@@ -40,6 +49,10 @@ int set_eisconn(struct bpf_sockopt *ctx)
        if (bpf_set_retval(-EISCONN))
                assertion_error = 1;
 
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+
        return 0;
 }
 
@@ -48,5 +61,9 @@ int legacy_eperm(struct bpf_sockopt *ctx)
 {
        __sync_fetch_and_add(&invocations, 1);
 
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+
        return 0;
 }
index 759eb5c..7ce7e82 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <errno.h>
 #include <string.h>
+#include <stdbool.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <linux/if_ether.h>
@@ -1378,3 +1379,310 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
 
        return 0;
 }
+
+/* bpf_dynptr_adjust can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_adjust_invalid(void *ctx)
+{
+       struct bpf_dynptr ptr;
+
+       /* this should fail */
+       bpf_dynptr_adjust(&ptr, 1, 2);
+
+       return 0;
+}
+
+/* bpf_dynptr_is_null can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_is_null_invalid(void *ctx)
+{
+       struct bpf_dynptr ptr;
+
+       /* this should fail */
+       bpf_dynptr_is_null(&ptr);
+
+       return 0;
+}
+
+/* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_is_rdonly_invalid(void *ctx)
+{
+       struct bpf_dynptr ptr;
+
+       /* this should fail */
+       bpf_dynptr_is_rdonly(&ptr);
+
+       return 0;
+}
+
+/* bpf_dynptr_size can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_size_invalid(void *ctx)
+{
+       struct bpf_dynptr ptr;
+
+       /* this should fail */
+       bpf_dynptr_size(&ptr);
+
+       return 0;
+}
+
+/* Only initialized dynptrs can be cloned */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int clone_invalid1(void *ctx)
+{
+       struct bpf_dynptr ptr1;
+       struct bpf_dynptr ptr2;
+
+       /* this should fail */
+       bpf_dynptr_clone(&ptr1, &ptr2);
+
+       return 0;
+}
+
+/* Can't overwrite an existing dynptr when cloning */
+SEC("?xdp")
+__failure __msg("cannot overwrite referenced dynptr")
+int clone_invalid2(struct xdp_md *xdp)
+{
+       struct bpf_dynptr ptr1;
+       struct bpf_dynptr clone;
+
+       bpf_dynptr_from_xdp(xdp, 0, &ptr1);
+
+       bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &clone);
+
+       /* this should fail */
+       bpf_dynptr_clone(&ptr1, &clone);
+
+       bpf_ringbuf_submit_dynptr(&clone, 0);
+
+       return 0;
+}
+
+/* Invalidating a dynptr should invalidate its clones */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate1(void *ctx)
+{
+       struct bpf_dynptr clone;
+       struct bpf_dynptr ptr;
+       char read_data[64];
+
+       bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+       bpf_dynptr_clone(&ptr, &clone);
+
+       bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+       /* this should fail */
+       bpf_dynptr_read(read_data, sizeof(read_data), &clone, 0, 0);
+
+       return 0;
+}
+
+/* Invalidating a dynptr should invalidate its parent */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate2(void *ctx)
+{
+       struct bpf_dynptr ptr;
+       struct bpf_dynptr clone;
+       char read_data[64];
+
+       bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+       bpf_dynptr_clone(&ptr, &clone);
+
+       bpf_ringbuf_submit_dynptr(&clone, 0);
+
+       /* this should fail */
+       bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+       return 0;
+}
+
+/* Invalidating a dynptr should invalidate its siblings */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate3(void *ctx)
+{
+       struct bpf_dynptr ptr;
+       struct bpf_dynptr clone1;
+       struct bpf_dynptr clone2;
+       char read_data[64];
+
+       bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+       bpf_dynptr_clone(&ptr, &clone1);
+
+       bpf_dynptr_clone(&ptr, &clone2);
+
+       bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+       /* this should fail */
+       bpf_dynptr_read(read_data, sizeof(read_data), &clone1, 0, 0);
+
+       return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its clones
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate4(void *ctx)
+{
+       struct bpf_dynptr ptr;
+       struct bpf_dynptr clone;
+       int *data;
+
+       bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+       bpf_dynptr_clone(&ptr, &clone);
+       data = bpf_dynptr_data(&clone, 0, sizeof(val));
+       if (!data)
+               return 0;
+
+       bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+       /* this should fail */
+       *data = 123;
+
+       return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its parent
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate5(void *ctx)
+{
+       struct bpf_dynptr ptr;
+       struct bpf_dynptr clone;
+       int *data;
+
+       bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+       data = bpf_dynptr_data(&ptr, 0, sizeof(val));
+       if (!data)
+               return 0;
+
+       bpf_dynptr_clone(&ptr, &clone);
+
+       bpf_ringbuf_submit_dynptr(&clone, 0);
+
+       /* this should fail */
+       *data = 123;
+
+       return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its sibling
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate6(void *ctx)
+{
+       struct bpf_dynptr ptr;
+       struct bpf_dynptr clone1;
+       struct bpf_dynptr clone2;
+       int *data;
+
+       bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+       bpf_dynptr_clone(&ptr, &clone1);
+
+       bpf_dynptr_clone(&ptr, &clone2);
+
+       data = bpf_dynptr_data(&clone1, 0, sizeof(val));
+       if (!data)
+               return 0;
+
+       bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+       /* this should fail */
+       *data = 123;
+
+       return 0;
+}
+
+/* A skb clone's data slices should be invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_data(struct __sk_buff *skb)
+{
+       char buffer[sizeof(__u32)] = {};
+       struct bpf_dynptr clone;
+       struct bpf_dynptr ptr;
+       __u32 *data;
+
+       bpf_dynptr_from_skb(skb, 0, &ptr);
+
+       bpf_dynptr_clone(&ptr, &clone);
+       data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer));
+       if (!data)
+               return XDP_DROP;
+
+       if (bpf_skb_pull_data(skb, skb->len))
+               return SK_DROP;
+
+       /* this should fail */
+       *data = 123;
+
+       return 0;
+}
+
+/* A xdp clone's data slices should be invalid anytime packet data changes */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_xdp_packet_data(struct xdp_md *xdp)
+{
+       char buffer[sizeof(__u32)] = {};
+       struct bpf_dynptr clone;
+       struct bpf_dynptr ptr;
+       struct ethhdr *hdr;
+       __u32 *data;
+
+       bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+       bpf_dynptr_clone(&ptr, &clone);
+       data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer));
+       if (!data)
+               return XDP_DROP;
+
+       if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+               return XDP_DROP;
+
+       /* this should fail */
+       *data = 123;
+
+       return 0;
+}
+
+/* Buffers that are provided must be sufficiently long */
+SEC("?cgroup_skb/egress")
+__failure __msg("memory, len pair leads to invalid memory access")
+int test_dynptr_skb_small_buff(struct __sk_buff *skb)
+{
+       struct bpf_dynptr ptr;
+       char buffer[8] = {};
+       __u64 *data;
+
+       if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+               err = 1;
+               return 1;
+       }
+
+       /* This may return NULL. SKB may require a buffer */
+       data = bpf_dynptr_slice(&ptr, 0, buffer, 9);
+
+       return !!data;
+}
index b2fa6c4..5985920 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2022 Facebook */
 
 #include <string.h>
+#include <stdbool.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
@@ -207,3 +208,339 @@ int test_dynptr_skb_data(struct __sk_buff *skb)
 
        return 1;
 }
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_adjust(void *ctx)
+{
+       struct bpf_dynptr ptr;
+       __u32 bytes = 64;
+       __u32 off = 10;
+       __u32 trim = 15;
+
+       if (bpf_get_current_pid_tgid() >> 32 != pid)
+               return 0;
+
+       err = bpf_ringbuf_reserve_dynptr(&ringbuf, bytes, 0, &ptr);
+       if (err) {
+               err = 1;
+               goto done;
+       }
+
+       if (bpf_dynptr_size(&ptr) != bytes) {
+               err = 2;
+               goto done;
+       }
+
+       /* Advance the dynptr by off */
+       err = bpf_dynptr_adjust(&ptr, off, bpf_dynptr_size(&ptr));
+       if (err) {
+               err = 3;
+               goto done;
+       }
+
+       if (bpf_dynptr_size(&ptr) != bytes - off) {
+               err = 4;
+               goto done;
+       }
+
+       /* Trim the dynptr */
+       err = bpf_dynptr_adjust(&ptr, off, 15);
+       if (err) {
+               err = 5;
+               goto done;
+       }
+
+       /* Check that the size was adjusted correctly */
+       if (bpf_dynptr_size(&ptr) != trim - off) {
+               err = 6;
+               goto done;
+       }
+
+done:
+       bpf_ringbuf_discard_dynptr(&ptr, 0);
+       return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_adjust_err(void *ctx)
+{
+       char write_data[45] = "hello there, world!!";
+       struct bpf_dynptr ptr;
+       __u32 size = 64;
+       __u32 off = 20;
+
+       if (bpf_get_current_pid_tgid() >> 32 != pid)
+               return 0;
+
+       if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+               err = 1;
+               goto done;
+       }
+
+       /* Check that start can't be greater than end */
+       if (bpf_dynptr_adjust(&ptr, 5, 1) != -EINVAL) {
+               err = 2;
+               goto done;
+       }
+
+       /* Check that start can't be greater than size */
+       if (bpf_dynptr_adjust(&ptr, size + 1, size + 1) != -ERANGE) {
+               err = 3;
+               goto done;
+       }
+
+       /* Check that end can't be greater than size */
+       if (bpf_dynptr_adjust(&ptr, 0, size + 1) != -ERANGE) {
+               err = 4;
+               goto done;
+       }
+
+       if (bpf_dynptr_adjust(&ptr, off, size)) {
+               err = 5;
+               goto done;
+       }
+
+       /* Check that you can't write more bytes than available into the dynptr
+        * after you've adjusted it
+        */
+       if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+               err = 6;
+               goto done;
+       }
+
+       /* Check that even after adjusting, submitting/discarding
+        * a ringbuf dynptr works
+        */
+       bpf_ringbuf_submit_dynptr(&ptr, 0);
+       return 0;
+
+done:
+       bpf_ringbuf_discard_dynptr(&ptr, 0);
+       return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_zero_size_dynptr(void *ctx)
+{
+       char write_data = 'x', read_data;
+       struct bpf_dynptr ptr;
+       __u32 size = 64;
+
+       if (bpf_get_current_pid_tgid() >> 32 != pid)
+               return 0;
+
+       if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+               err = 1;
+               goto done;
+       }
+
+       /* After this, the dynptr has a size of 0 */
+       if (bpf_dynptr_adjust(&ptr, size, size)) {
+               err = 2;
+               goto done;
+       }
+
+       /* Test that reading + writing non-zero bytes is not ok */
+       if (bpf_dynptr_read(&read_data, sizeof(read_data), &ptr, 0, 0) != -E2BIG) {
+               err = 3;
+               goto done;
+       }
+
+       if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+               err = 4;
+               goto done;
+       }
+
+       /* Test that reading + writing 0 bytes from a 0-size dynptr is ok */
+       if (bpf_dynptr_read(&read_data, 0, &ptr, 0, 0)) {
+               err = 5;
+               goto done;
+       }
+
+       if (bpf_dynptr_write(&ptr, 0, &write_data, 0, 0)) {
+               err = 6;
+               goto done;
+       }
+
+       err = 0;
+
+done:
+       bpf_ringbuf_discard_dynptr(&ptr, 0);
+       return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_is_null(void *ctx)
+{
+       struct bpf_dynptr ptr1;
+       struct bpf_dynptr ptr2;
+       __u64 size = 4;
+
+       if (bpf_get_current_pid_tgid() >> 32 != pid)
+               return 0;
+
+       /* Pass in invalid flags, get back an invalid dynptr */
+       if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 123, &ptr1) != -EINVAL) {
+               err = 1;
+               goto exit_early;
+       }
+
+       /* Test that the invalid dynptr is null */
+       if (!bpf_dynptr_is_null(&ptr1)) {
+               err = 2;
+               goto exit_early;
+       }
+
+       /* Get a valid dynptr */
+       if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr2)) {
+               err = 3;
+               goto exit;
+       }
+
+       /* Test that the valid dynptr is not null */
+       if (bpf_dynptr_is_null(&ptr2)) {
+               err = 4;
+               goto exit;
+       }
+
+exit:
+       bpf_ringbuf_discard_dynptr(&ptr2, 0);
+exit_early:
+       bpf_ringbuf_discard_dynptr(&ptr1, 0);
+       return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_is_rdonly(struct __sk_buff *skb)
+{
+       struct bpf_dynptr ptr1;
+       struct bpf_dynptr ptr2;
+       struct bpf_dynptr ptr3;
+
+       /* Pass in invalid flags, get back an invalid dynptr */
+       if (bpf_dynptr_from_skb(skb, 123, &ptr1) != -EINVAL) {
+               err = 1;
+               return 0;
+       }
+
+       /* Test that an invalid dynptr is_rdonly returns false */
+       if (bpf_dynptr_is_rdonly(&ptr1)) {
+               err = 2;
+               return 0;
+       }
+
+       /* Get a read-only dynptr */
+       if (bpf_dynptr_from_skb(skb, 0, &ptr2)) {
+               err = 3;
+               return 0;
+       }
+
+       /* Test that the dynptr is read-only */
+       if (!bpf_dynptr_is_rdonly(&ptr2)) {
+               err = 4;
+               return 0;
+       }
+
+       /* Get a read-writeable dynptr */
+       if (bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr3)) {
+               err = 5;
+               goto done;
+       }
+
+       /* Test that the dynptr is read-only */
+       if (bpf_dynptr_is_rdonly(&ptr3)) {
+               err = 6;
+               goto done;
+       }
+
+done:
+       bpf_ringbuf_discard_dynptr(&ptr3, 0);
+       return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_clone(struct __sk_buff *skb)
+{
+       struct bpf_dynptr ptr1;
+       struct bpf_dynptr ptr2;
+       __u32 off = 2, size;
+
+       /* Get a dynptr */
+       if (bpf_dynptr_from_skb(skb, 0, &ptr1)) {
+               err = 1;
+               return 0;
+       }
+
+       if (bpf_dynptr_adjust(&ptr1, off, bpf_dynptr_size(&ptr1))) {
+               err = 2;
+               return 0;
+       }
+
+       /* Clone the dynptr */
+       if (bpf_dynptr_clone(&ptr1, &ptr2)) {
+               err = 3;
+               return 0;
+       }
+
+       size = bpf_dynptr_size(&ptr1);
+
+       /* Check that the clone has the same size and rd-only */
+       if (bpf_dynptr_size(&ptr2) != size) {
+               err = 4;
+               return 0;
+       }
+
+       if (bpf_dynptr_is_rdonly(&ptr2) != bpf_dynptr_is_rdonly(&ptr1)) {
+               err = 5;
+               return 0;
+       }
+
+       /* Advance and trim the original dynptr */
+       bpf_dynptr_adjust(&ptr1, 5, 5);
+
+       /* Check that only original dynptr was affected, and the clone wasn't */
+       if (bpf_dynptr_size(&ptr2) != size) {
+               err = 6;
+               return 0;
+       }
+
+       return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_no_buff(struct __sk_buff *skb)
+{
+       struct bpf_dynptr ptr;
+       __u64 *data;
+
+       if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+               err = 1;
+               return 1;
+       }
+
+       /* This may return NULL. SKB may require a buffer */
+       data = bpf_dynptr_slice(&ptr, 0, NULL, 1);
+
+       return !!data;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_strcmp(struct __sk_buff *skb)
+{
+       struct bpf_dynptr ptr;
+       char *data;
+
+       if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+               err = 1;
+               return 1;
+       }
+
+       /* This may return NULL. SKB may require a buffer */
+       data = bpf_dynptr_slice(&ptr, 0, NULL, 10);
+       if (data) {
+               bpf_strncmp(data, 10, "foo");
+               return 1;
+       }
+
+       return 1;
+}
index be16143..6b9b3c5 100644 (file)
@@ -651,29 +651,25 @@ int iter_stack_array_loop(const void *ctx)
        return sum;
 }
 
-#define ARR_SZ 16
-
-static __noinline void fill(struct bpf_iter_num *it, int *arr, int mul)
+static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul)
 {
-       int *t;
-       __u64 i;
+       int *t, i;
 
        while ((t = bpf_iter_num_next(it))) {
                i = *t;
-               if (i >= ARR_SZ)
+               if (i >= n)
                        break;
                arr[i] =  i * mul;
        }
 }
 
-static __noinline int sum(struct bpf_iter_num *it, int *arr)
+static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n)
 {
-       int *t, sum = 0;;
-       __u64 i;
+       int *t, i, sum = 0;;
 
        while ((t = bpf_iter_num_next(it))) {
                i = *t;
-               if (i >= ARR_SZ)
+               if (i >= n)
                        break;
                sum += arr[i];
        }
@@ -685,7 +681,7 @@ SEC("raw_tp")
 __success
 int iter_pass_iter_ptr_to_subprog(const void *ctx)
 {
-       int arr1[ARR_SZ], arr2[ARR_SZ];
+       int arr1[16], arr2[32];
        struct bpf_iter_num it;
        int n, sum1, sum2;
 
@@ -694,25 +690,25 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx)
        /* fill arr1 */
        n = ARRAY_SIZE(arr1);
        bpf_iter_num_new(&it, 0, n);
-       fill(&it, arr1, 2);
+       fill(&it, arr1, n, 2);
        bpf_iter_num_destroy(&it);
 
        /* fill arr2 */
        n = ARRAY_SIZE(arr2);
        bpf_iter_num_new(&it, 0, n);
-       fill(&it, arr2, 10);
+       fill(&it, arr2, n, 10);
        bpf_iter_num_destroy(&it);
 
        /* sum arr1 */
        n = ARRAY_SIZE(arr1);
        bpf_iter_num_new(&it, 0, n);
-       sum1 = sum(&it, arr1);
+       sum1 = sum(&it, arr1, n);
        bpf_iter_num_destroy(&it);
 
        /* sum arr2 */
        n = ARRAY_SIZE(arr2);
        bpf_iter_num_new(&it, 0, n);
-       sum2 = sum(&it, arr2);
+       sum2 = sum(&it, arr2, n);
        bpf_iter_num_destroy(&it);
 
        bpf_printk("sum1=%d, sum2=%d", sum1, sum2);
index 13f00ca..f9789e6 100644 (file)
@@ -3,13 +3,11 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 static struct prog_test_ref_kfunc __kptr *v;
 long total_sum = -1;
 
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-
 SEC("tc")
 int test_jit_probe_mem(struct __sk_buff *ctx)
 {
index 767472b..7632d9e 100644 (file)
@@ -1,8 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-
-extern void bpf_kfunc_call_test_destructive(void) __ksym;
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 SEC("tc")
 int kfunc_destructive_test(void)
index b98313d..4b0b7b7 100644 (file)
@@ -2,14 +2,7 @@
 /* Copyright (c) 2021 Facebook */
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
-extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
-extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
-extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
-extern void bpf_kfunc_call_int_mem_release(int *p) __ksym;
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 struct syscall_test_args {
        __u8 data[16];
index 4e8fed7..d532af0 100644 (file)
@@ -1,8 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-
-extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 SEC("tc")
 int kfunc_call_fail(struct __sk_buff *ctx)
index 7daa8f5..cf68d1e 100644 (file)
@@ -2,22 +2,7 @@
 /* Copyright (c) 2021 Facebook */
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-
-extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
-extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
-extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
-                                 __u32 c, __u64 d) __ksym;
-
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
-extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
-extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
-extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
-extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
-extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
-extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
-extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym;
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 SEC("tc")
 int kfunc_call_test4(struct __sk_buff *skb)
index c1fdeca..2380c75 100644 (file)
@@ -1,13 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 extern const int bpf_prog_active __ksym;
-extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
-                                 __u32 c, __u64 d) __ksym;
-extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
 int active_res = -1;
 int sk_state_res = -1;
 
@@ -28,7 +23,7 @@ int __noinline f1(struct __sk_buff *skb)
        if (active)
                active_res = *active;
 
-       sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state;
+       sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
 
        return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
 }
index 0ef286d..0683808 100644 (file)
@@ -5,7 +5,8 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_core_read.h>
-#include "bpf_experimental.h"
+#include "../bpf_experimental.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 struct node_data {
        long key;
@@ -32,8 +33,6 @@ struct map_value {
  */
 struct node_data *just_here_because_btf_bug;
 
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-
 struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __type(key, int);
index d715004..da30f0d 100644 (file)
@@ -2,6 +2,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 struct map_value {
        struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
@@ -114,10 +115,6 @@ DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
 DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
 DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
 
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym;
-
 #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
 
 static void test_kptr_unref(struct map_value *v)
index da8c724..450bb37 100644 (file)
@@ -4,6 +4,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_core_read.h>
 #include "bpf_misc.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
 
 struct map_value {
        char buf[8];
@@ -19,9 +20,6 @@ struct array_map {
        __uint(max_entries, 1);
 } array_map SEC(".maps");
 
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-
 SEC("?tc")
 __failure __msg("kptr access size must be BPF_DW")
 int size_not_bpf_dw(struct __sk_buff *ctx)
diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c
new file mode 100644 (file)
index 0000000..9e0bf7a
--- /dev/null
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "bpf_tracing_net.h"
+
+__be16 serv_port = 0;
+
+int bpf_sock_destroy(struct sock_common *sk) __ksym;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+} tcp_conn_sockets SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+} udp_conn_sockets SEC(".maps");
+
+SEC("cgroup/connect6")
+int sock_connect(struct bpf_sock_addr *ctx)
+{
+       __u64 sock_cookie = 0;
+       int key = 0;
+       __u32 keyc = 0;
+
+       if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+               return 1;
+
+       sock_cookie = bpf_get_socket_cookie(ctx);
+       if (ctx->protocol == IPPROTO_TCP)
+               bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0);
+       else if (ctx->protocol == IPPROTO_UDP)
+               bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0);
+       else
+               return 1;
+
+       return 1;
+}
+
+SEC("iter/tcp")
+int iter_tcp6_client(struct bpf_iter__tcp *ctx)
+{
+       struct sock_common *sk_common = ctx->sk_common;
+       __u64 sock_cookie = 0;
+       __u64 *val;
+       int key = 0;
+
+       if (!sk_common)
+               return 0;
+
+       if (sk_common->skc_family != AF_INET6)
+               return 0;
+
+       sock_cookie  = bpf_get_socket_cookie(sk_common);
+       val = bpf_map_lookup_elem(&tcp_conn_sockets, &key);
+       if (!val)
+               return 0;
+       /* Destroy connected client sockets. */
+       if (sock_cookie == *val)
+               bpf_sock_destroy(sk_common);
+
+       return 0;
+}
+
+SEC("iter/tcp")
+int iter_tcp6_server(struct bpf_iter__tcp *ctx)
+{
+       struct sock_common *sk_common = ctx->sk_common;
+       const struct inet_connection_sock *icsk;
+       const struct inet_sock *inet;
+       struct tcp6_sock *tcp_sk;
+       __be16 srcp;
+
+       if (!sk_common)
+               return 0;
+
+       if (sk_common->skc_family != AF_INET6)
+               return 0;
+
+       tcp_sk = bpf_skc_to_tcp6_sock(sk_common);
+       if (!tcp_sk)
+               return 0;
+
+       icsk = &tcp_sk->tcp.inet_conn;
+       inet = &icsk->icsk_inet;
+       srcp = inet->inet_sport;
+
+       /* Destroy server sockets. */
+       if (srcp == serv_port)
+               bpf_sock_destroy(sk_common);
+
+       return 0;
+}
+
+
+SEC("iter/udp")
+int iter_udp6_client(struct bpf_iter__udp *ctx)
+{
+       struct udp_sock *udp_sk = ctx->udp_sk;
+       struct sock *sk = (struct sock *) udp_sk;
+       __u64 sock_cookie = 0, *val;
+       int key = 0;
+
+       if (!sk)
+               return 0;
+
+       sock_cookie  = bpf_get_socket_cookie(sk);
+       val = bpf_map_lookup_elem(&udp_conn_sockets, &key);
+       if (!val)
+               return 0;
+       /* Destroy connected client sockets. */
+       if (sock_cookie == *val)
+               bpf_sock_destroy((struct sock_common *)sk);
+
+       return 0;
+}
+
+SEC("iter/udp")
+int iter_udp6_server(struct bpf_iter__udp *ctx)
+{
+       struct udp_sock *udp_sk = ctx->udp_sk;
+       struct sock *sk = (struct sock *) udp_sk;
+       struct inet_sock *inet;
+       __be16 srcp;
+
+       if (!sk)
+               return 0;
+
+       inet = &udp_sk->inet;
+       srcp = inet->inet_sport;
+       if (srcp == serv_port)
+               bpf_sock_destroy((struct sock_common *)sk);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c
new file mode 100644 (file)
index 0000000..dd6850b
--- /dev/null
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int bpf_sock_destroy(struct sock_common *sk) __ksym;
+
+SEC("tp_btf/tcp_destroy_sock")
+__failure __msg("calling kernel function bpf_sock_destroy is not allowed")
+int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk)
+{
+       /* should not load */
+       bpf_sock_destroy((struct sock_common *)sk);
+
+       return 0;
+}
+
index 9fb241b..c8f59ca 100644 (file)
@@ -9,6 +9,8 @@ char _license[] SEC("license") = "GPL";
 #define CUSTOM_INHERIT2                        1
 #define CUSTOM_LISTENER                        2
 
+__u32 page_size = 0;
+
 struct sockopt_inherit {
        __u8 val;
 };
@@ -55,7 +57,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
        __u8 *optval = ctx->optval;
 
        if (ctx->level != SOL_CUSTOM)
-               return 1; /* only interested in SOL_CUSTOM */
+               goto out; /* only interested in SOL_CUSTOM */
 
        if (optval + 1 > optval_end)
                return 0; /* EPERM, bounds check */
@@ -70,6 +72,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
        ctx->optlen = 1;
 
        return 1;
+
+out:
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+       return 1;
 }
 
 SEC("cgroup/setsockopt")
@@ -80,7 +88,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
        __u8 *optval = ctx->optval;
 
        if (ctx->level != SOL_CUSTOM)
-               return 1; /* only interested in SOL_CUSTOM */
+               goto out; /* only interested in SOL_CUSTOM */
 
        if (optval + 1 > optval_end)
                return 0; /* EPERM, bounds check */
@@ -93,4 +101,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
        ctx->optlen = -1;
 
        return 1;
+
+out:
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+       return 1;
 }
index 177a590..96f29fc 100644 (file)
@@ -5,6 +5,8 @@
 
 char _license[] SEC("license") = "GPL";
 
+__u32 page_size = 0;
+
 SEC("cgroup/getsockopt")
 int _getsockopt_child(struct bpf_sockopt *ctx)
 {
@@ -12,7 +14,7 @@ int _getsockopt_child(struct bpf_sockopt *ctx)
        __u8 *optval = ctx->optval;
 
        if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
-               return 1;
+               goto out;
 
        if (optval + 1 > optval_end)
                return 0; /* EPERM, bounds check */
@@ -26,6 +28,12 @@ int _getsockopt_child(struct bpf_sockopt *ctx)
        ctx->optlen = 1;
 
        return 1;
+
+out:
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+       return 1;
 }
 
 SEC("cgroup/getsockopt")
@@ -35,7 +43,7 @@ int _getsockopt_parent(struct bpf_sockopt *ctx)
        __u8 *optval = ctx->optval;
 
        if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
-               return 1;
+               goto out;
 
        if (optval + 1 > optval_end)
                return 0; /* EPERM, bounds check */
@@ -49,6 +57,12 @@ int _getsockopt_parent(struct bpf_sockopt *ctx)
        ctx->optlen = 1;
 
        return 1;
+
+out:
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+       return 1;
 }
 
 SEC("cgroup/setsockopt")
@@ -58,7 +72,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
        __u8 *optval = ctx->optval;
 
        if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
-               return 1;
+               goto out;
 
        if (optval + 1 > optval_end)
                return 0; /* EPERM, bounds check */
@@ -67,4 +81,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
        ctx->optlen = 1;
 
        return 1;
+
+out:
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+       return 1;
 }
index 1bce83b..dbe235e 100644 (file)
@@ -9,6 +9,8 @@
 
 char _license[] SEC("license") = "GPL";
 
+__u32 page_size = 0;
+
 SEC("cgroup/setsockopt")
 int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
 {
@@ -19,7 +21,7 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
        char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
 
        if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
-               return 1;
+               goto out;
 
        if (optval + 1 > optval_end)
                return 0; /* EPERM, bounds check */
@@ -36,4 +38,10 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
                        return 0;
        }
        return 1;
+
+out:
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+       return 1;
 }
index fe1df4c..cb990a7 100644 (file)
@@ -37,7 +37,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
        /* Bypass AF_NETLINK. */
        sk = ctx->sk;
        if (sk && sk->family == AF_NETLINK)
-               return 1;
+               goto out;
 
        /* Make sure bpf_get_netns_cookie is callable.
         */
@@ -52,8 +52,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 * let next BPF program in the cgroup chain or kernel
                 * handle it.
                 */
-               ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
-               return 1;
+               goto out;
        }
 
        if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
@@ -61,7 +60,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 * let next BPF program in the cgroup chain or kernel
                 * handle it.
                 */
-               return 1;
+               goto out;
        }
 
        if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
@@ -69,7 +68,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 * let next BPF program in the cgroup chain or kernel
                 * handle it.
                 */
-               return 1;
+               goto out;
        }
 
        if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
@@ -85,7 +84,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
                if (((struct tcp_zerocopy_receive *)optval)->address != 0)
                        return 0; /* unexpected data */
 
-               return 1;
+               goto out;
        }
 
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
@@ -129,6 +128,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
        ctx->optlen = 1;
 
        return 1;
+
+out:
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+       return 1;
 }
 
 SEC("cgroup/setsockopt")
@@ -142,7 +147,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
        /* Bypass AF_NETLINK. */
        sk = ctx->sk;
        if (sk && sk->family == AF_NETLINK)
-               return 1;
+               goto out;
 
        /* Make sure bpf_get_netns_cookie is callable.
         */
@@ -224,4 +229,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
                           */
 
        return 1;
+
+out:
+       /* optval larger than PAGE_SIZE use kernel's buffer. */
+       if (ctx->optlen > page_size)
+               ctx->optlen = 0;
+       return 1;
 }
index b85fc8c..17a9f59 100644 (file)
@@ -10,6 +10,8 @@
 static __attribute__ ((noinline))
 int f0(int var, struct __sk_buff *skb)
 {
+       asm volatile ("");
+
        return skb->len;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
new file mode 100644 (file)
index 0000000..2588f23
--- /dev/null
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* rodata section */
+const volatile pid_t pid;
+const volatile size_t bss_array_len;
+const volatile size_t data_array_len;
+
+/* bss section */
+int sum = 0;
+int array[1];
+
+/* custom data section */
+int my_array[1] SEC(".data.custom");
+
+/* custom data section which should NOT be resizable,
+ * since it contains a single var which is not an array
+ */
+int my_int SEC(".data.non_array");
+
+/* custom data section which should NOT be resizable,
+ * since its last var is not an array
+ */
+int my_array_first[1] SEC(".data.array_not_last");
+int my_int_last SEC(".data.array_not_last");
+
+SEC("tp/syscalls/sys_enter_getpid")
+int bss_array_sum(void *ctx)
+{
+       if (pid != (bpf_get_current_pid_tgid() >> 32))
+               return 0;
+
+       sum = 0;
+
+       for (size_t i = 0; i < bss_array_len; ++i)
+               sum += array[i];
+
+       return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int data_array_sum(void *ctx)
+{
+       if (pid != (bpf_get_current_pid_tgid() >> 32))
+               return 0;
+
+       sum = 0;
+
+       for (size_t i = 0; i < data_array_len; ++i)
+               sum += my_array[i];
+
+       return 0;
+}
index bbad3c2..f75e531 100644 (file)
@@ -265,7 +265,10 @@ static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
 
 static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
 {
-       __u16 *half = (__u16 *)&sk->dst_port;
+       __u16 *half;
+
+       asm volatile ("");
+       half = (__u16 *)&sk->dst_port;
        return half[0] == bpf_htons(0xcafe);
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
new file mode 100644 (file)
index 0000000..56cdc0a
--- /dev/null
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+const volatile int local_pid;
+const volatile __u64 cgid;
+int remote_pid;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags)
+{
+       struct cgroup *cgrp = NULL;
+       struct task_struct *acquired;
+
+       if (local_pid != (bpf_get_current_pid_tgid() >> 32))
+               return 0;
+
+       acquired = bpf_task_acquire(task);
+       if (!acquired)
+               return 0;
+
+       if (local_pid == acquired->tgid)
+               goto out;
+
+       cgrp = bpf_cgroup_from_id(cgid);
+       if (!cgrp)
+               goto out;
+
+       if (bpf_task_under_cgroup(acquired, cgrp))
+               remote_pid = acquired->tgid;
+
+out:
+       if (cgrp)
+               bpf_cgroup_release(cgrp);
+       bpf_task_release(acquired);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 25ee4a2..78c368e 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2022 Meta */
 #include <stddef.h>
 #include <string.h>
+#include <stdbool.h>
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
new file mode 100644 (file)
index 0000000..db6b314
--- /dev/null
@@ -0,0 +1,536 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+int vals[] SEC(".data.vals") = {1, 2, 3, 4};
+
+__naked __noinline __used
+static unsigned long identity_subprog()
+{
+       /* the simplest *static* 64-bit identity function */
+       asm volatile (
+               "r0 = r1;"
+               "exit;"
+       );
+}
+
+__noinline __used
+unsigned long global_identity_subprog(__u64 x)
+{
+       /* the simplest *global* 64-bit identity function */
+       return x;
+}
+
+__naked __noinline __used
+static unsigned long callback_subprog()
+{
+       /* the simplest callback function */
+       asm volatile (
+               "r0 = 0;"
+               "exit;"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int subprog_result_precise(void)
+{
+       asm volatile (
+               "r6 = 3;"
+               /* pass r6 through r1 into subprog to get it back as r0;
+                * this whole chain will have to be marked as precise later
+                */
+               "r1 = r6;"
+               "call identity_subprog;"
+               /* now use subprog's returned value (which is a
+                * r6 -> r1 -> r0 chain), as index into vals array, forcing
+                * all of that to be known precisely
+                */
+               "r0 *= 4;"
+               "r1 = %[vals];"
+               /* here r0->r1->r6 chain is forced to be precise and has to be
+                * propagated back to the beginning, including through the
+                * subprog call
+                */
+               "r1 += r0;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals)
+               : __clobber_common, "r6"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0")
+__msg("mark_precise: frame0: regs=r0 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 7: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (a5) if r0 < 0x4 goto pc+1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (85) call pc+7")
+__naked int global_subprog_result_precise(void)
+{
+       asm volatile (
+               "r6 = 3;"
+               /* pass r6 through r1 into subprog to get it back as r0;
+                * given global_identity_subprog is global, precision won't
+                * propagate all the way back to r6
+                */
+               "r1 = r6;"
+               "call global_identity_subprog;"
+               /* now use subprog's returned value (which is unknown now, so
+                * we need to clamp it), as index into vals array, forcing r0
+                * to be marked precise (with no effect on r6, though)
+                */
+               "if r0 < %[vals_arr_sz] goto 1f;"
+               "r0 = %[vals_arr_sz] - 1;"
+       "1:"
+               "r0 *= 4;"
+               "r1 = %[vals];"
+               /* here r0 is forced to be precise and has to be
+                * propagated back to the global subprog call, but it
+                * shouldn't go all the way to mark r6 as precise
+                */
+               "r1 += r0;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals),
+                 __imm_const(vals_arr_sz, ARRAY_SIZE(vals))
+               : __clobber_common, "r6"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("14: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
+__msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
+__msg("mark_precise: frame0: parent state regs=r0 stack=:")
+__msg("mark_precise: frame0: last_idx 18 first_idx 0")
+__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
+__naked int callback_result_precise(void)
+{
+       asm volatile (
+               "r6 = 3;"
+
+               /* call subprog and use result; r0 shouldn't propagate back to
+                * callback_subprog
+                */
+               "r1 = r6;"                      /* nr_loops */
+               "r2 = %[callback_subprog];"     /* callback_fn */
+               "r3 = 0;"                       /* callback_ctx */
+               "r4 = 0;"                       /* flags */
+               "call %[bpf_loop];"
+
+               "r6 = r0;"
+               "if r6 > 3 goto 1f;"
+               "r6 *= 4;"
+               "r1 = %[vals];"
+               /* here r6 is forced to be precise and has to be propagated
+                * back to the bpf_loop() call, but not beyond
+                */
+               "r1 += r6;"
+               "r0 = *(u32 *)(r1 + 0);"
+       "1:"
+               "exit;"
+               :
+               : __imm_ptr(vals),
+                 __imm_ptr(callback_subprog),
+                 __imm(bpf_loop)
+               : __clobber_common, "r6"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 7 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 10: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs= stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise(void)
+{
+       asm volatile (
+               "r6 = 3;"
+
+               /* call subprog and ignore result; we need this call only to
+                * complicate jump history
+                */
+               "r1 = 0;"
+               "call identity_subprog;"
+
+               "r6 *= 4;"
+               "r1 = %[vals];"
+               /* here r6 is forced to be precise and has to be propagated
+                * back to the beginning, handling (and ignoring) subprog call
+                */
+               "r1 += r6;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals)
+               : __clobber_common, "r6"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 7 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise_global(void)
+{
+       asm volatile (
+               "r6 = 3;"
+
+               /* call subprog and ignore result; we need this call only to
+                * complicate jump history
+                */
+               "r1 = 0;"
+               "call global_identity_subprog;"
+
+               "r6 *= 4;"
+               "r1 = %[vals];"
+               /* here r6 is forced to be precise and has to be propagated
+                * back to the beginning, handling (and ignoring) subprog call
+                */
+               "r1 += r6;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals)
+               : __clobber_common, "r6"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("12: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 12 first_idx 10")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: parent state regs=r6 stack=:")
+__msg("mark_precise: frame0: last_idx 16 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise_with_callback(void)
+{
+       asm volatile (
+               "r6 = 3;"
+
+               /* call subprog and ignore result; we need this call only to
+                * complicate jump history
+                */
+               "r1 = 1;"                       /* nr_loops */
+               "r2 = %[callback_subprog];"     /* callback_fn */
+               "r3 = 0;"                       /* callback_ctx */
+               "r4 = 0;"                       /* flags */
+               "call %[bpf_loop];"
+
+               "r6 *= 4;"
+               "r1 = %[vals];"
+               /* here r6 is forced to be precise and has to be propagated
+                * back to the beginning, handling (and ignoring) callback call
+                */
+               "r1 += r6;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals),
+                 __imm_ptr(callback_subprog),
+                 __imm(bpf_loop)
+               : __clobber_common, "r6"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 6")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 13 first_idx 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 13: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 12: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs= stack= before 5: (85) call pc+6")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_stack_slot_precise(void)
+{
+       asm volatile (
+               /* spill reg */
+               "r6 = 3;"
+               "*(u64 *)(r10 - 8) = r6;"
+
+               /* call subprog and ignore result; we need this call only to
+                * complicate jump history
+                */
+               "r1 = 0;"
+               "call identity_subprog;"
+
+               /* restore reg from stack; in this case we'll be carrying
+                * stack mask when going back into subprog through jump
+                * history
+                */
+               "r6 = *(u64 *)(r10 - 8);"
+
+               "r6 *= 4;"
+               "r1 = %[vals];"
+               /* here r6 is forced to be precise and has to be propagated
+                * back to the beginning, handling (and ignoring) subprog call
+                */
+               "r1 += r6;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals)
+               : __clobber_common, "r6"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 6")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_stack_slot_precise_global(void)
+{
+       asm volatile (
+               /* spill reg */
+               "r6 = 3;"
+               "*(u64 *)(r10 - 8) = r6;"
+
+               /* call subprog and ignore result; we need this call only to
+                * complicate jump history
+                */
+               "r1 = 0;"
+               "call global_identity_subprog;"
+
+               /* restore reg from stack; in this case we'll be carrying
+                * stack mask when going back into subprog through jump
+                * history
+                */
+               "r6 = *(u64 *)(r10 - 8);"
+
+               "r6 *= 4;"
+               "r1 = %[vals];"
+               /* here r6 is forced to be precise and has to be propagated
+                * back to the beginning, handling (and ignoring) subprog call
+                */
+               "r1 += r6;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals)
+               : __clobber_common, "r6"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("14: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 14 first_idx 11")
+__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 18 first_idx 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+__naked int parent_stack_slot_precise_with_callback(void)
+{
+       asm volatile (
+               /* spill reg */
+               "r6 = 3;"
+               "*(u64 *)(r10 - 8) = r6;"
+
+               /* ensure we have callback frame in jump history */
+               "r1 = r6;"                      /* nr_loops */
+               "r2 = %[callback_subprog];"     /* callback_fn */
+               "r3 = 0;"                       /* callback_ctx */
+               "r4 = 0;"                       /* flags */
+               "call %[bpf_loop];"
+
+               /* restore reg from stack; in this case we'll be carrying
+                * stack mask when going back into subprog through jump
+                * history
+                */
+               "r6 = *(u64 *)(r10 - 8);"
+
+               "r6 *= 4;"
+               "r1 = %[vals];"
+               /* here r6 is forced to be precise and has to be propagated
+                * back to the beginning, handling (and ignoring) subprog call
+                */
+               "r1 += r6;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals),
+                 __imm_ptr(callback_subprog),
+                 __imm(bpf_loop)
+               : __clobber_common, "r6"
+       );
+}
+
+__noinline __used
+static __u64 subprog_with_precise_arg(__u64 x)
+{
+       return vals[x]; /* x is forced to be precise */
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (0f) r2 += r1")
+__msg("mark_precise: frame1: last_idx 8 first_idx 0")
+__msg("mark_precise: frame1: regs=r1 stack= before 6: (18) r2 = ")
+__msg("mark_precise: frame1: regs=r1 stack= before 5: (67) r1 <<= 2")
+__msg("mark_precise: frame1: regs=r1 stack= before 2: (85) call pc+2")
+__msg("mark_precise: frame0: regs=r1 stack= before 1: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 0: (b7) r6 = 3")
+__naked int subprog_arg_precise(void)
+{
+       asm volatile (
+               "r6 = 3;"
+               "r1 = r6;"
+               /* subprog_with_precise_arg expects its argument to be
+                * precise, so r1->r6 will be marked precise from inside the
+                * subprog
+                */
+               "call subprog_with_precise_arg;"
+               "r0 += r6;"
+               "exit;"
+               :
+               :
+               : __clobber_common, "r6"
+       );
+}
+
+/* r1 is pointer to stack slot;
+ * r2 is a register to spill into that slot
+ * subprog also spills r2 into its own stack slot
+ */
+__naked __noinline __used
+static __u64 subprog_spill_reg_precise(void)
+{
+       asm volatile (
+               /* spill to parent stack */
+               "*(u64 *)(r1 + 0) = r2;"
+               /* spill to subprog stack (we use -16 offset to avoid
+                * accidental confusion with parent's -8 stack slot in
+                * verifier log output)
+                */
+               "*(u64 *)(r10 - 16) = r2;"
+               /* use both spills as return result to propagate precision everywhere */
+               "r0 = *(u64 *)(r10 - 16);"
+               "r2 = *(u64 *)(r1 + 0);"
+               "r0 += r2;"
+               "exit;"
+       );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* precision backtracking can't currently handle stack access not through r10,
+ * so we won't be able to mark stack slot fp-8 as precise, and so will
+ * fallback to forcing all as precise
+ */
+__msg("mark_precise: frame0: falling back to forcing all scalars precise")
+__naked int subprog_spill_into_parent_stack_slot_precise(void)
+{
+       asm volatile (
+               "r6 = 1;"
+
+               /* pass pointer to stack slot and r6 to subprog;
+                * r6 will be marked precise and spilled into fp-8 slot, which
+                * also should be marked precise
+                */
+               "r1 = r10;"
+               "r1 += -8;"
+               "r2 = r6;"
+               "call subprog_spill_reg_precise;"
+
+               /* restore reg from stack; in this case we'll be carrying
+                * stack mask when going back into subprog through jump
+                * history
+                */
+               "r7 = *(u64 *)(r10 - 8);"
+
+               "r7 *= 4;"
+               "r1 = %[vals];"
+               /* here r7 is forced to be precise and has to be propagated
+                * back to the beginning, handling subprog call and logic
+                */
+               "r1 += r7;"
+               "r0 = *(u32 *)(r1 + 0);"
+               "exit;"
+               :
+               : __imm_ptr(vals)
+               : __clobber_common, "r6", "r7"
+       );
+}
+
+__naked __noinline __used
+static __u64 subprog_with_checkpoint(void)
+{
+       asm volatile (
+               "r0 = 0;"
+               /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */
+               "goto +0;"
+               "exit;"
+       );
+}
+
+char _license[] SEC("license") = "GPL";
index e1c7878..b2dfd70 100644 (file)
@@ -77,7 +77,9 @@ int rx(struct xdp_md *ctx)
        }
 
        err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
-       if (err)
+       if (!err)
+               meta->xdp_timestamp = bpf_ktime_get_tai_ns();
+       else
                meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
 
        err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
index ea82921..4d582ca 100644 (file)
@@ -11,7 +11,6 @@
 #include <signal.h>
 #include <string.h>
 #include <execinfo.h> /* backtrace */
-#include <linux/membarrier.h>
 #include <sys/sysinfo.h> /* get_nprocs */
 #include <netinet/in.h>
 #include <sys/select.h>
@@ -629,68 +628,6 @@ out:
        return err;
 }
 
-static int finit_module(int fd, const char *param_values, int flags)
-{
-       return syscall(__NR_finit_module, fd, param_values, flags);
-}
-
-static int delete_module(const char *name, int flags)
-{
-       return syscall(__NR_delete_module, name, flags);
-}
-
-/*
- * Trigger synchronize_rcu() in kernel.
- */
-int kern_sync_rcu(void)
-{
-       return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
-}
-
-static void unload_bpf_testmod(void)
-{
-       if (kern_sync_rcu())
-               fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n");
-       if (delete_module("bpf_testmod", 0)) {
-               if (errno == ENOENT) {
-                       if (verbose())
-                               fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
-                       return;
-               }
-               fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
-               return;
-       }
-       if (verbose())
-               fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
-}
-
-static int load_bpf_testmod(void)
-{
-       int fd;
-
-       /* ensure previous instance of the module is unloaded */
-       unload_bpf_testmod();
-
-       if (verbose())
-               fprintf(stdout, "Loading bpf_testmod.ko...\n");
-
-       fd = open("bpf_testmod.ko", O_RDONLY);
-       if (fd < 0) {
-               fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno);
-               return -ENOENT;
-       }
-       if (finit_module(fd, "", 0)) {
-               fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno);
-               close(fd);
-               return -EINVAL;
-       }
-       close(fd);
-
-       if (verbose())
-               fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
-       return 0;
-}
-
 /* extern declarations for test funcs */
 #define DEFINE_TEST(name)                              \
        extern void test_##name(void) __weak;           \
@@ -714,7 +651,13 @@ static struct test_state test_states[ARRAY_SIZE(prog_test_defs)];
 
 const char *argp_program_version = "test_progs 0.1";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
-static const char argp_program_doc[] = "BPF selftests test runner";
+static const char argp_program_doc[] =
+"BPF selftests test runner\v"
+"Options accepting the NAMES parameter take either a comma-separated list\n"
+"of test names, or a filename prefixed with @. The file contains one name\n"
+"(or wildcard pattern) per line, and comments beginning with # are ignored.\n"
+"\n"
+"These options can be passed repeatedly to read multiple files.\n";
 
 enum ARG_KEYS {
        ARG_TEST_NUM = 'n',
@@ -797,6 +740,7 @@ extern int extra_prog_load_log_flags;
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
 {
        struct test_env *env = state->input;
+       int err = 0;
 
        switch (key) {
        case ARG_TEST_NUM: {
@@ -821,18 +765,28 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
        }
        case ARG_TEST_NAME_GLOB_ALLOWLIST:
        case ARG_TEST_NAME: {
-               if (parse_test_list(arg,
-                                   &env->test_selector.whitelist,
-                                   key == ARG_TEST_NAME_GLOB_ALLOWLIST))
-                       return -ENOMEM;
+               if (arg[0] == '@')
+                       err = parse_test_list_file(arg + 1,
+                                                  &env->test_selector.whitelist,
+                                                  key == ARG_TEST_NAME_GLOB_ALLOWLIST);
+               else
+                       err = parse_test_list(arg,
+                                             &env->test_selector.whitelist,
+                                             key == ARG_TEST_NAME_GLOB_ALLOWLIST);
+
                break;
        }
        case ARG_TEST_NAME_GLOB_DENYLIST:
        case ARG_TEST_NAME_BLACKLIST: {
-               if (parse_test_list(arg,
-                                   &env->test_selector.blacklist,
-                                   key == ARG_TEST_NAME_GLOB_DENYLIST))
-                       return -ENOMEM;
+               if (arg[0] == '@')
+                       err = parse_test_list_file(arg + 1,
+                                                  &env->test_selector.blacklist,
+                                                  key == ARG_TEST_NAME_GLOB_DENYLIST);
+               else
+                       err = parse_test_list(arg,
+                                             &env->test_selector.blacklist,
+                                             key == ARG_TEST_NAME_GLOB_DENYLIST);
+
                break;
        }
        case ARG_VERIFIER_STATS:
@@ -900,7 +854,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
        default:
                return ARGP_ERR_UNKNOWN;
        }
-       return 0;
+       return err;
 }
 
 /*
@@ -1703,9 +1657,14 @@ int main(int argc, char **argv)
        env.stderr = stderr;
 
        env.has_testmod = true;
-       if (!env.list_test_names && load_bpf_testmod()) {
-               fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
-               env.has_testmod = false;
+       if (!env.list_test_names) {
+               /* ensure previous instance of the module is unloaded */
+               unload_bpf_testmod(verbose());
+
+               if (load_bpf_testmod(verbose())) {
+                       fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
+                       env.has_testmod = false;
+               }
        }
 
        /* initializing tests */
@@ -1802,7 +1761,7 @@ int main(int argc, char **argv)
        close(env.saved_netns_fd);
 out:
        if (!env.list_test_names && env.has_testmod)
-               unload_bpf_testmod();
+               unload_bpf_testmod(verbose());
 
        free_test_selector(&env.test_selector);
        free_test_selector(&env.subtest_selector);
index 0ed3134..77bd492 100644 (file)
@@ -405,7 +405,6 @@ static inline void *u64_to_ptr(__u64 ptr)
 int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
 int compare_map_keys(int map1_fd, int map2_fd);
 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
-int kern_sync_rcu(void);
 int trigger_module_test_read(int read_sz);
 int trigger_module_test_write(int write_sz);
 int write_sysctl(const char *sysctl, const char *value);
index e4657c5..71704a3 100644 (file)
@@ -40,6 +40,7 @@
 #include "bpf_util.h"
 #include "test_btf.h"
 #include "../../../include/linux/filter.h"
+#include "testing_helpers.h"
 
 #ifndef ENOTSUPP
 #define ENOTSUPP 524
@@ -873,8 +874,140 @@ static int create_map_kptr(void)
        return fd;
 }
 
+static void set_root(bool set)
+{
+       __u64 caps;
+
+       if (set) {
+               if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps))
+                       perror("cap_enable_effective(CAP_SYS_ADMIN)");
+       } else {
+               if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps))
+                       perror("cap_disable_effective(CAP_SYS_ADMIN)");
+       }
+}
+
+static __u64 ptr_to_u64(const void *ptr)
+{
+       return (uintptr_t) ptr;
+}
+
+static struct btf *btf__load_testmod_btf(struct btf *vmlinux)
+{
+       struct bpf_btf_info info;
+       __u32 len = sizeof(info);
+       struct btf *btf = NULL;
+       char name[64];
+       __u32 id = 0;
+       int err, fd;
+
+       /* Iterate all loaded BTF objects and find bpf_testmod,
+        * we need SYS_ADMIN cap for that.
+        */
+       set_root(true);
+
+       while (true) {
+               err = bpf_btf_get_next_id(id, &id);
+               if (err) {
+                       if (errno == ENOENT)
+                               break;
+                       perror("bpf_btf_get_next_id failed");
+                       break;
+               }
+
+               fd = bpf_btf_get_fd_by_id(id);
+               if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
+                       perror("bpf_btf_get_fd_by_id failed");
+                       break;
+               }
+
+               memset(&info, 0, sizeof(info));
+               info.name_len = sizeof(name);
+               info.name = ptr_to_u64(name);
+               len = sizeof(info);
+
+               err = bpf_obj_get_info_by_fd(fd, &info, &len);
+               if (err) {
+                       close(fd);
+                       perror("bpf_obj_get_info_by_fd failed");
+                       break;
+               }
+
+               if (strcmp("bpf_testmod", name)) {
+                       close(fd);
+                       continue;
+               }
+
+               btf = btf__load_from_kernel_by_id_split(id, vmlinux);
+               if (!btf) {
+                       close(fd);
+                       break;
+               }
+
+               /* We need the fd to stay open so it can be used in fd_array.
+                * The final cleanup call to btf__free will free btf object
+                * and close the file descriptor.
+                */
+               btf__set_fd(btf, fd);
+               break;
+       }
+
+       set_root(false);
+       return btf;
+}
+
+static struct btf *testmod_btf;
+static struct btf *vmlinux_btf;
+
+static void kfuncs_cleanup(void)
+{
+       btf__free(testmod_btf);
+       btf__free(vmlinux_btf);
+}
+
+static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array,
+                             struct kfunc_btf_id_pair *fixup_kfunc_btf_id)
+{
+       /* Patch in kfunc BTF IDs */
+       while (fixup_kfunc_btf_id->kfunc) {
+               int btf_id = 0;
+
+               /* try to find kfunc in kernel BTF */
+               vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf();
+               if (vmlinux_btf) {
+                       btf_id = btf__find_by_name_kind(vmlinux_btf,
+                                                       fixup_kfunc_btf_id->kfunc,
+                                                       BTF_KIND_FUNC);
+                       btf_id = btf_id < 0 ? 0 : btf_id;
+               }
+
+               /* kfunc not found in kernel BTF, try bpf_testmod BTF */
+               if (!btf_id) {
+                       testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf);
+                       if (testmod_btf) {
+                               btf_id = btf__find_by_name_kind(testmod_btf,
+                                                               fixup_kfunc_btf_id->kfunc,
+                                                               BTF_KIND_FUNC);
+                               btf_id = btf_id < 0 ? 0 : btf_id;
+                               if (btf_id) {
+                                       /* We put bpf_testmod module fd into fd_array
+                                        * and its index 1 into instruction 'off'.
+                                        */
+                                       *fd_array = btf__fd(testmod_btf);
+                                       prog[fixup_kfunc_btf_id->insn_idx].off = 1;
+                               }
+                       }
+               }
+
+               prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+               fixup_kfunc_btf_id++;
+       }
+}
+
 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
-                         struct bpf_insn *prog, int *map_fds)
+                         struct bpf_insn *prog, int *map_fds, int *fd_array)
 {
        int *fixup_map_hash_8b = test->fixup_map_hash_8b;
        int *fixup_map_hash_48b = test->fixup_map_hash_48b;
@@ -899,7 +1032,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
        int *fixup_map_ringbuf = test->fixup_map_ringbuf;
        int *fixup_map_timer = test->fixup_map_timer;
        int *fixup_map_kptr = test->fixup_map_kptr;
-       struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
 
        if (test->fill_helper) {
                test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -1100,25 +1232,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
                } while (*fixup_map_kptr);
        }
 
-       /* Patch in kfunc BTF IDs */
-       if (fixup_kfunc_btf_id->kfunc) {
-               struct btf *btf;
-               int btf_id;
-
-               do {
-                       btf_id = 0;
-                       btf = btf__load_vmlinux_btf();
-                       if (btf) {
-                               btf_id = btf__find_by_name_kind(btf,
-                                                               fixup_kfunc_btf_id->kfunc,
-                                                               BTF_KIND_FUNC);
-                               btf_id = btf_id < 0 ? 0 : btf_id;
-                       }
-                       btf__free(btf);
-                       prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
-                       fixup_kfunc_btf_id++;
-               } while (fixup_kfunc_btf_id->kfunc);
-       }
+       fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id);
 }
 
 struct libcap {
@@ -1445,6 +1559,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        int run_errs, run_successes;
        int map_fds[MAX_NR_MAPS];
        const char *expected_err;
+       int fd_array[2] = { -1, -1 };
        int saved_errno;
        int fixup_skips;
        __u32 pflags;
@@ -1458,7 +1573,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        if (!prog_type)
                prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        fixup_skips = skips;
-       do_test_fixup(test, prog_type, prog, map_fds);
+       do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]);
        if (test->fill_insns) {
                prog = test->fill_insns;
                prog_len = test->prog_len;
@@ -1492,6 +1607,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        else
                opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
        opts.prog_flags = pflags;
+       if (fd_array[1] != -1)
+               opts.fd_array = &fd_array[0];
 
        if ((prog_type == BPF_PROG_TYPE_TRACING ||
             prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) {
@@ -1684,6 +1801,12 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
        int i, passes = 0, errors = 0;
 
+       /* ensure previous instance of the module is unloaded */
+       unload_bpf_testmod(verbose);
+
+       if (load_bpf_testmod(verbose))
+               return EXIT_FAILURE;
+
        for (i = from; i < to; i++) {
                struct bpf_test *test = &tests[i];
 
@@ -1711,6 +1834,9 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
                }
        }
 
+       unload_bpf_testmod(verbose);
+       kfuncs_cleanup();
+
        printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
               skips, errors);
        return errors ? EXIT_FAILURE : EXIT_SUCCESS;
index 377fb15..c2ad50f 100755 (executable)
@@ -68,9 +68,6 @@
 # Run with verbose output:
 #   sudo ./test_xsk.sh -v
 #
-# Run and dump packet contents:
-#   sudo ./test_xsk.sh -D
-#
 # Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger:
 #   sudo ./test_xsk.sh -d
 #
 
 ETH=""
 
-while getopts "vDi:d" flag
+while getopts "vi:d" flag
 do
        case "${flag}" in
                v) verbose=1;;
-               D) dump_pkts=1;;
                d) debug=1;;
                i) ETH=${OPTARG};;
        esac
@@ -157,10 +153,6 @@ if [[ $verbose -eq 1 ]]; then
        ARGS+="-v "
 fi
 
-if [[ $dump_pkts -eq 1 ]]; then
-       ARGS="-D "
-fi
-
 retval=$?
 test_status $retval "${TEST_NAME}"
 
index 0b5e082..8d99488 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (C) 2019 Netronome Systems, Inc. */
 /* Copyright (C) 2020 Facebook, Inc. */
+#include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
@@ -8,6 +9,7 @@
 #include <bpf/libbpf.h>
 #include "test_progs.h"
 #include "testing_helpers.h"
+#include <linux/membarrier.h>
 
 int parse_num_list(const char *s, bool **num_set, int *num_set_len)
 {
@@ -70,92 +72,168 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
        return 0;
 }
 
-int parse_test_list(const char *s,
-                   struct test_filter_set *set,
-                   bool is_glob_pattern)
+static int do_insert_test(struct test_filter_set *set,
+                         char *test_str,
+                         char *subtest_str)
 {
-       char *input, *state = NULL, *next;
-       struct test_filter *tmp, *tests = NULL;
-       int i, j, cnt = 0;
+       struct test_filter *tmp, *test;
+       char **ctmp;
+       int i;
 
-       input = strdup(s);
-       if (!input)
+       for (i = 0; i < set->cnt; i++) {
+               test = &set->tests[i];
+
+               if (strcmp(test_str, test->name) == 0) {
+                       free(test_str);
+                       goto subtest;
+               }
+       }
+
+       tmp = realloc(set->tests, sizeof(*test) * (set->cnt + 1));
+       if (!tmp)
                return -ENOMEM;
 
-       while ((next = strtok_r(state ? NULL : input, ",", &state))) {
-               char *subtest_str = strchr(next, '/');
-               char *pattern = NULL;
-               int glob_chars = 0;
+       set->tests = tmp;
+       test = &set->tests[set->cnt];
 
-               tmp = realloc(tests, sizeof(*tests) * (cnt + 1));
-               if (!tmp)
-                       goto err;
-               tests = tmp;
+       test->name = test_str;
+       test->subtests = NULL;
+       test->subtest_cnt = 0;
 
-               tests[cnt].subtest_cnt = 0;
-               tests[cnt].subtests = NULL;
+       set->cnt++;
 
-               if (is_glob_pattern) {
-                       pattern = "%s";
-               } else {
-                       pattern = "*%s*";
-                       glob_chars = 2;
-               }
+subtest:
+       if (!subtest_str)
+               return 0;
 
-               if (subtest_str) {
-                       char **tmp_subtests = NULL;
-                       int subtest_cnt = tests[cnt].subtest_cnt;
-
-                       *subtest_str = '\0';
-                       subtest_str += 1;
-                       tmp_subtests = realloc(tests[cnt].subtests,
-                                              sizeof(*tmp_subtests) *
-                                              (subtest_cnt + 1));
-                       if (!tmp_subtests)
-                               goto err;
-                       tests[cnt].subtests = tmp_subtests;
-
-                       tests[cnt].subtests[subtest_cnt] =
-                               malloc(strlen(subtest_str) + glob_chars + 1);
-                       if (!tests[cnt].subtests[subtest_cnt])
-                               goto err;
-                       sprintf(tests[cnt].subtests[subtest_cnt],
-                               pattern,
-                               subtest_str);
-
-                       tests[cnt].subtest_cnt++;
+       for (i = 0; i < test->subtest_cnt; i++) {
+               if (strcmp(subtest_str, test->subtests[i]) == 0) {
+                       free(subtest_str);
+                       return 0;
                }
+       }
 
-               tests[cnt].name = malloc(strlen(next) + glob_chars + 1);
-               if (!tests[cnt].name)
-                       goto err;
-               sprintf(tests[cnt].name, pattern, next);
+       ctmp = realloc(test->subtests,
+                      sizeof(*test->subtests) * (test->subtest_cnt + 1));
+       if (!ctmp)
+               return -ENOMEM;
 
-               cnt++;
+       test->subtests = ctmp;
+       test->subtests[test->subtest_cnt] = subtest_str;
+
+       test->subtest_cnt++;
+
+       return 0;
+}
+
+static int insert_test(struct test_filter_set *set,
+                      char *test_spec,
+                      bool is_glob_pattern)
+{
+       char *pattern, *subtest_str, *ext_test_str, *ext_subtest_str = NULL;
+       int glob_chars = 0;
+
+       if (is_glob_pattern) {
+               pattern = "%s";
+       } else {
+               pattern = "*%s*";
+               glob_chars = 2;
        }
 
-       tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt));
-       if (!tmp)
+       subtest_str = strchr(test_spec, '/');
+       if (subtest_str) {
+               *subtest_str = '\0';
+               subtest_str += 1;
+       }
+
+       ext_test_str = malloc(strlen(test_spec) + glob_chars + 1);
+       if (!ext_test_str)
                goto err;
 
-       memcpy(tmp +  set->cnt, tests, sizeof(*tests) * cnt);
-       set->tests = tmp;
-       set->cnt += cnt;
+       sprintf(ext_test_str, pattern, test_spec);
 
-       free(tests);
-       free(input);
-       return 0;
+       if (subtest_str) {
+               ext_subtest_str = malloc(strlen(subtest_str) + glob_chars + 1);
+               if (!ext_subtest_str)
+                       goto err;
+
+               sprintf(ext_subtest_str, pattern, subtest_str);
+       }
+
+       return do_insert_test(set, ext_test_str, ext_subtest_str);
 
 err:
-       for (i = 0; i < cnt; i++) {
-               for (j = 0; j < tests[i].subtest_cnt; j++)
-                       free(tests[i].subtests[j]);
+       free(ext_test_str);
+       free(ext_subtest_str);
+
+       return -ENOMEM;
+}
+
+int parse_test_list_file(const char *path,
+                        struct test_filter_set *set,
+                        bool is_glob_pattern)
+{
+       char *buf = NULL, *capture_start, *capture_end, *scan_end;
+       size_t buflen = 0;
+       int err = 0;
+       FILE *f;
+
+       f = fopen(path, "r");
+       if (!f) {
+               err = -errno;
+               fprintf(stderr, "Failed to open '%s': %d\n", path, err);
+               return err;
+       }
+
+       while (getline(&buf, &buflen, f) != -1) {
+               capture_start = buf;
+
+               while (isspace((unsigned char)*capture_start))
+                       ++capture_start;
+
+               capture_end = capture_start;
+               scan_end = capture_start;
+
+               while (*scan_end && *scan_end != '#') {
+                       if (!isspace((unsigned char)*scan_end))
+                               capture_end = scan_end;
+
+                       ++scan_end;
+               }
+
+               if (capture_end == capture_start)
+                       continue;
+
+               *(++capture_end) = '\0';
+
+               err = insert_test(set, capture_start, is_glob_pattern);
+               if (err)
+                       break;
+       }
+
+       fclose(f);
+       return err;
+}
+
+int parse_test_list(const char *s,
+                   struct test_filter_set *set,
+                   bool is_glob_pattern)
+{
+       char *input, *state = NULL, *test_spec;
+       int err = 0;
+
+       input = strdup(s);
+       if (!input)
+               return -ENOMEM;
 
-               free(tests[i].name);
+       while ((test_spec = strtok_r(state ? NULL : input, ",", &state))) {
+               err = insert_test(set, test_spec, is_glob_pattern);
+               if (err)
+                       break;
        }
-       free(tests);
+
        free(input);
-       return -ENOMEM;
+       return err;
 }
 
 __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
@@ -249,3 +327,63 @@ __u64 read_perf_max_sample_freq(void)
        fclose(f);
        return sample_freq;
 }
+
+static int finit_module(int fd, const char *param_values, int flags)
+{
+       return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int delete_module(const char *name, int flags)
+{
+       return syscall(__NR_delete_module, name, flags);
+}
+
+int unload_bpf_testmod(bool verbose)
+{
+       if (kern_sync_rcu())
+               fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n");
+       if (delete_module("bpf_testmod", 0)) {
+               if (errno == ENOENT) {
+                       if (verbose)
+                               fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
+                       return -1;
+               }
+               fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
+               return -1;
+       }
+       if (verbose)
+               fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
+       return 0;
+}
+
+int load_bpf_testmod(bool verbose)
+{
+       int fd;
+
+       if (verbose)
+               fprintf(stdout, "Loading bpf_testmod.ko...\n");
+
+       fd = open("bpf_testmod.ko", O_RDONLY);
+       if (fd < 0) {
+               fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno);
+               return -ENOENT;
+       }
+       if (finit_module(fd, "", 0)) {
+               fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno);
+               close(fd);
+               return -EINVAL;
+       }
+       close(fd);
+
+       if (verbose)
+               fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
+       return 0;
+}
+
+/*
+ * Trigger synchronize_rcu() in kernel.
+ */
+int kern_sync_rcu(void)
+{
+       return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
+}
index eb8790f..5312323 100644 (file)
@@ -1,5 +1,9 @@
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 /* Copyright (C) 2020 Facebook, Inc. */
+
+#ifndef __TESTING_HELPERS_H
+#define __TESTING_HELPERS_H
+
 #include <stdbool.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
@@ -20,5 +24,13 @@ struct test_filter_set;
 int parse_test_list(const char *s,
                    struct test_filter_set *test_set,
                    bool is_glob_pattern);
+int parse_test_list_file(const char *path,
+                        struct test_filter_set *test_set,
+                        bool is_glob_pattern);
 
 __u64 read_perf_max_sample_freq(void);
+int load_bpf_testmod(bool verbose);
+int unload_bpf_testmod(bool verbose);
+int kern_sync_rcu(void);
+
+#endif /* __TESTING_HELPERS_H */
index 6c03a7d..b8c0aae 100644 (file)
        .fixup_map_array_48b = { 1 },
        .result = VERBOSE_ACCEPT,
        .errstr =
-       "26: (85) call bpf_probe_read_kernel#113\
-       last_idx 26 first_idx 20\
-       regs=4 stack=0 before 25\
-       regs=4 stack=0 before 24\
-       regs=4 stack=0 before 23\
-       regs=4 stack=0 before 22\
-       regs=4 stack=0 before 20\
-       parent didn't have regs=4 stack=0 marks\
-       last_idx 19 first_idx 10\
-       regs=4 stack=0 before 19\
-       regs=200 stack=0 before 18\
-       regs=300 stack=0 before 17\
-       regs=201 stack=0 before 15\
-       regs=201 stack=0 before 14\
-       regs=200 stack=0 before 13\
-       regs=200 stack=0 before 12\
-       regs=200 stack=0 before 11\
-       regs=200 stack=0 before 10\
-       parent already had regs=0 stack=0 marks",
+       "mark_precise: frame0: last_idx 26 first_idx 20\
+       mark_precise: frame0: regs=r2 stack= before 25\
+       mark_precise: frame0: regs=r2 stack= before 24\
+       mark_precise: frame0: regs=r2 stack= before 23\
+       mark_precise: frame0: regs=r2 stack= before 22\
+       mark_precise: frame0: regs=r2 stack= before 20\
+       mark_precise: frame0: parent state regs=r2 stack=:\
+       mark_precise: frame0: last_idx 19 first_idx 10\
+       mark_precise: frame0: regs=r2 stack= before 19\
+       mark_precise: frame0: regs=r9 stack= before 18\
+       mark_precise: frame0: regs=r8,r9 stack= before 17\
+       mark_precise: frame0: regs=r0,r9 stack= before 15\
+       mark_precise: frame0: regs=r0,r9 stack= before 14\
+       mark_precise: frame0: regs=r9 stack= before 13\
+       mark_precise: frame0: regs=r9 stack= before 12\
+       mark_precise: frame0: regs=r9 stack= before 11\
+       mark_precise: frame0: regs=r9 stack= before 10\
+       mark_precise: frame0: parent state regs= stack=:",
 },
 {
        "precise: test 2",
        .flags = BPF_F_TEST_STATE_FREQ,
        .errstr =
        "26: (85) call bpf_probe_read_kernel#113\
-       last_idx 26 first_idx 22\
-       regs=4 stack=0 before 25\
-       regs=4 stack=0 before 24\
-       regs=4 stack=0 before 23\
-       regs=4 stack=0 before 22\
-       parent didn't have regs=4 stack=0 marks\
-       last_idx 20 first_idx 20\
-       regs=4 stack=0 before 20\
-       parent didn't have regs=4 stack=0 marks\
-       last_idx 19 first_idx 17\
-       regs=4 stack=0 before 19\
-       regs=200 stack=0 before 18\
-       regs=300 stack=0 before 17\
-       parent already had regs=0 stack=0 marks",
+       mark_precise: frame0: last_idx 26 first_idx 22\
+       mark_precise: frame0: regs=r2 stack= before 25\
+       mark_precise: frame0: regs=r2 stack= before 24\
+       mark_precise: frame0: regs=r2 stack= before 23\
+       mark_precise: frame0: regs=r2 stack= before 22\
+       mark_precise: frame0: parent state regs=r2 stack=:\
+       mark_precise: frame0: last_idx 20 first_idx 20\
+       mark_precise: frame0: regs=r2 stack= before 20\
+       mark_precise: frame0: parent state regs=r2 stack=:\
+       mark_precise: frame0: last_idx 19 first_idx 17\
+       mark_precise: frame0: regs=r2 stack= before 19\
+       mark_precise: frame0: regs=r9 stack= before 18\
+       mark_precise: frame0: regs=r8,r9 stack= before 17\
+       mark_precise: frame0: parent state regs= stack=:",
 },
 {
        "precise: cross frame pruning",
        },
        .prog_type = BPF_PROG_TYPE_XDP,
        .flags = BPF_F_TEST_STATE_FREQ,
-       .errstr = "5: (2d) if r4 > r0 goto pc+0\
-       last_idx 5 first_idx 5\
-       parent didn't have regs=10 stack=0 marks\
-       last_idx 4 first_idx 2\
-       regs=10 stack=0 before 4\
-       regs=10 stack=0 before 3\
-       regs=0 stack=1 before 2\
-       last_idx 5 first_idx 5\
-       parent didn't have regs=1 stack=0 marks",
+       .errstr = "mark_precise: frame0: last_idx 5 first_idx 5\
+       mark_precise: frame0: parent state regs=r4 stack=:\
+       mark_precise: frame0: last_idx 4 first_idx 2\
+       mark_precise: frame0: regs=r4 stack= before 4\
+       mark_precise: frame0: regs=r4 stack= before 3\
+       mark_precise: frame0: regs= stack=-8 before 2\
+       mark_precise: frame0: falling back to forcing all scalars precise\
+       force_precise: frame0: forcing r0 to be precise\
+       mark_precise: frame0: last_idx 5 first_idx 5\
+       mark_precise: frame0: parent state regs= stack=:",
        .result = VERBOSE_ACCEPT,
        .retval = -1,
 },
        },
        .prog_type = BPF_PROG_TYPE_XDP,
        .flags = BPF_F_TEST_STATE_FREQ,
-       .errstr = "last_idx 6 first_idx 6\
-       parent didn't have regs=10 stack=0 marks\
-       last_idx 5 first_idx 3\
-       regs=10 stack=0 before 5\
-       regs=10 stack=0 before 4\
-       regs=0 stack=1 before 3\
-       last_idx 6 first_idx 6\
-       parent didn't have regs=1 stack=0 marks\
-       last_idx 5 first_idx 3\
-       regs=1 stack=0 before 5",
+       .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\
+       mark_precise: frame0: parent state regs=r4 stack=:\
+       mark_precise: frame0: last_idx 5 first_idx 3\
+       mark_precise: frame0: regs=r4 stack= before 5\
+       mark_precise: frame0: regs=r4 stack= before 4\
+       mark_precise: frame0: regs= stack=-8 before 3\
+       mark_precise: frame0: falling back to forcing all scalars precise\
+       force_precise: frame0: forcing r0 to be precise\
+       force_precise: frame0: forcing r0 to be precise\
+       force_precise: frame0: forcing r0 to be precise\
+       force_precise: frame0: forcing r0 to be precise\
+       mark_precise: frame0: last_idx 6 first_idx 6\
+       mark_precise: frame0: parent state regs= stack=:",
        .result = VERBOSE_ACCEPT,
        .retval = -1,
 },
        .errstr = "invalid access to memory, mem_size=1 off=42 size=8",
        .result = REJECT,
 },
+{
+       "precise: program doesn't prematurely prune branches",
+       .insns = {
+               BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0x400),
+               BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+               BPF_ALU64_IMM(BPF_MOV, BPF_REG_8, 0),
+               BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0x80000000),
+               BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 0x401),
+               BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+               BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 2),
+               BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 1),
+               BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0),
+               BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 1),
+               BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0),
+               BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+               BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+               BPF_LD_MAP_FD(BPF_REG_4, 0),
+               BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_4),
+               BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+               BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+               BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+               BPF_EXIT_INSN(),
+               BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 10),
+               BPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 8192),
+               BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_0),
+               BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+               BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+               BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_3, 0),
+               BPF_EXIT_INSN(),
+       },
+       .fixup_map_array_48b = { 13 },
+       .prog_type = BPF_PROG_TYPE_XDP,
+       .result = REJECT,
+       .errstr = "register with unbounded min value is not allowed",
+},
index 1db7185..6550958 100644 (file)
@@ -141,6 +141,7 @@ static struct env {
        bool verbose;
        bool debug;
        bool quiet;
+       bool force_checkpoints;
        enum resfmt out_fmt;
        bool show_version;
        bool comparison_mode;
@@ -209,6 +210,8 @@ static const struct argp_option opts[] = {
        { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
        { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
        { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
+       { "test-states", 't', NULL, 0,
+         "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
        { "quiet", 'q', NULL, 0, "Quiet mode" },
        { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
        { "sort", 's', "SPEC", 0, "Specify sort order" },
@@ -284,6 +287,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
                        argp_usage(state);
                }
                break;
+       case 't':
+               env.force_checkpoints = true;
+               break;
        case 'C':
                env.comparison_mode = true;
                break;
@@ -989,6 +995,9 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
        /* increase chances of successful BPF object loading */
        fixup_obj(obj, prog, base_filename);
 
+       if (env.force_checkpoints)
+               bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
+
        err = bpf_object__load(obj);
        env.progs_processed++;
 
index 987cf0d..613321e 100644 (file)
@@ -27,6 +27,7 @@
 #include <sys/mman.h>
 #include <net/if.h>
 #include <poll.h>
+#include <time.h>
 
 #include "xdp_metadata.h"
 
@@ -134,18 +135,52 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
        }
 }
 
-static void verify_xdp_metadata(void *data)
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(clockid_t clock_id)
+{
+       struct timespec t;
+       int res;
+
+       /* See man clock_gettime(2) for type of clock_id's */
+       res = clock_gettime(clock_id, &t);
+
+       if (res < 0)
+               error(res, errno, "Error with clock_gettime()");
+
+       return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+static void verify_xdp_metadata(void *data, clockid_t clock_id)
 {
        struct xdp_meta *meta;
 
        meta = data - sizeof(*meta);
 
-       printf("rx_timestamp: %llu\n", meta->rx_timestamp);
        if (meta->rx_hash_err < 0)
                printf("No rx_hash err=%d\n", meta->rx_hash_err);
        else
                printf("rx_hash: 0x%X with RSS type:0x%X\n",
                       meta->rx_hash, meta->rx_hash_type);
+
+       printf("rx_timestamp:  %llu (sec:%0.4f)\n", meta->rx_timestamp,
+              (double)meta->rx_timestamp / NANOSEC_PER_SEC);
+       if (meta->rx_timestamp) {
+               __u64 usr_clock = gettime(clock_id);
+               __u64 xdp_clock = meta->xdp_timestamp;
+               __s64 delta_X = xdp_clock - meta->rx_timestamp;
+               __s64 delta_X2U = usr_clock - xdp_clock;
+
+               printf("XDP RX-time:   %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
+                      xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC,
+                      (double)delta_X / NANOSEC_PER_SEC,
+                      (double)delta_X / 1000);
+
+               printf("AF_XDP time:   %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
+                      usr_clock, (double)usr_clock / NANOSEC_PER_SEC,
+                      (double)delta_X2U / NANOSEC_PER_SEC,
+                      (double)delta_X2U / 1000);
+       }
+
 }
 
 static void verify_skb_metadata(int fd)
@@ -193,7 +228,7 @@ static void verify_skb_metadata(int fd)
        printf("skb hwtstamp is not found!\n");
 }
 
-static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
+static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
 {
        const struct xdp_desc *rx_desc;
        struct pollfd fds[rxq + 1];
@@ -243,7 +278,8 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
                        addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
                        printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
                               xsk, idx, rx_desc->addr, addr, comp_addr);
-                       verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr));
+                       verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
+                                           clock_id);
                        xsk_ring_cons__release(&xsk->rx, 1);
                        refill_rx(xsk, comp_addr);
                }
@@ -370,6 +406,7 @@ static void timestamping_enable(int fd, int val)
 
 int main(int argc, char *argv[])
 {
+       clockid_t clock_id = CLOCK_TAI;
        int server_fd = -1;
        int ret;
        int i;
@@ -443,7 +480,7 @@ int main(int argc, char *argv[])
                error(1, -ret, "bpf_xdp_attach");
 
        signal(SIGINT, handle_signal);
-       ret = verify_metadata(rx_xsk, rxq, server_fd);
+       ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
        close(server_fd);
        cleanup();
        if (ret)
index 0c4624d..938a729 100644 (file)
@@ -11,6 +11,7 @@
 
 struct xdp_meta {
        __u64 rx_timestamp;
+       __u64 xdp_timestamp;
        __u32 rx_hash;
        union {
                __u32 rx_hash_type;
index 04ed8b5..8da8d55 100644 (file)
@@ -134,6 +134,11 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
        __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE);
 }
 
+static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb)
+{
+       prod->cached_prod -= nb;
+}
+
 static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
 {
        __u32 entries = xsk_cons_nb_avail(cons, nb);
index f144d06..218d7f6 100644 (file)
 #include <asm/barrier.h>
 #include <linux/if_link.h>
 #include <linux/if_ether.h>
-#include <linux/ip.h>
 #include <linux/mman.h>
-#include <linux/udp.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <locale.h>
 #include <poll.h>
 #include <pthread.h>
 #include <signal.h>
-#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/types.h>
-#include <sys/queue.h>
 #include <time.h>
 #include <unistd.h>
-#include <stdatomic.h>
 
 #include "xsk_xdp_progs.skel.h"
 #include "xsk.h"
 
 static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
 static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
-static const char *IP1 = "192.168.100.162";
-static const char *IP2 = "192.168.100.161";
-static const u16 UDP_PORT1 = 2020;
-static const u16 UDP_PORT2 = 2121;
 
 static void __exit_with_error(int error, const char *file, const char *func, int line)
 {
@@ -147,112 +138,25 @@ static void report_failure(struct test_spec *test)
        test->fail = true;
 }
 
-static void memset32_htonl(void *dest, u32 val, u32 size)
-{
-       u32 *ptr = (u32 *)dest;
-       int i;
-
-       val = htonl(val);
-
-       for (i = 0; i < (size & (~0x3)); i += 4)
-               ptr[i >> 2] = val;
-}
-
-/*
- * Fold a partial checksum
- * This function code has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static __u16 csum_fold(__u32 csum)
-{
-       u32 sum = (__force u32)csum;
-
-       sum = (sum & 0xffff) + (sum >> 16);
-       sum = (sum & 0xffff) + (sum >> 16);
-       return (__force __u16)~sum;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static u32 from64to32(u64 x)
-{
-       /* add up 32-bit and 32-bit for 32+c bit */
-       x = (x & 0xffffffff) + (x >> 32);
-       /* add up carry.. */
-       x = (x & 0xffffffff) + (x >> 32);
-       return (u32)x;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
-{
-       unsigned long long s = (__force u32)sum;
-
-       s += (__force u32)saddr;
-       s += (__force u32)daddr;
-#ifdef __BIG_ENDIAN__
-       s += proto + len;
-#else
-       s += (proto + len) << 8;
-#endif
-       return (__force __u32)from64to32(s);
-}
-
-/*
- * This function has been taken from
- * Linux kernel include/asm-generic/checksum.h
+/* The payload is a word consisting of a packet sequence number in the upper
+ * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
+ * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
  */
-static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
-{
-       return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
-}
-
-static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
 {
-       u32 csum = 0;
-       u32 cnt = 0;
-
-       /* udp hdr and data */
-       for (; cnt < len; cnt += 2)
-               csum += udp_pkt[cnt >> 1];
+       u32 *ptr = (u32 *)dest, i;
 
-       return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+       start /= sizeof(*ptr);
+       size /= sizeof(*ptr);
+       for (i = 0; i < size; i++)
+               ptr[i] = htonl(pkt_nb << 16 | (i + start));
 }
 
 static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
 {
        memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
        memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
-       eth_hdr->h_proto = htons(ETH_P_IP);
-}
-
-static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
-{
-       ip_hdr->version = IP_PKT_VER;
-       ip_hdr->ihl = 0x5;
-       ip_hdr->tos = IP_PKT_TOS;
-       ip_hdr->tot_len = htons(IP_PKT_SIZE);
-       ip_hdr->id = 0;
-       ip_hdr->frag_off = 0;
-       ip_hdr->ttl = IPDEFTTL;
-       ip_hdr->protocol = IPPROTO_UDP;
-       ip_hdr->saddr = ifobject->src_ip;
-       ip_hdr->daddr = ifobject->dst_ip;
-       ip_hdr->check = 0;
-}
-
-static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
-                       struct udphdr *udp_hdr)
-{
-       udp_hdr->source = htons(ifobject->src_port);
-       udp_hdr->dest = htons(ifobject->dst_port);
-       udp_hdr->len = htons(UDP_PKT_SIZE);
-       memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
+       eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
 }
 
 static bool is_umem_valid(struct ifobject *ifobj)
@@ -260,19 +164,18 @@ static bool is_umem_valid(struct ifobject *ifobj)
        return !!ifobj->umem->umem;
 }
 
-static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
+static u32 mode_to_xdp_flags(enum test_mode mode)
 {
-       udp_hdr->check = 0;
-       udp_hdr->check =
-           udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
+       return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
 }
 
-static u32 mode_to_xdp_flags(enum test_mode mode)
+static u64 umem_size(struct xsk_umem_info *umem)
 {
-       return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+       return umem->num_frames * umem->frame_size;
 }
 
-static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size)
+static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
+                             u64 size)
 {
        struct xsk_umem_config cfg = {
                .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -292,9 +195,31 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
                return ret;
 
        umem->buffer = buffer;
+       if (ifobj->shared_umem && ifobj->rx_on) {
+               umem->base_addr = umem_size(umem);
+               umem->next_buffer = umem_size(umem);
+       }
+
        return 0;
 }
 
+static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
+{
+       u64 addr;
+
+       addr = umem->next_buffer;
+       umem->next_buffer += umem->frame_size;
+       if (umem->next_buffer >= umem->base_addr + umem_size(umem))
+               umem->next_buffer = umem->base_addr;
+
+       return addr;
+}
+
+static void umem_reset_alloc(struct xsk_umem_info *umem)
+{
+       umem->next_buffer = 0;
+}
+
 static void enable_busy_poll(struct xsk_socket_info *xsk)
 {
        int sock_opt;
@@ -354,7 +279,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
                exit_with_error(ENOMEM);
        }
        umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-       ret = xsk_configure_umem(umem, bufs, umem_sz);
+       ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz);
        if (ret)
                exit_with_error(-ret);
 
@@ -380,7 +305,6 @@ out:
 static struct option long_options[] = {
        {"interface", required_argument, 0, 'i'},
        {"busy-poll", no_argument, 0, 'b'},
-       {"dump-pkts", no_argument, 0, 'D'},
        {"verbose", no_argument, 0, 'v'},
        {0, 0, 0, 0}
 };
@@ -391,7 +315,6 @@ static void usage(const char *prog)
                "  Usage: %s [OPTIONS]\n"
                "  Options:\n"
                "  -i, --interface      Use interface\n"
-               "  -D, --dump-pkts      Dump packets L2 - L5\n"
                "  -v, --verbose        Verbose output\n"
                "  -b, --busy-poll      Enable busy poll\n";
 
@@ -415,7 +338,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
        opterr = 0;
 
        for (;;) {
-               c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index);
+               c = getopt_long(argc, argv, "i:vb", long_options, &option_index);
                if (c == -1)
                        break;
 
@@ -437,9 +360,6 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 
                        interface_nb++;
                        break;
-               case 'D':
-                       opt_pkt_dump = true;
-                       break;
                case 'v':
                        opt_verbose = true;
                        break;
@@ -482,9 +402,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
                memset(ifobj->umem, 0, sizeof(*ifobj->umem));
                ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
                ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-               if (ifobj->shared_umem && ifobj->rx_on)
-                       ifobj->umem->base_addr = DEFAULT_UMEM_BUFFERS *
-                               XSK_UMEM__DEFAULT_FRAME_SIZE;
 
                for (j = 0; j < MAX_SOCKETS; j++) {
                        memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
@@ -554,24 +471,24 @@ static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *x
 static void pkt_stream_reset(struct pkt_stream *pkt_stream)
 {
        if (pkt_stream)
-               pkt_stream->rx_pkt_nb = 0;
+               pkt_stream->current_pkt_nb = 0;
 }
 
-static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
+static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
 {
-       if (pkt_nb >= pkt_stream->nb_pkts)
+       if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
                return NULL;
 
-       return &pkt_stream->pkts[pkt_nb];
+       return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
 }
 
 static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
 {
-       while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) {
+       while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
                (*pkts_sent)++;
-               if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid)
-                       return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++];
-               pkt_stream->rx_pkt_nb++;
+               if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
+                       return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+               pkt_stream->current_pkt_nb++;
        }
        return NULL;
 }
@@ -616,9 +533,21 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
        return pkt_stream;
 }
 
-static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len)
+static u32 ceil_u32(u32 a, u32 b)
+{
+       return (a + b - 1) / b;
+}
+
+static u32 pkt_nb_frags(u32 frame_size, struct pkt *pkt)
+{
+       if (!pkt || !pkt->valid)
+               return 1;
+       return ceil_u32(pkt->len, frame_size);
+}
+
+static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len)
 {
-       pkt->addr = addr + umem->base_addr;
+       pkt->offset = offset;
        pkt->len = len;
        if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom)
                pkt->valid = false;
@@ -626,6 +555,11 @@ static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 l
                pkt->valid = true;
 }
 
+static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
+{
+       return ceil_u32(len, umem->frame_size) * umem->frame_size;
+}
+
 static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
 {
        struct pkt_stream *pkt_stream;
@@ -635,10 +569,13 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
        if (!pkt_stream)
                exit_with_error(ENOMEM);
 
+       pkt_stream->nb_pkts = nb_pkts;
+       pkt_stream->max_pkt_len = pkt_len;
        for (i = 0; i < nb_pkts; i++) {
-               pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size,
-                       pkt_len);
-               pkt_stream->pkts[i].payload = i;
+               struct pkt *pkt = &pkt_stream->pkts[i];
+
+               pkt_set(umem, pkt, 0, pkt_len);
+               pkt->pkt_nb = i;
        }
 
        return pkt_stream;
@@ -669,8 +606,7 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
 
        pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream);
        for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2)
-               pkt_set(umem, &pkt_stream->pkts[i],
-                       (i % umem->num_frames) * umem->frame_size + offset, pkt_len);
+               pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len);
 
        ifobj->pkt_stream = pkt_stream;
 }
@@ -694,30 +630,31 @@ static void pkt_stream_receive_half(struct test_spec *test)
                pkt_stream->pkts[i].valid = false;
 }
 
-static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
+static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
 {
-       struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
-       struct udphdr *udp_hdr;
-       struct ethhdr *eth_hdr;
-       struct iphdr *ip_hdr;
-       void *data;
+       if (!pkt->valid)
+               return pkt->offset;
+       return pkt->offset + umem_alloc_buffer(umem);
+}
 
-       if (!pkt)
-               return NULL;
-       if (!pkt->valid || pkt->len < MIN_PKT_SIZE)
-               return pkt;
+static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb,
+                        u32 bytes_written)
+{
+       void *data = xsk_umem__get_data(ifobject->umem->buffer, addr);
 
-       data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
-       udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
-       ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
-       eth_hdr = (struct ethhdr *)data;
+       if (len < MIN_PKT_SIZE)
+               return;
 
-       gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
-       gen_ip_hdr(ifobject, ip_hdr);
-       gen_udp_csum(udp_hdr, ip_hdr);
-       gen_eth_hdr(ifobject, eth_hdr);
+       if (!bytes_written) {
+               gen_eth_hdr(ifobject, data);
 
-       return pkt;
+               len -= PKT_HDR_SIZE;
+               data += PKT_HDR_SIZE;
+       } else {
+               bytes_written -= PKT_HDR_SIZE;
+       }
+
+       write_payload(data, pkt_nb, bytes_written, len);
 }
 
 static void __pkt_stream_generate_custom(struct ifobject *ifobj,
@@ -731,10 +668,14 @@ static void __pkt_stream_generate_custom(struct ifobject *ifobj,
                exit_with_error(ENOMEM);
 
        for (i = 0; i < nb_pkts; i++) {
-               pkt_stream->pkts[i].addr = pkts[i].addr + ifobj->umem->base_addr;
-               pkt_stream->pkts[i].len = pkts[i].len;
-               pkt_stream->pkts[i].payload = i;
-               pkt_stream->pkts[i].valid = pkts[i].valid;
+               struct pkt *pkt = &pkt_stream->pkts[i];
+
+               pkt->offset = pkts[i].offset;
+               pkt->len = pkts[i].len;
+               pkt->pkt_nb = i;
+               pkt->valid = pkts[i].valid;
+               if (pkt->len > pkt_stream->max_pkt_len)
+                       pkt_stream->max_pkt_len = pkt->len;
        }
 
        ifobj->pkt_stream = pkt_stream;
@@ -746,53 +687,62 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts,
        __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts);
 }
 
-static void pkt_dump(void *pkt, u32 len)
-{
-       char s[INET_ADDRSTRLEN];
-       struct ethhdr *ethhdr;
-       struct udphdr *udphdr;
-       struct iphdr *iphdr;
-       u32 payload, i;
-
-       ethhdr = pkt;
-       iphdr = pkt + sizeof(*ethhdr);
-       udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
-
-       /*extract L2 frame */
-       fprintf(stdout, "DEBUG>> L2: dst mac: ");
-       for (i = 0; i < ETH_ALEN; i++)
-               fprintf(stdout, "%02X", ethhdr->h_dest[i]);
-
-       fprintf(stdout, "\nDEBUG>> L2: src mac: ");
-       for (i = 0; i < ETH_ALEN; i++)
-               fprintf(stdout, "%02X", ethhdr->h_source[i]);
-
-       /*extract L3 frame */
-       fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
-       fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
-               inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
-       fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
-               inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
-       /*extract L4 frame */
-       fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
-       fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
-       /*extract L5 frame */
-       payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE)));
+static void pkt_print_data(u32 *data, u32 cnt)
+{
+       u32 i;
+
+       for (i = 0; i < cnt; i++) {
+               u32 seqnum, pkt_nb;
 
-       fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
-       fprintf(stdout, "---------------------------------------\n");
+               seqnum = ntohl(*data) & 0xffff;
+               pkt_nb = ntohl(*data) >> 16;
+               fprintf(stdout, "%u:%u ", pkt_nb, seqnum);
+               data++;
+       }
 }
 
-static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr,
-                             u64 pkt_stream_addr)
+static void pkt_dump(void *pkt, u32 len, bool eth_header)
+{
+       struct ethhdr *ethhdr = pkt;
+       u32 i, *data;
+
+       if (eth_header) {
+               /*extract L2 frame */
+               fprintf(stdout, "DEBUG>> L2: dst mac: ");
+               for (i = 0; i < ETH_ALEN; i++)
+                       fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+
+               fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+               for (i = 0; i < ETH_ALEN; i++)
+                       fprintf(stdout, "%02X", ethhdr->h_source[i]);
+
+               data = pkt + PKT_HDR_SIZE;
+       } else {
+               data = pkt;
+       }
+
+       /*extract L5 frame */
+       fprintf(stdout, "\nDEBUG>> L5: seqnum: ");
+       pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
+       fprintf(stdout, "....");
+       if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
+               fprintf(stdout, "\n.... ");
+               pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
+                              PKT_DUMP_NB_TO_PRINT);
+       }
+       fprintf(stdout, "\n---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
 {
        u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
-       u32 offset = addr % umem->frame_size, expected_offset = 0;
+       u32 offset = addr % umem->frame_size, expected_offset;
+       int pkt_offset = pkt->valid ? pkt->offset : 0;
 
-       if (!pkt_stream->use_addr_for_fill)
-               pkt_stream_addr = 0;
+       if (!umem->unaligned_mode)
+               pkt_offset = 0;
 
-       expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+       expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
 
        if (offset == expected_offset)
                return true;
@@ -806,9 +756,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
        void *data = xsk_umem__get_data(buffer, addr);
        struct xdp_info *meta = data - sizeof(struct xdp_info);
 
-       if (meta->count != pkt->payload) {
+       if (meta->count != pkt->pkt_nb) {
                ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
-                              __func__, pkt->payload, meta->count);
+                              __func__, pkt->pkt_nb, meta->count);
                return false;
        }
 
@@ -818,11 +768,11 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
 static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 {
        void *data = xsk_umem__get_data(buffer, addr);
-       struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+       u32 seqnum, pkt_data;
 
        if (!pkt) {
                ksft_print_msg("[%s] too many packets received\n", __func__);
-               return false;
+               goto error;
        }
 
        if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) {
@@ -833,28 +783,23 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
        if (pkt->len != len) {
                ksft_print_msg("[%s] expected length [%d], got length [%d]\n",
                               __func__, pkt->len, len);
-               return false;
+               goto error;
        }
 
-       if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
-               u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
-
-               if (opt_pkt_dump)
-                       pkt_dump(data, PKT_SIZE);
+       pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
+       seqnum = pkt_data >> 16;
 
-               if (pkt->payload != seqnum) {
-                       ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
-                                      __func__, pkt->payload, seqnum);
-                       return false;
-               }
-       } else {
-               ksft_print_msg("Invalid frame received: ");
-               ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
-                              iphdr->tos);
-               return false;
+       if (pkt->pkt_nb != seqnum) {
+               ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
+                              __func__, pkt->pkt_nb, seqnum);
+               goto error;
        }
 
        return true;
+
+error:
+       pkt_dump(data, len, true);
+       return false;
 }
 
 static void kick_tx(struct xsk_socket_info *xsk)
@@ -976,7 +921,7 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
                        addr = xsk_umem__add_offset_to_addr(addr);
 
                        if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
-                           !is_offset_correct(umem, pkt_stream, addr, pkt->addr) ||
+                           !is_offset_correct(umem, pktaddr) ||
                            (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr)))
                                return TEST_FAILURE;
 
@@ -992,8 +937,6 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
 
                pthread_mutex_lock(&pacing_mutex);
                pkts_in_flight -= pkts_sent;
-               if (pkts_in_flight < umem->num_frames)
-                       pthread_cond_signal(&pacing_cond);
                pthread_mutex_unlock(&pacing_mutex);
                pkts_sent = 0;
        }
@@ -1001,14 +944,21 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
        return TEST_PASS;
 }
 
-static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds,
-                      bool timeout)
+static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout)
 {
        struct xsk_socket_info *xsk = ifobject->xsk;
+       struct xsk_umem_info *umem = ifobject->umem;
+       u32 i, idx = 0, valid_pkts = 0, buffer_len;
        bool use_poll = ifobject->use_poll;
-       u32 i, idx = 0, valid_pkts = 0;
        int ret;
 
+       buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len);
+       /* pkts_in_flight might be negative if many invalid packets are sent */
+       if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
+               kick_tx(xsk);
+               return TEST_CONTINUE;
+       }
+
        while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
                if (use_poll) {
                        ret = poll(fds, 1, POLL_TMOUT);
@@ -1034,25 +984,21 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd
 
        for (i = 0; i < BATCH_SIZE; i++) {
                struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-               struct pkt *pkt = pkt_generate(ifobject, *pkt_nb);
+               struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream);
 
                if (!pkt)
                        break;
 
-               tx_desc->addr = pkt->addr;
+               tx_desc->addr = pkt_get_addr(pkt, umem);
                tx_desc->len = pkt->len;
-               (*pkt_nb)++;
-               if (pkt->valid)
+               if (pkt->valid) {
                        valid_pkts++;
+                       pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0);
+               }
        }
 
        pthread_mutex_lock(&pacing_mutex);
        pkts_in_flight += valid_pkts;
-       /* pkts_in_flight might be negative if many invalid packets are sent */
-       if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) {
-               kick_tx(xsk);
-               pthread_cond_wait(&pacing_cond, &pacing_mutex);
-       }
        pthread_mutex_unlock(&pacing_mutex);
 
        xsk_ring_prod__submit(&xsk->tx, i);
@@ -1088,18 +1034,21 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk)
 
 static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
 {
+       struct pkt_stream *pkt_stream = ifobject->pkt_stream;
        bool timeout = !is_umem_valid(test->ifobj_rx);
        struct pollfd fds = { };
-       u32 pkt_cnt = 0, ret;
+       u32 ret;
 
        fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
        fds.events = POLLOUT;
 
-       while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
-               ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout);
+       while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
+               ret = __send_pkts(ifobject, &fds, timeout);
+               if (ret == TEST_CONTINUE && !test->fail)
+                       continue;
                if ((ret || test->fail) && !timeout)
                        return TEST_FAILURE;
-               else if (ret == TEST_PASS && timeout)
+               if (ret == TEST_PASS && timeout)
                        return ret;
        }
 
@@ -1249,11 +1198,14 @@ static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobje
        ifobject->xsk = &ifobject->xsk_arr[0];
        ifobject->xskmap = test->ifobj_rx->xskmap;
        memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+       ifobject->umem->base_addr = 0;
 }
 
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream)
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
+                                  bool fill_up)
 {
-       u32 idx = 0, i, buffers_to_fill;
+       u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
+       u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
        int ret;
 
        if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
@@ -1264,22 +1216,33 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream
        ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
        if (ret != buffers_to_fill)
                exit_with_error(ENOSPC);
-       for (i = 0; i < buffers_to_fill; i++) {
-               u64 addr;
 
-               if (pkt_stream->use_addr_for_fill) {
-                       struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i);
+       while (filled < buffers_to_fill) {
+               struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
+               u64 addr;
+               u32 i;
+
+               for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt); i++) {
+                       if (!pkt) {
+                               if (!fill_up)
+                                       break;
+                               addr = filled * umem->frame_size + umem->base_addr;
+                       } else if (pkt->offset >= 0) {
+                               addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
+                       } else {
+                               addr = pkt->offset + umem_alloc_buffer(umem);
+                       }
 
-                       if (!pkt)
+                       *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+                       if (++filled >= buffers_to_fill)
                                break;
-                       addr = pkt->addr;
-               } else {
-                       addr = i * umem->frame_size;
                }
-
-               *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
        }
-       xsk_ring_prod__submit(&umem->fq, i);
+       xsk_ring_prod__submit(&umem->fq, filled);
+       xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
+
+       pkt_stream_reset(pkt_stream);
+       umem_reset_alloc(umem);
 }
 
 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
@@ -1300,12 +1263,10 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
        if (bufs == MAP_FAILED)
                exit_with_error(errno);
 
-       ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz);
+       ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
        if (ret)
                exit_with_error(-ret);
 
-       xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
-
        xsk_configure_socket(test, ifobject, ifobject->umem, false);
 
        ifobject->xsk = &ifobject->xsk_arr[0];
@@ -1313,6 +1274,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
        if (!ifobject->rx_on)
                return;
 
+       xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring);
+
        ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk);
        if (ret)
                exit_with_error(errno);
@@ -1370,12 +1333,8 @@ static void *worker_testapp_validate_rx(void *arg)
 
        if (!err && ifobject->validation_func)
                err = ifobject->validation_func(ifobject);
-       if (err) {
+       if (err)
                report_failure(test);
-               pthread_mutex_lock(&pacing_mutex);
-               pthread_cond_signal(&pacing_cond);
-               pthread_mutex_unlock(&pacing_mutex);
-       }
 
        pthread_exit(NULL);
 }
@@ -1402,11 +1361,20 @@ static void handler(int signum)
        pthread_exit(NULL);
 }
 
-static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj)
+static bool xdp_prog_changed_rx(struct test_spec *test)
 {
+       struct ifobject *ifobj = test->ifobj_rx;
+
        return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
 }
 
+static bool xdp_prog_changed_tx(struct test_spec *test)
+{
+       struct ifobject *ifobj = test->ifobj_tx;
+
+       return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
+}
+
 static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
                             struct bpf_map *xskmap, enum test_mode mode)
 {
@@ -1433,13 +1401,13 @@ static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_pro
 static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
                                 struct ifobject *ifobj_tx)
 {
-       if (xdp_prog_changed(test, ifobj_rx))
+       if (xdp_prog_changed_rx(test))
                xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
 
        if (!ifobj_tx || ifobj_tx->shared_umem)
                return;
 
-       if (xdp_prog_changed(test, ifobj_tx))
+       if (xdp_prog_changed_tx(test))
                xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
 }
 
@@ -1448,9 +1416,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
 {
        pthread_t t0, t1;
 
-       if (ifobj2)
+       if (ifobj2) {
                if (pthread_barrier_init(&barr, NULL, 2))
                        exit_with_error(errno);
+               pkt_stream_reset(ifobj2->pkt_stream);
+       }
 
        test->current_step++;
        pkt_stream_reset(ifobj1->pkt_stream);
@@ -1493,6 +1463,12 @@ static int testapp_validate_traffic(struct test_spec *test)
        struct ifobject *ifobj_rx = test->ifobj_rx;
        struct ifobject *ifobj_tx = test->ifobj_tx;
 
+       if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
+           (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
+               ksft_test_result_skip("No huge pages present.\n");
+               return TEST_SKIP;
+       }
+
        xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
        return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
 }
@@ -1502,16 +1478,18 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct
        return __testapp_validate_traffic(test, ifobj, NULL);
 }
 
-static void testapp_teardown(struct test_spec *test)
+static int testapp_teardown(struct test_spec *test)
 {
        int i;
 
        test_spec_set_name(test, "TEARDOWN");
        for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
                if (testapp_validate_traffic(test))
-                       return;
+                       return TEST_FAILURE;
                test_spec_reset(test);
        }
+
+       return TEST_PASS;
 }
 
 static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
@@ -1526,20 +1504,23 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
        *ifobj2 = tmp_ifobj;
 }
 
-static void testapp_bidi(struct test_spec *test)
+static int testapp_bidi(struct test_spec *test)
 {
+       int res;
+
        test_spec_set_name(test, "BIDIRECTIONAL");
        test->ifobj_tx->rx_on = true;
        test->ifobj_rx->tx_on = true;
        test->total_steps = 2;
        if (testapp_validate_traffic(test))
-               return;
+               return TEST_FAILURE;
 
        print_verbose("Switching Tx/Rx vectors\n");
        swap_directions(&test->ifobj_rx, &test->ifobj_tx);
-       __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+       res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
 
        swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+       return res;
 }
 
 static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
@@ -1556,160 +1537,139 @@ static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj
                exit_with_error(errno);
 }
 
-static void testapp_bpf_res(struct test_spec *test)
+static int testapp_bpf_res(struct test_spec *test)
 {
        test_spec_set_name(test, "BPF_RES");
        test->total_steps = 2;
        test->nb_sockets = 2;
        if (testapp_validate_traffic(test))
-               return;
+               return TEST_FAILURE;
 
        swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_headroom(struct test_spec *test)
+static int testapp_headroom(struct test_spec *test)
 {
        test_spec_set_name(test, "UMEM_HEADROOM");
        test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_stats_rx_dropped(struct test_spec *test)
+static int testapp_stats_rx_dropped(struct test_spec *test)
 {
        test_spec_set_name(test, "STAT_RX_DROPPED");
+       if (test->mode == TEST_MODE_ZC) {
+               ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
+               return TEST_SKIP;
+       }
+
        pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
        test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
                XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
        pkt_stream_receive_half(test);
        test->ifobj_rx->validation_func = validate_rx_dropped;
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_stats_tx_invalid_descs(struct test_spec *test)
+static int testapp_stats_tx_invalid_descs(struct test_spec *test)
 {
        test_spec_set_name(test, "STAT_TX_INVALID");
        pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
        test->ifobj_tx->validation_func = validate_tx_invalid_descs;
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_stats_rx_full(struct test_spec *test)
+static int testapp_stats_rx_full(struct test_spec *test)
 {
        test_spec_set_name(test, "STAT_RX_FULL");
-       pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+       pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
        test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
-                                                        DEFAULT_UMEM_BUFFERS, PKT_SIZE);
-       if (!test->ifobj_rx->pkt_stream)
-               exit_with_error(ENOMEM);
+                                                        DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
 
        test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
        test->ifobj_rx->release_rx = false;
        test->ifobj_rx->validation_func = validate_rx_full;
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_stats_fill_empty(struct test_spec *test)
+static int testapp_stats_fill_empty(struct test_spec *test)
 {
        test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
-       pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+       pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
        test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
-                                                        DEFAULT_UMEM_BUFFERS, PKT_SIZE);
-       if (!test->ifobj_rx->pkt_stream)
-               exit_with_error(ENOMEM);
+                                                        DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
 
        test->ifobj_rx->use_fill_ring = false;
        test->ifobj_rx->validation_func = validate_fill_empty;
-       testapp_validate_traffic(test);
-}
-
-/* Simple test */
-static bool hugepages_present(struct ifobject *ifobject)
-{
-       size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size;
-       void *bufs;
-
-       bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
-                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
-       if (bufs == MAP_FAILED)
-               return false;
-
-       mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
-       munmap(bufs, mmap_sz);
-       return true;
+       return testapp_validate_traffic(test);
 }
 
-static bool testapp_unaligned(struct test_spec *test)
+static int testapp_unaligned(struct test_spec *test)
 {
-       if (!hugepages_present(test->ifobj_tx)) {
-               ksft_test_result_skip("No 2M huge pages present.\n");
-               return false;
-       }
-
        test_spec_set_name(test, "UNALIGNED_MODE");
        test->ifobj_tx->umem->unaligned_mode = true;
        test->ifobj_rx->umem->unaligned_mode = true;
-       /* Let half of the packets straddle a buffer boundrary */
-       pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2);
-       test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
-       testapp_validate_traffic(test);
+       /* Let half of the packets straddle a 4K buffer boundary */
+       pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2);
 
-       return true;
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_single_pkt(struct test_spec *test)
+static int testapp_single_pkt(struct test_spec *test)
 {
-       struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}};
+       struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
 
        pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_invalid_desc(struct test_spec *test)
+static int testapp_invalid_desc(struct test_spec *test)
 {
-       u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+       struct xsk_umem_info *umem = test->ifobj_tx->umem;
+       u64 umem_size = umem->num_frames * umem->frame_size;
        struct pkt pkts[] = {
                /* Zero packet address allowed */
-               {0, PKT_SIZE, 0, true},
+               {0, MIN_PKT_SIZE, 0, true},
                /* Allowed packet */
-               {0x1000, PKT_SIZE, 0, true},
+               {0, MIN_PKT_SIZE, 0, true},
                /* Straddling the start of umem */
-               {-2, PKT_SIZE, 0, false},
+               {-2, MIN_PKT_SIZE, 0, false},
                /* Packet too large */
-               {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+               {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
                /* Up to end of umem allowed */
-               {umem_size - PKT_SIZE, PKT_SIZE, 0, true},
+               {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
                /* After umem ends */
-               {umem_size, PKT_SIZE, 0, false},
+               {umem_size, MIN_PKT_SIZE, 0, false},
                /* Straddle the end of umem */
-               {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false},
-               /* Straddle a page boundrary */
-               {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false},
-               /* Straddle a 2K boundrary */
-               {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true},
+               {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+               /* Straddle a 4K boundary */
+               {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+               /* Straddle a 2K boundary */
+               {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
                /* Valid packet for synch so that something is received */
-               {0x4000, PKT_SIZE, 0, true}};
+               {0, MIN_PKT_SIZE, 0, true}};
 
-       if (test->ifobj_tx->umem->unaligned_mode) {
-               /* Crossing a page boundrary allowed */
+       if (umem->unaligned_mode) {
+               /* Crossing a page boundary allowed */
                pkts[7].valid = true;
        }
-       if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
-               /* Crossing a 2K frame size boundrary not allowed */
+       if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+               /* Crossing a 2K frame size boundary not allowed */
                pkts[8].valid = false;
        }
 
        if (test->ifobj_tx->shared_umem) {
-               pkts[4].addr += umem_size;
-               pkts[5].addr += umem_size;
-               pkts[6].addr += umem_size;
+               pkts[4].offset += umem_size;
+               pkts[5].offset += umem_size;
+               pkts[6].offset += umem_size;
        }
 
        pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_xdp_drop(struct test_spec *test)
+static int testapp_xdp_drop(struct test_spec *test)
 {
        struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
        struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
@@ -1719,10 +1679,10 @@ static void testapp_xdp_drop(struct test_spec *test)
                               skel_rx->maps.xsk, skel_tx->maps.xsk);
 
        pkt_stream_receive_half(test);
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_xdp_metadata_count(struct test_spec *test)
+static int testapp_xdp_metadata_count(struct test_spec *test)
 {
        struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
        struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
@@ -1743,10 +1703,10 @@ static void testapp_xdp_metadata_count(struct test_spec *test)
        if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY))
                exit_with_error(errno);
 
-       testapp_validate_traffic(test);
+       return testapp_validate_traffic(test);
 }
 
-static void testapp_poll_txq_tmout(struct test_spec *test)
+static int testapp_poll_txq_tmout(struct test_spec *test)
 {
        test_spec_set_name(test, "POLL_TXQ_FULL");
 
@@ -1754,14 +1714,14 @@ static void testapp_poll_txq_tmout(struct test_spec *test)
        /* create invalid frame by set umem frame_size and pkt length equal to 2048 */
        test->ifobj_tx->umem->frame_size = 2048;
        pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
-       testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+       return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
 }
 
-static void testapp_poll_rxq_tmout(struct test_spec *test)
+static int testapp_poll_rxq_tmout(struct test_spec *test)
 {
        test_spec_set_name(test, "POLL_RXQ_EMPTY");
        test->ifobj_rx->use_poll = true;
-       testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+       return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
 }
 
 static int xsk_load_xdp_programs(struct ifobject *ifobj)
@@ -1778,25 +1738,30 @@ static void xsk_unload_xdp_programs(struct ifobject *ifobj)
        xsk_xdp_progs__destroy(ifobj->xdp_progs);
 }
 
+/* Simple test */
+static bool hugepages_present(void)
+{
+       size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+       void *bufs;
+
+       bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
+       if (bufs == MAP_FAILED)
+               return false;
+
+       mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+       munmap(bufs, mmap_sz);
+       return true;
+}
+
 static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
-                      const char *dst_ip, const char *src_ip, const u16 dst_port,
-                      const u16 src_port, thread_func_t func_ptr)
+                      thread_func_t func_ptr)
 {
-       struct in_addr ip;
        int err;
 
        memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
        memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
 
-       inet_aton(dst_ip, &ip);
-       ifobj->dst_ip = ip.s_addr;
-
-       inet_aton(src_ip, &ip);
-       ifobj->src_ip = ip.s_addr;
-
-       ifobj->dst_port = dst_port;
-       ifobj->src_port = src_port;
-
        ifobj->func_ptr = func_ptr;
 
        err = xsk_load_xdp_programs(ifobj);
@@ -1804,94 +1769,87 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
                printf("Error loading XDP program\n");
                exit_with_error(err);
        }
+
+       if (hugepages_present())
+               ifobj->unaligned_supp = true;
 }
 
 static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
 {
+       int ret = TEST_SKIP;
+
        switch (type) {
        case TEST_TYPE_STATS_RX_DROPPED:
-               if (mode == TEST_MODE_ZC) {
-                       ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
-                       return;
-               }
-               testapp_stats_rx_dropped(test);
+               ret = testapp_stats_rx_dropped(test);
                break;
        case TEST_TYPE_STATS_TX_INVALID_DESCS:
-               testapp_stats_tx_invalid_descs(test);
+               ret = testapp_stats_tx_invalid_descs(test);
                break;
        case TEST_TYPE_STATS_RX_FULL:
-               testapp_stats_rx_full(test);
+               ret = testapp_stats_rx_full(test);
                break;
        case TEST_TYPE_STATS_FILL_EMPTY:
-               testapp_stats_fill_empty(test);
+               ret = testapp_stats_fill_empty(test);
                break;
        case TEST_TYPE_TEARDOWN:
-               testapp_teardown(test);
+               ret = testapp_teardown(test);
                break;
        case TEST_TYPE_BIDI:
-               testapp_bidi(test);
+               ret = testapp_bidi(test);
                break;
        case TEST_TYPE_BPF_RES:
-               testapp_bpf_res(test);
+               ret = testapp_bpf_res(test);
                break;
        case TEST_TYPE_RUN_TO_COMPLETION:
                test_spec_set_name(test, "RUN_TO_COMPLETION");
-               testapp_validate_traffic(test);
+               ret = testapp_validate_traffic(test);
                break;
        case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
                test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
-               testapp_single_pkt(test);
+               ret = testapp_single_pkt(test);
                break;
        case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME:
                test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
                test->ifobj_tx->umem->frame_size = 2048;
                test->ifobj_rx->umem->frame_size = 2048;
-               pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE);
-               testapp_validate_traffic(test);
+               pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+               ret = testapp_validate_traffic(test);
                break;
        case TEST_TYPE_RX_POLL:
                test->ifobj_rx->use_poll = true;
                test_spec_set_name(test, "POLL_RX");
-               testapp_validate_traffic(test);
+               ret = testapp_validate_traffic(test);
                break;
        case TEST_TYPE_TX_POLL:
                test->ifobj_tx->use_poll = true;
                test_spec_set_name(test, "POLL_TX");
-               testapp_validate_traffic(test);
+               ret = testapp_validate_traffic(test);
                break;
        case TEST_TYPE_POLL_TXQ_TMOUT:
-               testapp_poll_txq_tmout(test);
+               ret = testapp_poll_txq_tmout(test);
                break;
        case TEST_TYPE_POLL_RXQ_TMOUT:
-               testapp_poll_rxq_tmout(test);
+               ret = testapp_poll_rxq_tmout(test);
                break;
        case TEST_TYPE_ALIGNED_INV_DESC:
                test_spec_set_name(test, "ALIGNED_INV_DESC");
-               testapp_invalid_desc(test);
+               ret = testapp_invalid_desc(test);
                break;
        case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME:
                test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE");
                test->ifobj_tx->umem->frame_size = 2048;
                test->ifobj_rx->umem->frame_size = 2048;
-               testapp_invalid_desc(test);
+               ret = testapp_invalid_desc(test);
                break;
        case TEST_TYPE_UNALIGNED_INV_DESC:
-               if (!hugepages_present(test->ifobj_tx)) {
-                       ksft_test_result_skip("No 2M huge pages present.\n");
-                       return;
-               }
                test_spec_set_name(test, "UNALIGNED_INV_DESC");
                test->ifobj_tx->umem->unaligned_mode = true;
                test->ifobj_rx->umem->unaligned_mode = true;
-               testapp_invalid_desc(test);
+               ret = testapp_invalid_desc(test);
                break;
        case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: {
                u64 page_size, umem_size;
 
-               if (!hugepages_present(test->ifobj_tx)) {
-                       ksft_test_result_skip("No 2M huge pages present.\n");
-                       return;
-               }
                test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE");
                /* Odd frame size so the UMEM doesn't end near a page boundary. */
                test->ifobj_tx->umem->frame_size = 4001;
@@ -1903,29 +1861,28 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
                 */
                page_size = sysconf(_SC_PAGESIZE);
                umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
-               assert(umem_size % page_size > PKT_SIZE);
-               assert(umem_size % page_size < page_size - PKT_SIZE);
-               testapp_invalid_desc(test);
+               assert(umem_size % page_size > MIN_PKT_SIZE);
+               assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+               ret = testapp_invalid_desc(test);
                break;
        }
        case TEST_TYPE_UNALIGNED:
-               if (!testapp_unaligned(test))
-                       return;
+               ret = testapp_unaligned(test);
                break;
        case TEST_TYPE_HEADROOM:
-               testapp_headroom(test);
+               ret = testapp_headroom(test);
                break;
        case TEST_TYPE_XDP_DROP_HALF:
-               testapp_xdp_drop(test);
+               ret = testapp_xdp_drop(test);
                break;
        case TEST_TYPE_XDP_METADATA_COUNT:
-               testapp_xdp_metadata_count(test);
+               ret = testapp_xdp_metadata_count(test);
                break;
        default:
                break;
        }
 
-       if (!test->fail)
+       if (ret == TEST_PASS)
                ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
                                      test->name);
        pkt_stream_restore_default(test);
@@ -2030,14 +1987,12 @@ int main(int argc, char **argv)
                        modes++;
        }
 
-       init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
-                  worker_testapp_validate_rx);
-       init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
-                  worker_testapp_validate_tx);
+       init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx);
+       init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx);
 
        test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
-       tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
-       rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
+       tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+       rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
        if (!tx_pkt_stream_default || !rx_pkt_stream_default)
                exit_with_error(ENOMEM);
        test.tx_pkt_stream_default = tx_pkt_stream_default;
index c535aea..aaf27e0 100644 (file)
 #define TEST_PASS 0
 #define TEST_FAILURE -1
 #define TEST_CONTINUE 1
+#define TEST_SKIP 2
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 16
 #define MAX_SOCKETS 2
 #define MAX_TEST_NAME_SIZE 32
 #define MAX_TEARDOWN_ITER 10
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
-                       sizeof(struct udphdr))
-#define MIN_ETH_PKT_SIZE 64
-#define ETH_FCS_SIZE 4
-#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE)
-#define PKT_SIZE (MIN_PKT_SIZE)
-#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
-#define IP_PKT_VER 0x4
-#define IP_PKT_TOS 0x9
-#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
+#define MIN_PKT_SIZE 64
 #define USLEEP_MAX 10000
 #define SOCK_RECONF_CTR 10
 #define BATCH_SIZE 64
@@ -57,6 +49,7 @@
 #define UMEM_HEADROOM_TEST_SIZE 128
 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
 #define HUGEPAGE_SIZE (2 * 1024 * 1024)
+#define PKT_DUMP_NB_TO_PRINT 16
 
 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
 
@@ -93,13 +86,13 @@ enum test_type {
        TEST_TYPE_MAX
 };
 
-static bool opt_pkt_dump;
 static bool opt_verbose;
 
 struct xsk_umem_info {
        struct xsk_ring_prod fq;
        struct xsk_ring_cons cq;
        struct xsk_umem *umem;
+       u64 next_buffer;
        u32 num_frames;
        u32 frame_headroom;
        void *buffer;
@@ -118,17 +111,17 @@ struct xsk_socket_info {
 };
 
 struct pkt {
-       u64 addr;
+       int offset;
        u32 len;
-       u32 payload;
+       u32 pkt_nb;
        bool valid;
 };
 
 struct pkt_stream {
        u32 nb_pkts;
-       u32 rx_pkt_nb;
+       u32 current_pkt_nb;
        struct pkt *pkts;
-       bool use_addr_for_fill;
+       u32 max_pkt_len;
 };
 
 struct ifobject;
@@ -148,11 +141,7 @@ struct ifobject {
        struct bpf_program *xdp_prog;
        enum test_mode mode;
        int ifindex;
-       u32 dst_ip;
-       u32 src_ip;
        u32 bind_flags;
-       u16 src_port;
-       u16 dst_port;
        bool tx_on;
        bool rx_on;
        bool use_poll;
@@ -161,6 +150,7 @@ struct ifobject {
        bool release_rx;
        bool shared_umem;
        bool use_metadata;
+       bool unaligned_supp;
        u8 dst_mac[ETH_ALEN];
        u8 src_mac[ETH_ALEN];
 };
@@ -184,7 +174,6 @@ struct test_spec {
 
 pthread_barrier_t barr;
 pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
 
 int pkts_in_flight;
 
index 5cdd220..862e947 100755 (executable)
@@ -53,7 +53,6 @@ bond_test_enslave_type_change()
        # restore ARPHRD_ETHER type by enslaving such device
        ip link set dev "$devbond2" master "$devbond0"
        check_err $? "could not enslave $devbond2 to $devbond0"
-       ip link set dev "$devbond1" nomaster
 
        bond_check_flags "$devbond0"
 
index 0cf9e47..a5c2aec 100755 (executable)
 # +----------------|--+                   +--|-----------------+
 #                  |                         |
 # +----------------|-------------------------|-----------------+
-# | SW             |                         |                 |
+# | SW       $swp1 +                         + $swp2           |
+# |                |                         |                 |
 # | +--------------|-------------------------|---------------+ |
-# | |        $swp1 +                         + $swp2         | |
-# | |              |                         |               | |
 # | |     $swp1.10 +                         + $swp2.10      | |
 # | |                                                        | |
 # | |                           br0                          | |
index df2b099..7d7f862 100755 (executable)
 # +----------------|--+                   +--|-----------------+
 #                  |                         |
 # +----------------|-------------------------|-----------------+
-# | SW             |                         |                 |
+# | SW       $swp1 +                         + $swp2           |
+# |                |                         |                 |
 # | +--------------|-------------------------|---------------+ |
-# | |        $swp1 +                         + $swp2         | |
-# | |              |                         |               | |
 # | |     $swp1.10 +                         + $swp2.10      | |
 # | |                                                        | |
 # | |                           br0                          | |
index c12df57..7f3ab2a 100644 (file)
@@ -84,6 +84,7 @@ TEST_GEN_FILES += ip_local_port_range
 TEST_GEN_FILES += bind_wildcard
 TEST_PROGS += test_vxlan_mdb.sh
 TEST_PROGS += test_bridge_neigh_suppress.sh
+TEST_PROGS += test_vxlan_nolocalbypass.sh
 
 TEST_FILES := settings
 
index 21ca914..05b5c4a 100755 (executable)
@@ -585,6 +585,20 @@ ipv4_ping_novrf()
        done
 
        #
+       # out, but don't use gateway if peer is not on link
+       #
+       a=${NSB_IP}
+       log_start
+       run_cmd ping -c 1 -w 1 -r ${a}
+       log_test_addr ${a} $? 0 "ping out (don't route), peer on link"
+
+       a=${NSB_LO_IP}
+       log_start
+       show_hint "Fails since peer is not on link"
+       run_cmd ping -c 1 -w 1 -r ${a}
+       log_test_addr ${a} $? 1 "ping out (don't route), peer not on link"
+
+       #
        # in
        #
        for a in ${NSA_IP} ${NSA_LO_IP}
@@ -1098,6 +1112,59 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0()
        set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept"
 }
 
+# Exercise SO_DONTROUTE for IPv4 TCP in both client and server roles.
+# $1 (syncookies): value written to net.ipv4.tcp_syncookies in both
+#                  namespaces for the duration of the test; the original
+#                  values are saved on entry and restored on exit.
+# Expected log_test_addr results: 0 = connect succeeded, 1 = connect
+# failed, 2 = timeout (per nettest exit conventions used elsewhere in
+# this script -- see the on-link vs routed cases below).
+ipv4_tcp_dontroute()
+{
+       local syncookies=$1
+       local nsa_syncookies
+       local nsb_syncookies
+       local a
+
+       #
+       # Link local connection tests (SO_DONTROUTE).
+       # Connections should succeed only when the remote IP address is
+       # on link (doesn't need to be routed through a gateway).
+       #
+
+       # Save current syncookie settings so they can be restored below.
+       nsa_syncookies=$(ip netns exec "${NSA}" sysctl -n net.ipv4.tcp_syncookies)
+       nsb_syncookies=$(ip netns exec "${NSB}" sysctl -n net.ipv4.tcp_syncookies)
+       ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies}
+       ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies}
+
+       # Test with eth1 address (on link).
+
+       a=${NSB_IP}
+       log_start
+       do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+       log_test_addr ${a} $? 0 "SO_DONTROUTE client, syncookies=${syncookies}"
+
+       a=${NSB_IP}
+       log_start
+       do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --server-dontroute
+       log_test_addr ${a} $? 0 "SO_DONTROUTE server, syncookies=${syncookies}"
+
+       # Test with loopback address (routed).
+       #
+       # The client would use the eth1 address as source IP by default.
+       # Therefore, we need to use the -c option here, to force the use of the
+       # routed (loopback) address as source IP (so that the server will try
+       # to respond to a routed address and not a link local one).
+
+       a=${NSB_LO_IP}
+       log_start
+       show_hint "Should fail 'Network is unreachable' since server is not on link"
+       do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --client-dontroute
+       log_test_addr ${a} $? 1 "SO_DONTROUTE client, syncookies=${syncookies}"
+
+       a=${NSB_LO_IP}
+       log_start
+       show_hint "Should timeout since server cannot respond (client is not on link)"
+       do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --server-dontroute
+       log_test_addr ${a} $? 2 "SO_DONTROUTE server, syncookies=${syncookies}"
+
+       # Restore the namespaces' original syncookie settings.
+       ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${nsb_syncookies}
+       ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${nsa_syncookies}
+}
+
 ipv4_tcp_novrf()
 {
        local a
@@ -1217,6 +1284,9 @@ ipv4_tcp_novrf()
        log_test_addr ${a} $? 1 "No server, device client, local conn"
 
        ipv4_tcp_md5_novrf
+
+       ipv4_tcp_dontroute 0
+       ipv4_tcp_dontroute 2
 }
 
 ipv4_tcp_vrf()
@@ -1585,6 +1655,23 @@ ipv4_udp_novrf()
        log_start
        run_cmd nettest -D -d ${NSA_DEV} -r ${a}
        log_test_addr ${a} $? 2 "No server, device client, local conn"
+
+       #
+       # Link local connection tests (SO_DONTROUTE).
+       # Connections should succeed only when the remote IP address is
+       # on link (doesn't need to be routed through a gateway).
+       #
+
+       a=${NSB_IP}
+       log_start
+       do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+       log_test_addr ${a} $? 0 "SO_DONTROUTE client"
+
+       a=${NSB_LO_IP}
+       log_start
+       show_hint "Should fail 'Network is unreachable' since server is not on link"
+       do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+       log_test_addr ${a} $? 1 "SO_DONTROUTE client"
 }
 
 ipv4_udp_vrf()
index a474c60..9d0062b 100644 (file)
@@ -83,6 +83,7 @@ TEST_PROGS = bridge_igmp.sh \
        tc_chains.sh \
        tc_flower_router.sh \
        tc_flower.sh \
+       tc_flower_l2_miss.sh \
        tc_mpls_l2vpn.sh \
        tc_police.sh \
        tc_shblocks.sh \
index fa6a88c..de2b2d5 100755 (executable)
@@ -1,6 +1,28 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# +------------------------+                           +----------------------+
+# | H1 (vrf)               |                           |             H2 (vrf) |
+# |    + $h1.555           |                           |  + $h2               |
+# |    | 192.0.2.1/28      |                           |  | 192.0.2.130/28    |
+# |    | 2001:db8:1::1/64  |                           |  | 2001:db8:2::2/64  |
+# |    |                   |                           |  |                   |
+# |    + $h1               |                           |  |                   |
+# +----|-------------------+                           +--|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|-------------------------------+                  + $swp2             |
+# | |  + $swp1                         |                    192.0.2.129/28    |
+# | |    vid 555                       |                    2001:db8:2::1/64  |
+# | |                                  |                                      |
+# | |  + BR1 (802.1q)                  |                                      |
+# | |    vid 555 pvid untagged         |                                      |
+# | |    192.0.2.2/28                  |                                      |
+# | |    2001:db8:1::2/64              |                                      |
+# | +----------------------------------+                                      |
+# +---------------------------------------------------------------------------+
+
 ALL_TESTS="
        ping_ipv4
        ping_ipv6
@@ -41,7 +63,7 @@ h2_destroy()
 
 router_create()
 {
-       ip link add name br1 type bridge vlan_filtering 1
+       ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0
        ip link set dev br1 up
 
        ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
new file mode 100755 (executable)
index 0000000..37b0369
--- /dev/null
@@ -0,0 +1,350 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                             +----------------------+
+# | H1 (vrf)              |                             | H2 (vrf)             |
+# |    + $h1              |                             |              $h2 +   |
+# |    | 192.0.2.1/28     |                             |     192.0.2.2/28 |   |
+# |    | 2001:db8:1::1/64 |                             | 2001:db8:1::2/64 |   |
+# +----|------------------+                             +------------------|---+
+#      |                                                                   |
+# +----|-------------------------------------------------------------------|---+
+# | SW |                                                                   |   |
+# |  +-|-------------------------------------------------------------------|-+ |
+# |  | + $swp1                       BR                              $swp2 + | |
+# |  +-----------------------------------------------------------------------+ |
+# +----------------------------------------------------------------------------+
+
+ALL_TESTS="
+       test_l2_miss_unicast
+       test_l2_miss_multicast
+       test_l2_miss_ll_multicast
+       test_l2_miss_broadcast
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+# Host setup/teardown helpers: each host is a VRF with a single interface
+# carrying one IPv4 and one IPv6 address (see topology diagram above).
+# h1 and h2 share the 192.0.2.0/28 / 2001:db8:1::/64 subnets since they
+# are connected through the bridge in SW.
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+# Build the switch: bridge br1 with $swp1/$swp2 enslaved, plus a clsact
+# qdisc on $swp2 so the tests can attach egress tc-flower filters there.
+switch_create()
+{
+       ip link add name br1 up type bridge
+       ip link set dev $swp1 master br1
+       ip link set dev $swp1 up
+       ip link set dev $swp2 master br1
+       ip link set dev $swp2 up
+
+       tc qdisc add dev $swp2 clsact
+}
+
+# Tear down the switch in reverse order of switch_create().
+switch_destroy()
+{
+       tc qdisc del dev $swp2 clsact
+
+       ip link set dev $swp2 down
+       ip link set dev $swp2 nomaster
+       ip link set dev $swp1 down
+       ip link set dev $swp1 nomaster
+       ip link del dev br1
+}
+
+# Verify that the tc-flower 'l2_miss' key distinguishes unknown unicast
+# (no FDB entry -> flooded, l2_miss true) from known unicast (FDB hit,
+# l2_miss false), and that the classification follows FDB add/delete.
+test_l2_miss_unicast()
+{
+       local dmac=00:01:02:03:04:05
+       local dip=192.0.2.2
+       local sip=192.0.2.1
+
+       RET=0
+
+       # Unknown unicast.
+       tc filter add dev $swp2 egress protocol ipv4 handle 101 pref 1 \
+          flower indev $swp1 l2_miss true dst_mac $dmac src_ip $sip \
+          dst_ip $dip action pass
+       # Known unicast.
+       tc filter add dev $swp2 egress protocol ipv4 handle 102 pref 1 \
+          flower indev $swp1 l2_miss false dst_mac $dmac src_ip $sip \
+          dst_ip $dip action pass
+
+       # Before adding FDB entry.
+       $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+       tc_check_packets "dev $swp2 egress" 101 1
+       check_err $? "Unknown unicast filter was not hit before adding FDB entry"
+
+       tc_check_packets "dev $swp2 egress" 102 0
+       check_err $? "Known unicast filter was hit before adding FDB entry"
+
+       # Adding FDB entry.
+       bridge fdb replace $dmac dev $swp2 master static
+
+       $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+       # 101 stays at 1 (no new miss); 102 picks up the known-unicast hit.
+       tc_check_packets "dev $swp2 egress" 101 1
+       check_err $? "Unknown unicast filter was hit after adding FDB entry"
+
+       tc_check_packets "dev $swp2 egress" 102 1
+       check_err $? "Known unicast filter was not hit after adding FDB entry"
+
+       # Deleting FDB entry.
+       bridge fdb del $dmac dev $swp2 master static
+
+       $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+       # Back to flooding: only the l2_miss-true filter should advance.
+       tc_check_packets "dev $swp2 egress" 101 2
+       check_err $? "Unknown unicast filter was not hit after deleting FDB entry"
+
+       tc_check_packets "dev $swp2 egress" 102 1
+       check_err $? "Known unicast filter was hit after deleting FDB entry"
+
+       tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 102 flower
+       tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 101 flower
+
+       log_test "L2 miss - Unicast"
+}
+
+# Shared body for the IPv4/IPv6 multicast l2_miss tests.
+# Args: $1 proto (tc protocol, "ipv4"/"ipv6"), $2 source IP, $3 multicast
+# destination IP, $4 mazezinho address-family flag ("-4"/"-6"), $5 name
+# used in the log message. Checks that l2_miss true/false tracks the
+# presence of an MDB entry for $dip on $swp2.
+test_l2_miss_multicast_common()
+{
+       local proto=$1; shift
+       local sip=$1; shift
+       local dip=$1; shift
+       local mode=$1; shift
+       local name=$1; shift
+
+       RET=0
+
+       # Unregistered multicast.
+       tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+          flower indev $swp1 l2_miss true src_ip $sip dst_ip $dip \
+          action pass
+       # Registered multicast.
+       tc filter add dev $swp2 egress protocol $proto handle 102 pref 1 \
+          flower indev $swp1 l2_miss false src_ip $sip dst_ip $dip \
+          action pass
+
+       # Before adding MDB entry.
+       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+       tc_check_packets "dev $swp2 egress" 101 1
+       check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
+
+       tc_check_packets "dev $swp2 egress" 102 0
+       check_err $? "Registered multicast filter was hit before adding MDB entry"
+
+       # Adding MDB entry.
+       bridge mdb replace dev br1 port $swp2 grp $dip permanent
+
+       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+       # Now registered: only the l2_miss-false filter should advance.
+       tc_check_packets "dev $swp2 egress" 101 1
+       check_err $? "Unregistered multicast filter was hit after adding MDB entry"
+
+       tc_check_packets "dev $swp2 egress" 102 1
+       check_err $? "Registered multicast filter was not hit after adding MDB entry"
+
+       # Deleting MDB entry.
+       bridge mdb del dev br1 port $swp2 grp $dip
+
+       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+       # Unregistered again: only the l2_miss-true filter should advance.
+       tc_check_packets "dev $swp2 egress" 101 2
+       check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
+
+       tc_check_packets "dev $swp2 egress" 102 1
+       check_err $? "Registered multicast filter was hit after deleting MDB entry"
+
+       tc filter del dev $swp2 egress protocol $proto pref 1 handle 102 flower
+       tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+       log_test "L2 miss - Multicast ($name)"
+}
+
+# Address-family-specific wrappers around test_l2_miss_multicast_common().
+test_l2_miss_multicast_ipv4()
+{
+       local proto="ipv4"
+       local sip=192.0.2.1
+       local dip=239.1.1.1
+       local mode="-4"
+       local name="IPv4"
+
+       test_l2_miss_multicast_common $proto $sip $dip $mode $name
+}
+
+test_l2_miss_multicast_ipv6()
+{
+       local proto="ipv6"
+       local sip=2001:db8:1::1
+       local dip=ff0e::1
+       local mode="-6"
+       local name="IPv6"
+
+       test_l2_miss_multicast_common $proto $sip $dip $mode $name
+}
+
+# Multicast l2_miss test entry point: temporarily configures the bridge
+# for MDB-based forwarding, runs both address families, then restores
+# the original bridge multicast configuration.
+test_l2_miss_multicast()
+{
+       # Configure $swp2 as a multicast router port so that it will forward
+       # both registered and unregistered multicast traffic.
+       bridge link set dev $swp2 mcast_router 2
+
+       # Forwarding according to MDB entries only takes place when the bridge
+       # detects that there is a valid querier in the network. Set the bridge
+       # as the querier and assign it a valid IPv6 link-local address to be
+       # used as the source address for MLD queries.
+       ip link set dev br1 type bridge mcast_querier 1
+       ip -6 address add fe80::1/64 nodad dev br1
+       # Wait the default Query Response Interval (10 seconds) for the bridge
+       # to determine that there are no other queriers in the network.
+       sleep 10
+
+       test_l2_miss_multicast_ipv4
+       test_l2_miss_multicast_ipv6
+
+       # Undo the querier/router-port configuration (mcast_router 1 is the
+       # default "auto" mode).
+       ip -6 address del fe80::1/64 dev br1
+       ip link set dev br1 type bridge mcast_querier 0
+       bridge link set dev $swp2 mcast_router 1
+}
+
+# NOTE(review): this function is defined but never called anywhere in the
+# script (it is not in ALL_TESTS and no other test invokes it), and its
+# body contains only local declarations -- it appears to be leftover
+# scaffolding and is a candidate for removal.
+test_l2_miss_multicast_common2()
+{
+       local name=$1; shift
+       local dmac=$1; shift
+       local dip=224.0.0.1
+       local sip=192.0.2.1
+
+}
+
+# Shared body for the link-local multicast tests: link-local multicast is
+# always flooded (never subject to MDB lookup), so only an l2_miss-true
+# filter is installed and it must be hit exactly once.
+# Args: $1 proto, $2 destination MAC, $3 source IP, $4 destination IP,
+# $5 mausezahn AF flag, $6 name for the log message.
+test_l2_miss_ll_multicast_common()
+{
+       local proto=$1; shift
+       local dmac=$1; shift
+       local sip=$1; shift
+       local dip=$1; shift
+       local mode=$1; shift
+       local name=$1; shift
+
+       RET=0
+
+       tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+          flower indev $swp1 l2_miss true dst_mac $dmac src_ip $sip \
+          dst_ip $dip action pass
+
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+       tc_check_packets "dev $swp2 egress" 101 1
+       check_err $? "Filter was not hit"
+
+       tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+       log_test "L2 miss - Link-local multicast ($name)"
+}
+
+# Address-family wrappers and entry point for the link-local multicast
+# tests (224.0.0.1 / ff02::1 with their mapped multicast MAC addresses).
+test_l2_miss_ll_multicast_ipv4()
+{
+       local proto=ipv4
+       local dmac=01:00:5e:00:00:01
+       local sip=192.0.2.1
+       local dip=224.0.0.1
+       local mode="-4"
+       local name="IPv4"
+
+       test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast_ipv6()
+{
+       local proto=ipv6
+       local dmac=33:33:00:00:00:01
+       local sip=2001:db8:1::1
+       local dip=ff02::1
+       local mode="-6"
+       local name="IPv6"
+
+       test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast()
+{
+       test_l2_miss_ll_multicast_ipv4
+       test_l2_miss_ll_multicast_ipv6
+}
+
+# Broadcast is always flooded and must NOT be reported as an FDB miss:
+# the l2_miss-true filter (101) must stay at zero while the l2_miss-false
+# filter (102) catches the packet.
+test_l2_miss_broadcast()
+{
+       local dmac=ff:ff:ff:ff:ff:ff
+       local smac=00:01:02:03:04:05
+
+       RET=0
+
+       tc filter add dev $swp2 egress protocol all handle 101 pref 1 \
+          flower l2_miss true dst_mac $dmac src_mac $smac \
+          action pass
+       tc filter add dev $swp2 egress protocol all handle 102 pref 1 \
+          flower l2_miss false dst_mac $dmac src_mac $smac \
+          action pass
+
+       $MZ $h1 -a $smac -b $dmac -c 1 -p 100 -q
+
+       tc_check_packets "dev $swp2 egress" 101 0
+       check_err $? "L2 miss filter was hit when should not"
+
+       tc_check_packets "dev $swp2 egress" 102 1
+       check_err $? "L2 no miss filter was not hit when should"
+
+       tc filter del dev $swp2 egress protocol all pref 1 handle 102 flower
+       tc filter del dev $swp2 egress protocol all pref 1 handle 101 flower
+
+       log_test "L2 miss - Broadcast"
+}
+
+# Map the generic NETIFS slots onto the topology roles (see diagram at
+# the top of the file) and build hosts + switch; cleanup() undoes it in
+# reverse order and is installed as the EXIT trap below.
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+       h1_create
+       h2_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+       vrf_cleanup
+}
+
+# Standard forwarding-selftest driver: ensure teardown on any exit, wait
+# for interfaces to come up, then run every test listed in ALL_TESTS.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
index 29f0c99..85474e0 100755 (executable)
@@ -37,6 +37,7 @@ evts_ns1=""
 evts_ns2=""
 evts_ns1_pid=0
 evts_ns2_pid=0
+stats_dumped=0
 
 declare -A all_tests
 declare -a only_tests_ids
@@ -90,6 +91,7 @@ init_partial()
                fi
        done
 
+       stats_dumped=0
        check_invert=0
        validate_checksum=$checksum
        FAILING_LINKS=""
@@ -353,6 +355,9 @@ fail_test()
 {
        ret=1
        failed_tests[${TEST_COUNT}]="${TEST_NAME}"
+
+       [ "${stats_dumped}" = 0 ] && dump_stats
+       stats_dumped=1
 }
 
 get_failed_tests_ids()
@@ -1142,7 +1147,6 @@ chk_csum_nr()
        local csum_ns1=${1:-0}
        local csum_ns2=${2:-0}
        local count
-       local dump_stats
        local extra_msg=""
        local allow_multi_errors_ns1=0
        local allow_multi_errors_ns2=0
@@ -1166,7 +1170,6 @@ chk_csum_nr()
           { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
                echo "[fail] got $count data checksum error[s] expected $csum_ns1"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1180,11 +1183,9 @@ chk_csum_nr()
           { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
                echo "[fail] got $count data checksum error[s] expected $csum_ns2"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
-       [ "${dump_stats}" = 1 ] && dump_stats
 
        echo "$extra_msg"
 }
@@ -1195,7 +1196,6 @@ chk_fail_nr()
        local fail_rx=$2
        local ns_invert=${3:-""}
        local count
-       local dump_stats
        local ns_tx=$ns1
        local ns_rx=$ns2
        local extra_msg=""
@@ -1227,7 +1227,6 @@ chk_fail_nr()
           { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
                echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1242,13 +1241,10 @@ chk_fail_nr()
           { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
                echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
 
-       [ "${dump_stats}" = 1 ] && dump_stats
-
        echo "$extra_msg"
 }
 
@@ -1258,7 +1254,6 @@ chk_fclose_nr()
        local fclose_rx=$2
        local ns_invert=$3
        local count
-       local dump_stats
        local ns_tx=$ns2
        local ns_rx=$ns1
        local extra_msg="   "
@@ -1276,7 +1271,6 @@ chk_fclose_nr()
        if [ "$count" != "$fclose_tx" ]; then
                echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1288,13 +1282,10 @@ chk_fclose_nr()
        if [ "$count" != "$fclose_rx" ]; then
                echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
 
-       [ "${dump_stats}" = 1 ] && dump_stats
-
        echo "$extra_msg"
 }
 
@@ -1304,7 +1295,6 @@ chk_rst_nr()
        local rst_rx=$2
        local ns_invert=${3:-""}
        local count
-       local dump_stats
        local ns_tx=$ns1
        local ns_rx=$ns2
        local extra_msg=""
@@ -1321,7 +1311,6 @@ chk_rst_nr()
        if [ $count -lt $rst_tx ]; then
                echo "[fail] got $count MP_RST[s] TX expected $rst_tx"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1332,13 +1321,10 @@ chk_rst_nr()
        if [ "$count" -lt "$rst_rx" ]; then
                echo "[fail] got $count MP_RST[s] RX expected $rst_rx"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
 
-       [ "${dump_stats}" = 1 ] && dump_stats
-
        echo "$extra_msg"
 }
 
@@ -1347,7 +1333,6 @@ chk_infi_nr()
        local infi_tx=$1
        local infi_rx=$2
        local count
-       local dump_stats
 
        printf "%-${nr_blank}s %s" " " "itx"
        count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}')
@@ -1355,7 +1340,6 @@ chk_infi_nr()
        if [ "$count" != "$infi_tx" ]; then
                echo "[fail] got $count infinite map[s] TX expected $infi_tx"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1366,12 +1350,9 @@ chk_infi_nr()
        if [ "$count" != "$infi_rx" ]; then
                echo "[fail] got $count infinite map[s] RX expected $infi_rx"
                fail_test
-               dump_stats=1
        else
                echo "[ ok ]"
        fi
-
-       [ "${dump_stats}" = 1 ] && dump_stats
 }
 
 chk_join_nr()
@@ -1386,7 +1367,6 @@ chk_join_nr()
        local infi_nr=${8:-0}
        local corrupted_pkts=${9:-0}
        local count
-       local dump_stats
        local with_cookie
        local title="${TEST_NAME}"
 
@@ -1400,7 +1380,6 @@ chk_join_nr()
        if [ "$count" != "$syn_nr" ]; then
                echo "[fail] got $count JOIN[s] syn expected $syn_nr"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1418,7 +1397,6 @@ chk_join_nr()
                else
                        echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
                        fail_test
-                       dump_stats=1
                fi
        else
                echo -n "[ ok ]"
@@ -1430,11 +1408,9 @@ chk_join_nr()
        if [ "$count" != "$ack_nr" ]; then
                echo "[fail] got $count JOIN[s] ack expected $ack_nr"
                fail_test
-               dump_stats=1
        else
                echo "[ ok ]"
        fi
-       [ "${dump_stats}" = 1 ] && dump_stats
        if [ $validate_checksum -eq 1 ]; then
                chk_csum_nr $csum_ns1 $csum_ns2
                chk_fail_nr $fail_nr $fail_nr
@@ -1494,7 +1470,6 @@ chk_add_nr()
        local mis_syn_nr=${7:-0}
        local mis_ack_nr=${8:-0}
        local count
-       local dump_stats
        local timeout
 
        timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
@@ -1508,18 +1483,16 @@ chk_add_nr()
        if [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
                echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
 
        echo -n " - echo  "
-       count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}')
+       count=$(ip netns exec $ns1 nstat -as MPTcpExtEchoAdd | grep MPTcpExtEchoAdd | awk '{print $2}')
        [ -z "$count" ] && count=0
        if [ "$count" != "$echo_nr" ]; then
                echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1531,7 +1504,6 @@ chk_add_nr()
                if [ "$count" != "$port_nr" ]; then
                        echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr"
                        fail_test
-                       dump_stats=1
                else
                        echo "[ ok ]"
                fi
@@ -1544,7 +1516,6 @@ chk_add_nr()
                        echo "[fail] got $count JOIN[s] syn with a different \
                                port-number expected $syn_nr"
                        fail_test
-                       dump_stats=1
                else
                        echo -n "[ ok ]"
                fi
@@ -1557,7 +1528,6 @@ chk_add_nr()
                        echo "[fail] got $count JOIN[s] synack with a different \
                                port-number expected $syn_ack_nr"
                        fail_test
-                       dump_stats=1
                else
                        echo -n "[ ok ]"
                fi
@@ -1570,7 +1540,6 @@ chk_add_nr()
                        echo "[fail] got $count JOIN[s] ack with a different \
                                port-number expected $ack_nr"
                        fail_test
-                       dump_stats=1
                else
                        echo "[ ok ]"
                fi
@@ -1583,7 +1552,6 @@ chk_add_nr()
                        echo "[fail] got $count JOIN[s] syn with a mismatched \
                                port-number expected $mis_syn_nr"
                        fail_test
-                       dump_stats=1
                else
                        echo -n "[ ok ]"
                fi
@@ -1596,15 +1564,45 @@ chk_add_nr()
                        echo "[fail] got $count JOIN[s] ack with a mismatched \
                                port-number expected $mis_ack_nr"
                        fail_test
-                       dump_stats=1
                else
                        echo "[ ok ]"
                fi
        else
                echo ""
        fi
+}
+
+chk_add_tx_nr()
+{
+       local add_tx_nr=$1
+       local echo_tx_nr=$2
+       local timeout
+       local count
+
+       timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
+
+       printf "%-${nr_blank}s %s" " " "add TX"
+       count=$(ip netns exec $ns1 nstat -as MPTcpExtAddAddrTx | grep MPTcpExtAddAddrTx | awk '{print $2}')
+       [ -z "$count" ] && count=0
+
+       # if the test configured a short timeout tolerate greater then expected
+       # add addrs options, due to retransmissions
+       if [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
+               echo "[fail] got $count ADD_ADDR[s] TX, expected $add_tx_nr"
+               fail_test
+       else
+               echo -n "[ ok ]"
+       fi
 
-       [ "${dump_stats}" = 1 ] && dump_stats
+       echo -n " - echo TX "
+       count=$(ip netns exec $ns2 nstat -as MPTcpExtEchoAddTx | grep MPTcpExtEchoAddTx | awk '{print $2}')
+       [ -z "$count" ] && count=0
+       if [ "$count" != "$echo_tx_nr" ]; then
+               echo "[fail] got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr"
+               fail_test
+       else
+               echo "[ ok ]"
+       fi
 }
 
 chk_rm_nr()
@@ -1614,7 +1612,6 @@ chk_rm_nr()
        local invert
        local simult
        local count
-       local dump_stats
        local addr_ns=$ns1
        local subflow_ns=$ns2
        local extra_msg=""
@@ -1636,12 +1633,11 @@ chk_rm_nr()
        fi
 
        printf "%-${nr_blank}s %s" " " "rm "
-       count=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}')
+       count=$(ip netns exec $addr_ns nstat -as MPTcpExtRmAddr | grep MPTcpExtRmAddr | awk '{print $2}')
        [ -z "$count" ] && count=0
        if [ "$count" != "$rm_addr_nr" ]; then
                echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1665,19 +1661,32 @@ chk_rm_nr()
                else
                        echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]"
                        fail_test
-                       dump_stats=1
                fi
                return
        fi
        if [ "$count" != "$rm_subflow_nr" ]; then
                echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
 
-       [ "${dump_stats}" = 1 ] && dump_stats
+       echo "$extra_msg"
+}
+
+# Check the number of RM_ADDR options sent by ns2 (MPTcpExtRmAddrTx MIB
+# counter) against the expected $1.
+# NOTE(review): 'count' is not declared local here, unlike in the other
+# chk_* helpers -- presumably an oversight; verify against upstream.
+# NOTE(review): the trailing 'echo "$extra_msg"' relies on a variable this
+# function never sets; under bash dynamic scoping it prints whatever value
+# (possibly stale) is visible from the caller -- confirm this is intended
+# to just emit the terminating newline.
+chk_rm_tx_nr()
+{
+       local rm_addr_tx_nr=$1
+
+       printf "%-${nr_blank}s %s" " " "rm TX "
+       count=$(ip netns exec $ns2 nstat -as MPTcpExtRmAddrTx | grep MPTcpExtRmAddrTx | awk '{print $2}')
+       [ -z "$count" ] && count=0
+       if [ "$count" != "$rm_addr_tx_nr" ]; then
+               echo "[fail] got $count RM_ADDR[s] expected $rm_addr_tx_nr"
+               fail_test
+       else
+               echo -n "[ ok ]"
+       fi
 
        echo "$extra_msg"
 }
@@ -1687,7 +1696,6 @@ chk_prio_nr()
        local mp_prio_nr_tx=$1
        local mp_prio_nr_rx=$2
        local count
-       local dump_stats
 
        printf "%-${nr_blank}s %s" " " "ptx"
        count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
@@ -1695,7 +1703,6 @@ chk_prio_nr()
        if [ "$count" != "$mp_prio_nr_tx" ]; then
                echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx"
                fail_test
-               dump_stats=1
        else
                echo -n "[ ok ]"
        fi
@@ -1706,12 +1713,9 @@ chk_prio_nr()
        if [ "$count" != "$mp_prio_nr_rx" ]; then
                echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx"
                fail_test
-               dump_stats=1
        else
                echo "[ ok ]"
        fi
-
-       [ "${dump_stats}" = 1 ] && dump_stats
 }
 
 chk_subflow_nr()
@@ -1743,7 +1747,6 @@ chk_subflow_nr()
                ss -N $ns1 -tOni
                ss -N $ns1 -tOni | grep token
                ip -n $ns1 mptcp endpoint
-               dump_stats
        fi
 }
 
@@ -1783,7 +1786,6 @@ chk_mptcp_info()
        if [ "$dump_stats" = 1 ]; then
                ss -N $ns1 -inmHM
                ss -N $ns2 -inmHM
-               dump_stats
        fi
 }
 
@@ -1961,6 +1963,7 @@ signal_address_tests()
                pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
                run_tests $ns1 $ns2 10.0.1.1
                chk_join_nr 0 0 0
+               chk_add_tx_nr 1 1
                chk_add_nr 1 1
        fi
 
@@ -2142,6 +2145,7 @@ add_addr_timeout_tests()
                pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
                run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
                chk_join_nr 1 1 1
+               chk_add_tx_nr 4 4
                chk_add_nr 4 0
        fi
 
@@ -2187,6 +2191,7 @@ remove_tests()
                pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
                run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
                chk_join_nr 1 1 1
+               chk_rm_tx_nr 1
                chk_rm_nr 1 1
        fi
 
@@ -2285,6 +2290,7 @@ remove_tests()
                pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
                run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
                chk_join_nr 3 3 3
+               chk_rm_tx_nr 0
                chk_rm_nr 0 3 simult
        fi
 
index ee9a729..39a0e01 100644 (file)
@@ -76,7 +76,9 @@ struct sock_args {
                     has_grp:1,
                     has_expected_laddr:1,
                     has_expected_raddr:1,
-                    bind_test_only:1;
+                    bind_test_only:1,
+                    client_dontroute:1,
+                    server_dontroute:1;
 
        unsigned short port;
 
@@ -611,6 +613,18 @@ static int set_dsfield(int sd, int version, int dsfield)
        return 0;
 }
 
+static int set_dontroute(int sd)
+{
+       unsigned int one = 1;
+
+       if (setsockopt(sd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) {
+               log_err_errno("setsockopt(SO_DONTROUTE)");
+               return -1;
+       }
+
+       return 0;
+}
+
 static int str_to_uint(const char *str, int min, int max, unsigned int *value)
 {
        int number;
@@ -1351,6 +1365,14 @@ static int msock_init(struct sock_args *args, int server)
        if (set_dsfield(sd, AF_INET, args->dsfield) != 0)
                goto out_err;
 
+       if (server) {
+               if (args->server_dontroute && set_dontroute(sd) != 0)
+                       goto out_err;
+       } else {
+               if (args->client_dontroute && set_dontroute(sd) != 0)
+                       goto out_err;
+       }
+
        if (args->dev && bind_to_device(sd, args->dev) != 0)
                goto out_err;
        else if (args->use_setsockopt &&
@@ -1482,6 +1504,9 @@ static int lsock_init(struct sock_args *args)
        if (set_dsfield(sd, args->version, args->dsfield) != 0)
                goto err;
 
+       if (args->server_dontroute && set_dontroute(sd) != 0)
+               goto err;
+
        if (args->dev && bind_to_device(sd, args->dev) != 0)
                goto err;
        else if (args->use_setsockopt &&
@@ -1698,6 +1723,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
        if (set_dsfield(sd, args->version, args->dsfield) != 0)
                goto err;
 
+       if (args->client_dontroute && set_dontroute(sd) != 0)
+               goto err;
+
        if (args->dev && bind_to_device(sd, args->dev) != 0)
                goto err;
        else if (args->use_setsockopt &&
@@ -1905,10 +1933,14 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
 #define GETOPT_STR  "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
 #define OPT_FORCE_BIND_KEY_IFINDEX 1001
 #define OPT_NO_BIND_KEY_IFINDEX 1002
+#define OPT_CLIENT_DONTROUTE 1003
+#define OPT_SERVER_DONTROUTE 1004
 
 static struct option long_opts[] = {
        {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX},
        {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX},
+       {"client-dontroute", 0, 0, OPT_CLIENT_DONTROUTE},
+       {"server-dontroute", 0, 0, OPT_SERVER_DONTROUTE},
        {0, 0, 0, 0}
 };
 
@@ -1954,6 +1986,12 @@ static void print_usage(char *prog)
        "    --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n"
        "    --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n"
        "        (default: only if -I is passed)\n"
+       "    --client-dontroute: don't use gateways for client socket: send\n"
+       "                        packets only if destination is on link (see\n"
+       "                        SO_DONTROUTE in socket(7))\n"
+       "    --server-dontroute: don't use gateways for server socket: send\n"
+       "                        packets only if destination is on link (see\n"
+       "                        SO_DONTROUTE in socket(7))\n"
        "\n"
        "    -g grp        multicast group (e.g., 239.1.1.1)\n"
        "    -i            interactive mode (default is echo and terminate)\n"
@@ -2076,6 +2114,12 @@ int main(int argc, char *argv[])
                case OPT_NO_BIND_KEY_IFINDEX:
                        args.bind_key_ifindex = -1;
                        break;
+               case OPT_CLIENT_DONTROUTE:
+                       args.client_dontroute = 1;
+                       break;
+               case OPT_SERVER_DONTROUTE:
+                       args.server_dontroute = 1;
+                       break;
                case 'X':
                        args.client_pw = optarg;
                        break;
diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
new file mode 100755 (executable)
index 0000000..46067db
--- /dev/null
@@ -0,0 +1,240 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the [no]localbypass VXLAN device option. The test
+# configures two VXLAN devices in the same network namespace and a tc filter on
+# the loopback device that drops encapsulated packets. The test sends packets
+# from the first VXLAN device and verifies that by default these packets are
+# received by the second VXLAN device. The test then enables the nolocalbypass
+# option and verifies that packets are no longer received by the second VXLAN
+# device.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTS="
+       nolocalbypass
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+       local rc=$1
+       local expected=$2
+       local msg="$3"
+
+       if [ ${rc} -eq ${expected} ]; then
+               printf "TEST: %-60s  [ OK ]\n" "${msg}"
+               nsuccess=$((nsuccess+1))
+       else
+               ret=1
+               nfail=$((nfail+1))
+               printf "TEST: %-60s  [FAIL]\n" "${msg}"
+               if [ "$VERBOSE" = "1" ]; then
+                       echo "    rc=$rc, expected $expected"
+               fi
+
+               if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+               echo
+                       echo "hit enter to continue, 'q' to quit"
+                       read a
+                       [ "$a" = "q" ] && exit 1
+               fi
+       fi
+
+       if [ "${PAUSE}" = "yes" ]; then
+               echo
+               echo "hit enter to continue, 'q' to quit"
+               read a
+               [ "$a" = "q" ] && exit 1
+       fi
+
+       [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+       local cmd="$1"
+       local out
+       local stderr="2>/dev/null"
+
+       if [ "$VERBOSE" = "1" ]; then
+               printf "COMMAND: $cmd\n"
+               stderr=
+       fi
+
+       out=$(eval $cmd $stderr)
+       rc=$?
+       if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+               echo "    $out"
+       fi
+
+       return $rc
+}
+
+tc_check_packets()
+{
+       local ns=$1; shift
+       local id=$1; shift
+       local handle=$1; shift
+       local count=$1; shift
+       local pkts
+
+       sleep 0.1
+       pkts=$(tc -n $ns -j -s filter show $id \
+               | jq ".[] | select(.options.handle == $handle) | \
+               .options.actions[0].stats.packets")
+       [[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+       ip netns add ns1
+
+       ip -n ns1 link set dev lo up
+       ip -n ns1 address add 192.0.2.1/32 dev lo
+       ip -n ns1 address add 198.51.100.1/32 dev lo
+
+       ip -n ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \
+               dstport 4789 nolearning
+       ip -n ns1 link add name vx1 up type vxlan id 100 dstport 4790
+}
+
+cleanup()
+{
+       ip netns del ns1 &> /dev/null
+}
+
+################################################################################
+# Tests
+
+nolocalbypass()
+{
+       local smac=00:01:02:03:04:05
+       local dmac=00:0a:0b:0c:0d:0e
+
+       run_cmd "bridge -n ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790"
+
+       run_cmd "tc -n ns1 qdisc add dev vx1 clsact"
+       run_cmd "tc -n ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass"
+
+       run_cmd "tc -n ns1 qdisc add dev lo clsact"
+       run_cmd "tc -n ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop"
+
+       run_cmd "ip -n ns1 -d link show dev vx0 | grep ' localbypass'"
+       log_test $? 0 "localbypass enabled"
+
+       run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+       tc_check_packets "ns1" "dev vx1 ingress" 101 1
+       log_test $? 0 "Packet received by local VXLAN device - localbypass"
+
+       run_cmd "ip -n ns1 link set dev vx0 type vxlan nolocalbypass"
+
+       run_cmd "ip -n ns1 -d link show dev vx0 | grep 'nolocalbypass'"
+       log_test $? 0 "localbypass disabled"
+
+       run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+       tc_check_packets "ns1" "dev vx1 ingress" 101 1
+       log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass"
+
+       run_cmd "ip -n ns1 link set dev vx0 type vxlan localbypass"
+
+       run_cmd "ip -n ns1 -d link show dev vx0 | grep ' localbypass'"
+       log_test $? 0 "localbypass enabled"
+
+       run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+       tc_check_packets "ns1" "dev vx1 ingress" 101 2
+       log_test $? 0 "Packet received by local VXLAN device - localbypass"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+       cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvh" opt; do
+       case $opt in
+               t) TESTS=$OPTARG ;;
+               p) PAUSE_ON_FAIL=yes;;
+               P) PAUSE=yes;;
+               v) VERBOSE=$(($VERBOSE + 1));;
+               h) usage; exit 0;;
+               *) usage; exit 1;;
+       esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+       echo "SKIP: Need root privileges"
+       exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+       echo "SKIP: Could not run test without bridge tool"
+       exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+       echo "SKIP: Could not run test without mausezahn tool"
+       exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+       echo "SKIP: Could not run test without jq tool"
+       exit $ksft_skip
+fi
+
+ip link help vxlan 2>&1 | grep -q "localbypass"
+if [ $? -ne 0 ]; then
+       echo "SKIP: iproute2 ip too old, missing VXLAN nolocalbypass support"
+       exit $ksft_skip
+fi
+
+cleanup
+
+for t in $TESTS
+do
+       setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+       printf "\nTests passed: %3d\n" ${nsuccess}
+       printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
index e699548..eccea98 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/tcp.h>
 #include <linux/socket.h>
 
+#include <sys/epoll.h>
 #include <sys/types.h>
 #include <sys/sendfile.h>
 #include <sys/socket.h>
@@ -1637,6 +1638,136 @@ TEST_F(tls_err, timeo)
        }
 }
 
+TEST_F(tls_err, poll_partial_rec)
+{
+       struct pollfd pfd = { };
+       ssize_t rec_len;
+       char rec[256];
+       char buf[128];
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       pfd.fd = self->cfd2;
+       pfd.events = POLLIN;
+       EXPECT_EQ(poll(&pfd, 1, 1), 0);
+
+       memrnd(buf, sizeof(buf));
+       EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+       rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+       EXPECT_GT(rec_len, sizeof(buf));
+
+       /* Write 100B, not the full record ... */
+       EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+       /* ... no full record should mean no POLLIN */
+       pfd.fd = self->cfd2;
+       pfd.events = POLLIN;
+       EXPECT_EQ(poll(&pfd, 1, 1), 0);
+       /* Now write the rest, and it should all pop out of the other end. */
+       EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100);
+       pfd.fd = self->cfd2;
+       pfd.events = POLLIN;
+       EXPECT_EQ(poll(&pfd, 1, 1), 1);
+       EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf));
+       EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0);
+}
+
+TEST_F(tls_err, epoll_partial_rec)
+{
+       struct epoll_event ev, events[10];
+       ssize_t rec_len;
+       char rec[256];
+       char buf[128];
+       int epollfd;
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       epollfd = epoll_create1(0);
+       ASSERT_GE(epollfd, 0);
+
+       memset(&ev, 0, sizeof(ev));
+       ev.events = EPOLLIN;
+       ev.data.fd = self->cfd2;
+       ASSERT_GE(epoll_ctl(epollfd, EPOLL_CTL_ADD, self->cfd2, &ev), 0);
+
+       EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0);
+
+       memrnd(buf, sizeof(buf));
+       EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+       rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+       EXPECT_GT(rec_len, sizeof(buf));
+
+       /* Write 100B, not the full record ... */
+       EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+       /* ... no full record should mean no POLLIN */
+       EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0);
+       /* Now write the rest, and it should all pop out of the other end. */
+       EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100);
+       EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 1);
+       EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf));
+       EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0);
+
+       close(epollfd);
+}
+
+TEST_F(tls_err, poll_partial_rec_async)
+{
+       struct pollfd pfd = { };
+       ssize_t rec_len;
+       char rec[256];
+       char buf[128];
+       char token;
+       int p[2];
+       int ret;
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       ASSERT_GE(pipe(p), 0);
+
+       memrnd(buf, sizeof(buf));
+       EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+       rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+       EXPECT_GT(rec_len, sizeof(buf));
+
+       ret = fork();
+       ASSERT_GE(ret, 0);
+
+       if (ret) {
+               int status, pid2;
+
+               close(p[1]);
+               usleep(1000); /* Give child a head start */
+
+               EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+
+               EXPECT_EQ(read(p[0], &token, 1), 1); /* Barrier #1 */
+
+               EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0),
+                         rec_len - 100);
+
+               pid2 = wait(&status);
+               EXPECT_EQ(pid2, ret);
+               EXPECT_EQ(status, 0);
+       } else {
+               close(p[0]);
+
+               /* Child should sleep in poll(), never get a wake */
+               pfd.fd = self->cfd2;
+               pfd.events = POLLIN;
+               EXPECT_EQ(poll(&pfd, 1, 5), 0);
+
+               EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */
+
+               pfd.fd = self->cfd2;
+               pfd.events = POLLIN;
+               EXPECT_EQ(poll(&pfd, 1, 5), 1);
+
+               exit(!_metadata->passed);
+       }
+}
+
 TEST(non_established) {
        struct tls12_crypto_info_aes_gcm_256 tls12;
        struct sockaddr_in addr;
index 44fbfc6..e3d2de5 100644 (file)
             "teardown": [
                 "echo \"1\" > /sys/bus/netdevsim/del_device"
             ]
-        }
+       },
+       {
+               "id": "0531",
+               "name": "Replace mq with invalid parent ID",
+               "category": [
+                       "qdisc",
+                       "mq"
+               ],
+               "plugins": {
+                       "requires": "nsPlugin"
+               },
+               "setup": [
+                       "echo \"1 1 16\" > /sys/bus/netdevsim/new_device",
+                       "$TC qdisc add dev $ETH root handle ffff: mq"
+               ],
+               "cmdUnderTest": "$TC qdisc replace dev $ETH parent ffff:fff1 handle ffff: mq",
+               "expExitCode": "2",
+               "verifyCmd": "$TC qdisc show dev $ETH",
+               "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent ffff",
+               "matchCount": "16",
+               "teardown": [
+                       "echo \"1\" > /sys/bus/netdevsim/del_device"
+               ]
+       }
 ]