Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author    Jakub Kicinski <kuba@kernel.org>
          Thu, 27 Jan 2022 20:54:16 +0000 (12:54 -0800)
committer Jakub Kicinski <kuba@kernel.org>
          Thu, 27 Jan 2022 20:54:16 +0000 (12:54 -0800)
No conflicts.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
215 files changed:
Documentation/bpf/btf.rst
Documentation/devicetree/bindings/net/fsl-fman.txt
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/mt7530.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/dec/tulip/pnic.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fec_ptp.c
drivers/net/ethernet/freescale/xgmac_mdio.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mediatek/mtk_star_emac.c
drivers/net/ethernet/mellanox/mlxsw/core_env.c
drivers/net/ethernet/mellanox/mlxsw/core_env.h
drivers/net/ethernet/mellanox/mlxsw/minimal.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
drivers/net/ethernet/microchip/lan743x_ethtool.c
drivers/net/ethernet/microsoft/mana/gdma_main.c
drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
drivers/net/ethernet/netronome/nfp/nfp_port.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
drivers/net/ethernet/pensando/ionic/ionic.h
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
drivers/net/ethernet/pensando/ionic/ionic_dev.c
drivers/net/ethernet/pensando/ionic/ionic_dev.h
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/pensando/ionic/ionic_lif.h
drivers/net/ethernet/pensando/ionic/ionic_main.c
drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/xilinx/xilinx_axienet.h
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/pcs/pcs-xpcs.c
drivers/net/phy/at803x.c
drivers/net/usb/asix_devices.c
drivers/nfc/st-nci/vendor_cmds.c
drivers/nfc/st21nfca/vendor_cmds.c
drivers/ptp/ptp_sysfs.c
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/btf.h
include/linux/btf_ids.h
include/linux/filter.h
include/linux/ipv6.h
include/linux/linkmode.h
include/linux/mii.h
include/linux/netlink.h
include/linux/pcs/pcs-xpcs.h
include/linux/skbuff.h
include/linux/udp.h
include/net/ax25.h
include/net/bonding.h
include/net/inet_timewait_sock.h
include/net/netfilter/nf_conntrack_bpf.h [new file with mode: 0644]
include/net/netns/ipv4.h
include/net/netns/ipv6.h
include/net/pkt_sched.h
include/net/sch_generic.h
include/net/udplite.h
include/net/xdp.h
include/uapi/linux/bpf.h
kernel/bpf/arraymap.c
kernel/bpf/btf.c
kernel/bpf/cgroup.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/devmap.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/ax25/ax25_route.c
net/bpf/test_run.c
net/core/filter.c
net/core/net_namespace.c
net/core/sock.c
net/core/sock_map.c
net/core/xdp.c
net/dccp/dccp.h
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dccp/minisocks.c
net/dsa/switch.c
net/hsr/hsr_main.h
net/ipv4/bpf_tcp_ca.c
net/ipv4/fib_semantics.c
net/ipv4/icmp.c
net/ipv4/inet_timewait_sock.c
net/ipv4/proc.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_cubic.c
net/ipv4/tcp_dctcp.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv6/icmp.c
net/ipv6/ip6_offload.c
net/ipv6/ip6_tunnel.c
net/ipv6/tcp_ipv6.c
net/netfilter/Makefile
net/netfilter/nf_conntrack_bpf.c [new file with mode: 0644]
net/netfilter/nf_conntrack_core.c
net/tipc/msg.h
net/unix/af_unix.c
samples/bpf/xdp1_user.c
samples/bpf/xdp_adjust_tail_user.c
samples/bpf/xdp_fwd_user.c
samples/bpf/xdp_router_ipv4_user.c
samples/bpf/xdp_rxq_info_user.c
samples/bpf/xdp_sample_pkts_user.c
samples/bpf/xdp_sample_user.c
samples/bpf/xdp_tx_iptunnel_user.c
samples/bpf/xdpsock_ctrl_proc.c
samples/bpf/xdpsock_user.c
samples/bpf/xsk_fwd.c
scripts/bpf_doc.py
security/device_cgroup.c
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/link.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/pids.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/struct_ops.c
tools/bpf/resolve_btfids/Makefile
tools/include/uapi/linux/bpf.h
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/hashmap.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_legacy.h
tools/lib/bpf/netlink.c
tools/perf/util/bpf-loader.c
tools/perf/util/bpf_map.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
tools/testing/selftests/bpf/config
tools/testing/selftests/bpf/prog_tests/bind_perm.c
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_nf.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
tools/testing/selftests/bpf/prog_tests/global_data.c
tools/testing/selftests/bpf/prog_tests/global_data_init.c
tools/testing/selftests/bpf/prog_tests/kfunc_call.c
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
tools/testing/selftests/bpf/prog_tests/tailcalls.c
tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_unix.c
tools/testing/selftests/bpf/progs/bpf_mod_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
tools/testing/selftests/bpf/progs/kfunc_call_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/kfunc_call_test.c
tools/testing/selftests/bpf/progs/ksym_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sample_map_ret0.c
tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
tools/testing/selftests/bpf/progs/sockopt_sk.c
tools/testing/selftests/bpf/progs/test_bpf_nf.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_btf_haskv.c
tools/testing/selftests/bpf/progs/test_btf_newkv.c
tools/testing/selftests/bpf/progs/test_btf_nokv.c
tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_tc_edt.c
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
tools/testing/selftests/bpf/progs/test_xdp_update_frags.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/verifier/calls.c
tools/testing/selftests/bpf/xdpxceiver.c

diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 1ebf4c5..ab08852 100644
@@ -565,18 +565,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
 In libbpf, the map can be defined with extra annotation like below:
 ::
 
-    struct bpf_map_def SEC("maps") btf_map = {
-        .type = BPF_MAP_TYPE_ARRAY,
-        .key_size = sizeof(int),
-        .value_size = sizeof(struct ipv_counts),
-        .max_entries = 4,
-    };
-    BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+    struct {
+        __uint(type, BPF_MAP_TYPE_ARRAY);
+        __type(key, int);
+        __type(value, struct ipv_counts);
+        __uint(max_entries, 4);
+    } btf_map SEC(".maps");
 
-Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
-value types for the map. During ELF parsing, libbpf is able to extract
-key/value type_id's and assign them to BPF_MAP_CREATE attributes
-automatically.
+During ELF parsing, libbpf is able to extract key/value type_id's and assign
+them to BPF_MAP_CREATE attributes automatically.
 
 .. _BPF_Prog_Load:
 
@@ -824,13 +821,12 @@ structure has bitfields. For example, for the following map,::
            ___A b1:4;
            enum A b2:4;
       };
-      struct bpf_map_def SEC("maps") tmpmap = {
-           .type = BPF_MAP_TYPE_ARRAY,
-           .key_size = sizeof(__u32),
-           .value_size = sizeof(struct tmp_t),
-           .max_entries = 1,
-      };
-      BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
+      struct {
+           __uint(type, BPF_MAP_TYPE_ARRAY);
+           __type(key, int);
+           __type(value, struct tmp_t);
+           __uint(max_entries, 1);
+      } tmpmap SEC(".maps");
 
 bpftool is able to pretty print like below:
 ::
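Putting the documented pieces together, a complete BTF-defined map plus a
minimal program that uses it could look like the following illustrative
sketch. This is not part of the patch; the struct layout, section name and
program body are assumptions, but the map-definition syntax is the one the
updated documentation describes:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct ipv_counts {
            long v4;
            long v6;
    };

    /* BTF-defined map: during ELF parsing libbpf reads the __type()
     * annotations and fills in the key/value type ids for
     * BPF_MAP_CREATE automatically.
     */
    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __type(key, int);
            __type(value, struct ipv_counts);
            __uint(max_entries, 4);
    } btf_map SEC(".maps");

    SEC("socket")
    int count_pkts(struct __sk_buff *skb)
    {
            int key = 0;
            struct ipv_counts *counts;

            counts = bpf_map_lookup_elem(&btf_map, &key);
            if (counts)
                    __sync_fetch_and_add(&counts->v4, 1);
            return 0;
    }

    char _license[] SEC("license") = "GPL";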
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index 020337f..801efc7 100644
@@ -388,14 +388,24 @@ PROPERTIES
                Value type: <prop-encoded-array>
                Definition: A standard property.
 
-- bus-frequency
+- clocks
+               Usage: optional
+               Value type: <phandle>
+               Definition: A reference to the input clock of the controller
+               from which the MDC frequency is derived.
+
+- clock-frequency
                Usage: optional
                Value type: <u32>
-               Definition: Specifies the external MDIO bus clock speed to
-               be used, if different from the standard 2.5 MHz.
-               This may be due to the standard speed being unsupported (e.g.
-               due to a hardware problem), or to advertise that all relevant
-               components in the system support a faster speed.
+               Definition: Specifies the external MDC frequency, in Hertz, to
+               be used. Requires that the input clock is specified in the
+               "clocks" property. See also: mdio.yaml.
+
+- suppress-preamble
+               Usage: optional
+               Value type: <boolean>
+               Definition: Disable generation of preamble bits. See also:
+               mdio.yaml.
 
 - interrupts
                Usage: required for external MDIO
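A consumer of these properties (xgmac_mdio.c, also touched by this merge, is
one) would typically pick them up at probe time with the standard OF and clk
helpers. The following is a hedged sketch, not code from this patch; the
function name and the divider/preamble programming are invented placeholders:

    static int example_mdio_probe_clk(struct platform_device *pdev)
    {
            struct device_node *np = pdev->dev.of_node;
            struct clk *clk;
            u32 mdc_freq;

            /* "clocks": optional input clock the MDC is derived from */
            clk = devm_clk_get_optional(&pdev->dev, NULL);
            if (IS_ERR(clk))
                    return PTR_ERR(clk);

            /* "clock-frequency": requested external MDC rate in Hz */
            if (!of_property_read_u32(np, "clock-frequency", &mdc_freq)) {
                    /* derive the MDC divider from
                     * clk_get_rate(clk) / mdc_freq here
                     */
            }

            /* "suppress-preamble": boolean, disable preamble bits */
            if (of_property_read_bool(np, "suppress-preamble")) {
                    /* ...clear the preamble-enable bit here... */
            }

            return 0;
    }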
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 3867f3d..a3b9899 100644
@@ -2186,7 +2186,7 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
 {
        int ret;
 
-       ret = phy_init_eee(phy, 0);
+       ret = phy_init_eee(phy, false);
        if (ret)
                return 0;
 
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index b82512e..bc77a26 100644
@@ -2846,7 +2846,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
                        mcr |= PMCR_RX_FC_EN;
        }
 
-       if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, 0) >= 0) {
+       if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) {
                switch (speed) {
                case SPEED_1000:
                        mcr |= PMCR_FORCE_EEE1G;
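Both DSA callers above reflect the phy_init_eee() signature change: the
second parameter, clk_stop_enable, is now a bool rather than an int, so the
literal 0 becomes false. A minimal caller sketch (an assumption for
illustration; the function name is invented):

    static int example_port_enable_eee(struct phy_device *phydev)
    {
            /* false: do not let the PHY stop its clock during LPI */
            int ret = phy_init_eee(phydev, false);

            if (ret < 0)
                    return ret;     /* EEE not usable on this link */

            /* ...configure MAC-side EEE (LPI timers etc.) here... */
            return 0;
    }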
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4f94136..c313221 100644
@@ -233,6 +233,7 @@ static const u16 bnxt_async_events_arr[] = {
        ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST,
        ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP,
        ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT,
+       ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE,
 };
 
 static struct workqueue_struct *bnxt_pf_wq;
@@ -2079,6 +2080,16 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
        (BNXT_EVENT_RING_TYPE(data2) == \
         ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX)
 
+#define BNXT_EVENT_PHC_EVENT_TYPE(data1)       \
+       (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK) >>\
+        ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT)
+
+#define BNXT_EVENT_PHC_RTC_UPDATE(data1)       \
+       (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK) >>\
+        ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT)
+
+#define BNXT_PHC_BITS  48
+
 static int bnxt_async_event_process(struct bnxt *bp,
                                    struct hwrm_async_event_cmpl *cmpl)
 {
@@ -2258,6 +2269,24 @@ static int bnxt_async_event_process(struct bnxt *bp,
                bnxt_event_error_report(bp, data1, data2);
                goto async_event_process_exit;
        }
+       case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: {
+               switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) {
+               case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE:
+                       if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+                               struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+                               u64 ns;
+
+                               spin_lock_bh(&ptp->ptp_lock);
+                               bnxt_ptp_update_current_time(bp);
+                               ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) <<
+                                      BNXT_PHC_BITS) | ptp->current_time);
+                               bnxt_ptp_rtc_timecounter_init(ptp, ns);
+                               spin_unlock_bh(&ptp->ptp_lock);
+                       }
+                       break;
+               }
+               goto async_event_process_exit;
+       }
        case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: {
                u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff;
 
@@ -7414,6 +7443,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
        struct hwrm_port_mac_ptp_qcfg_output *resp;
        struct hwrm_port_mac_ptp_qcfg_input *req;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       bool phc_cfg;
        u8 flags;
        int rc;
 
@@ -7456,7 +7486,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
                rc = -ENODEV;
                goto exit;
        }
-       rc = bnxt_ptp_init(bp);
+       phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0;
+       rc = bnxt_ptp_init(bp, phc_cfg);
        if (rc)
                netdev_warn(bp->dev, "PTP initialization failed.\n");
 exit:
@@ -7514,6 +7545,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
                bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED))
                bp->fw_cap |= BNXT_FW_CAP_PTP_PPS;
+       if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED)
+               bp->fw_cap |= BNXT_FW_CAP_PTP_RTC;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT))
                bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED))
@@ -10288,6 +10321,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        /* VF-reps may need to be re-opened after the PF is re-opened */
        if (BNXT_PF(bp))
                bnxt_vf_reps_open(bp);
+       bnxt_ptp_init_rtc(bp, true);
        return 0;
 
 open_err_irq:
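The PHC_RTC_UPDATE handler above reconstructs a full 64-bit timestamp from
two pieces: event_data1 carries only the upper 16 bits of the PHC time (bits
4..19 of data1), which the driver shifts above BNXT_PHC_BITS (48) before
OR-ing in the 48-bit free-running counter value. A standalone sketch of just
that arithmetic, with made-up input values:

    #include <stdint.h>
    #include <stdio.h>

    #define PHC_TIME_MSB_MASK 0xffff0u      /* data1 bits 4..19 */
    #define PHC_TIME_MSB_SFT  4
    #define PHC_BITS          48            /* width of the counter */

    int main(void)
    {
            uint32_t data1 = 0x00012345u;              /* example event_data1 */
            uint64_t current_time = 0x123456789abcULL; /* 48-bit counter */
            uint64_t msb = (data1 & PHC_TIME_MSB_MASK) >> PHC_TIME_MSB_SFT;
            uint64_t ns = (msb << PHC_BITS) | current_time;

            /* prints 0x1234123456789abc */
            printf("reconstructed PHC time: %#llx ns\n",
                   (unsigned long long)ns);
            return 0;
    }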
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 440dfeb..4b023e3 100644
@@ -1957,6 +1957,7 @@ struct bnxt {
        #define BNXT_FW_CAP_EXT_STATS_SUPPORTED         0x00040000
        #define BNXT_FW_CAP_ERR_RECOVER_RELOAD          0x00100000
        #define BNXT_FW_CAP_HOT_RESET                   0x00200000
+       #define BNXT_FW_CAP_PTP_RTC                     0x00400000
        #define BNXT_FW_CAP_VLAN_RX_STRIP               0x01000000
        #define BNXT_FW_CAP_VLAN_TX_INSERT              0x02000000
        #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED      0x04000000
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index ea86c54..b7100ed 100644
@@ -369,6 +369,12 @@ struct cmd_nums {
        #define HWRM_FUNC_PTP_EXT_CFG                     0x1a0UL
        #define HWRM_FUNC_PTP_EXT_QCFG                    0x1a1UL
        #define HWRM_FUNC_KEY_CTX_ALLOC                   0x1a2UL
+       #define HWRM_FUNC_BACKING_STORE_CFG_V2            0x1a3UL
+       #define HWRM_FUNC_BACKING_STORE_QCFG_V2           0x1a4UL
+       #define HWRM_FUNC_DBR_PACING_CFG                  0x1a5UL
+       #define HWRM_FUNC_DBR_PACING_QCFG                 0x1a6UL
+       #define HWRM_FUNC_DBR_PACING_BROADCAST_EVENT      0x1a7UL
+       #define HWRM_FUNC_BACKING_STORE_QCAPS_V2          0x1a8UL
        #define HWRM_SELFTEST_QLIST                       0x200UL
        #define HWRM_SELFTEST_EXEC                        0x201UL
        #define HWRM_SELFTEST_IRQ                         0x202UL
@@ -390,6 +396,9 @@ struct cmd_nums {
        #define HWRM_MFG_PRVSN_IMPORT_CERT                0x212UL
        #define HWRM_MFG_PRVSN_GET_STATE                  0x213UL
        #define HWRM_MFG_GET_NVM_MEASUREMENT              0x214UL
+       #define HWRM_MFG_PSOC_QSTATUS                     0x215UL
+       #define HWRM_MFG_SELFTEST_QLIST                   0x216UL
+       #define HWRM_MFG_SELFTEST_EXEC                    0x217UL
        #define HWRM_TF                                   0x2bcUL
        #define HWRM_TF_VERSION_GET                       0x2bdUL
        #define HWRM_TF_SESSION_OPEN                      0x2c6UL
@@ -532,8 +541,8 @@ struct hwrm_err_output {
 #define HWRM_VERSION_MAJOR 1
 #define HWRM_VERSION_MINOR 10
 #define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 63
-#define HWRM_VERSION_STR "1.10.2.63"
+#define HWRM_VERSION_RSVD 73
+#define HWRM_VERSION_STR "1.10.2.73"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -757,10 +766,11 @@ struct hwrm_async_event_cmpl {
        #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE          0x40UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE    0x41UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST               0x42UL
-       #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_MASTER                 0x43UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE                 0x43UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP              0x44UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT               0x45UL
-       #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x46UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD  0x46UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x47UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG               0xfeUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR                 0xffUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_LAST                      ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -1112,34 +1122,37 @@ struct hwrm_async_event_cmpl_echo_request {
        __le32  event_data1;
 };
 
-/* hwrm_async_event_cmpl_phc_master (size:128b/16B) */
-struct hwrm_async_event_cmpl_phc_master {
+/* hwrm_async_event_cmpl_phc_update (size:128b/16B) */
+struct hwrm_async_event_cmpl_phc_update {
        __le16  type;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_MASK            0x3fUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_SFT             0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT  0x2eUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_LAST             ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_MASK            0x3fUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_SFT             0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_LAST             ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT
        __le16  event_id;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER 0x43UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_LAST      ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE 0x43UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_LAST      ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE
        __le32  event_data2;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_SFT 0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_MASK   0xffff0000UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_SFT    16
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_SFT 0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_MASK   0xffff0000UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_SFT    16
        u8      opaque_v;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_V          0x1UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_MASK 0xfeUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_SFT 1
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_V          0x1UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_MASK 0xfeUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_SFT 1
        u8      timestamp_lo;
        __le16  timestamp_hi;
        __le32  event_data1;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_MASK         0xfUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_SFT          0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_MASTER     0x1UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_SECONDARY  0x2UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER   0x3UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_LAST          ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK          0xfUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT           0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_MASTER      0x1UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_SECONDARY   0x2UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_FAILOVER    0x3UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE  0x4UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_LAST           ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK   0xffff0UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT    4
 };
 
 /* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */
@@ -1330,6 +1343,30 @@ struct hwrm_async_event_cmpl_error_report_nvm {
        #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST    ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE
 };
 
+/* hwrm_async_event_cmpl_error_report_doorbell_drop_threshold (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_doorbell_drop_threshold {
+       __le16  type;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_MASK            0x3fUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_SFT             0
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT
+       __le16  event_id;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT 0x45UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT
+       __le32  event_data2;
+       u8      opaque_v;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_V          0x1UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_MASK 0xfeUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_SFT 1
+       u8      timestamp_lo;
+       __le16  timestamp_hi;
+       __le32  event_data1;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_MASK                   0xffUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_SFT                    0
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD  0x4UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_LAST                    ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD
+};
+
 /* hwrm_func_reset_input (size:192b/24B) */
 struct hwrm_func_reset_input {
        __le16  req_type;
@@ -1589,6 +1626,10 @@ struct hwrm_func_qcaps_output {
        #define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL                        0x800000UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED                       0x1000000UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP                       0x2000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED                        0x4000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_REQUIRED                         0x8000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED                0x10000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_DBR_PACING_SUPPORTED                   0x20000000UL
        u8      max_schqs;
        u8      mpc_chnls_cap;
        #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE         0x1UL
@@ -2455,7 +2496,7 @@ struct hwrm_func_backing_store_qcaps_output {
        __le16  rkc_entry_size;
        __le32  tkc_max_entries;
        __le32  rkc_max_entries;
-       u8      rsvd[7];
+       u8      rsvd1[7];
        u8      valid;
 };
 
@@ -3164,7 +3205,7 @@ struct hwrm_func_ptp_pin_cfg_output {
        u8      valid;
 };
 
-/* hwrm_func_ptp_cfg_input (size:320b/40B) */
+/* hwrm_func_ptp_cfg_input (size:384b/48B) */
 struct hwrm_func_ptp_cfg_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -3178,6 +3219,7 @@ struct hwrm_func_ptp_cfg_input {
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD     0x8UL
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP         0x10UL
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE      0x20UL
+       #define FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME                0x40UL
        u8      ptp_pps_event;
        #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL     0x1UL
        #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL     0x2UL
@@ -3204,6 +3246,7 @@ struct hwrm_func_ptp_cfg_input {
        __le32  ptp_freq_adj_ext_up;
        __le32  ptp_freq_adj_ext_phase_lower;
        __le32  ptp_freq_adj_ext_phase_upper;
+       __le64  ptp_set_time;
 };
 
 /* hwrm_func_ptp_cfg_output (size:128b/16B) */
@@ -3243,6 +3286,308 @@ struct hwrm_func_ptp_ts_query_output {
        u8      valid;
 };
 
+/* hwrm_func_ptp_ext_cfg_input (size:256b/32B) */
+struct hwrm_func_ptp_ext_cfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  enables;
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_MASTER_FID     0x1UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_FID        0x2UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_MODE       0x4UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_FAILOVER_TIMER     0x8UL
+       __le16  phc_master_fid;
+       __le16  phc_sec_fid;
+       u8      phc_sec_mode;
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_SWITCH  0x0UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_ALL     0x1UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY 0x2UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_LAST   FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY
+       u8      unused_0;
+       __le32  failover_timer;
+       u8      unused_1[4];
+};
+
+/* hwrm_func_ptp_ext_cfg_output (size:128b/16B) */
+struct hwrm_func_ptp_ext_cfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      unused_0[7];
+       u8      valid;
+};
+
+/* hwrm_func_ptp_ext_qcfg_input (size:192b/24B) */
+struct hwrm_func_ptp_ext_qcfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       u8      unused_0[8];
+};
+
+/* hwrm_func_ptp_ext_qcfg_output (size:256b/32B) */
+struct hwrm_func_ptp_ext_qcfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  phc_master_fid;
+       __le16  phc_sec_fid;
+       __le16  phc_active_fid0;
+       __le16  phc_active_fid1;
+       __le32  last_failover_event;
+       __le16  from_fid;
+       __le16  to_fid;
+       u8      unused_0[7];
+       u8      valid;
+};
+
+/* hwrm_func_backing_store_cfg_v2_input (size:448b/56B) */
+struct hwrm_func_backing_store_cfg_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID
+       __le16  instance;
+       __le32  flags;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE     0x1UL
+       __le64  page_dir;
+       __le32  num_entries;
+       __le16  entry_size;
+       u8      page_size_pbl_level;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_MASK  0xfUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_SFT   0
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_0   0x0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_1   0x1UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2   0x2UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LAST   FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_LAST   FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G
+       u8      subtype_valid_cnt;
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+};
+
+/* hwrm_func_backing_store_cfg_v2_output (size:128b/16B) */
+struct hwrm_func_backing_store_cfg_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      rsvd0[7];
+       u8      valid;
+};
+
+/* hwrm_func_backing_store_qcfg_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcfg_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID
+       __le16  instance;
+       u8      rsvd[4];
+};
+
+/* hwrm_func_backing_store_qcfg_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcfg_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST       FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID
+       __le16  instance;
+       __le32  flags;
+       __le64  page_dir;
+       __le32  num_entries;
+       u8      page_size_pbl_level;
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_MASK  0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_SFT   0
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_0   0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_1   0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2   0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LAST   FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_LAST   FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G
+       u8      subtype_valid_cnt;
+       u8      rsvd[2];
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+       u8      rsvd2[7];
+       u8      valid;
+};
+
+/* qpc_split_entries (size:128b/16B) */
+struct qpc_split_entries {
+       __le32  qp_num_l2_entries;
+       __le32  qp_num_qp1_entries;
+       __le32  rsvd[2];
+};
+
+/* srq_split_entries (size:128b/16B) */
+struct srq_split_entries {
+       __le32  srq_num_l2_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* cq_split_entries (size:128b/16B) */
+struct cq_split_entries {
+       __le32  cq_num_l2_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* vnic_split_entries (size:128b/16B) */
+struct vnic_split_entries {
+       __le32  vnic_num_vnic_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* mrav_split_entries (size:128b/16B) */
+struct mrav_split_entries {
+       __le32  mrav_num_av_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcaps_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID
+       u8      rsvd[6];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcaps_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST       FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID
+       __le16  entry_size;
+       __le32  flags;
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT     0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID               0x2UL
+       __le32  instance_bit_map;
+       u8      ctx_init_value;
+       u8      ctx_init_offset;
+       u8      entry_multiple;
+       u8      rsvd;
+       __le32  max_num_entries;
+       __le32  min_num_entries;
+       __le16  next_valid_type;
+       u8      subtype_valid_cnt;
+       u8      rsvd2;
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+       u8      rsvd3[3];
+       u8      valid;
+};
+
 /* hwrm_func_drv_if_change_input (size:192b/24B) */
 struct hwrm_func_drv_if_change_input {
        __le16  req_type;
@@ -3741,7 +4086,7 @@ struct hwrm_port_phy_qcfg_output {
        u8      valid;
 };
 
-/* hwrm_port_mac_cfg_input (size:384b/48B) */
+/* hwrm_port_mac_cfg_input (size:448b/56B) */
 struct hwrm_port_mac_cfg_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -3807,7 +4152,8 @@ struct hwrm_port_mac_cfg_input {
        #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT           5
        u8      unused_0[3];
        __le32  ptp_freq_adj_ppb;
-       __le32  ptp_adj_phase;
+       u8      unused_1[4];
+       __le64  ptp_adj_phase;
 };
 
 /* hwrm_port_mac_cfg_output (size:128b/16B) */
@@ -3850,6 +4196,7 @@ struct hwrm_port_mac_ptp_qcfg_output {
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS                      0x4UL
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS                         0x8UL
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK     0x10UL
+       #define PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED                      0x20UL
        u8      unused_0[3];
        __le32  rx_ts_reg_off_lower;
        __le32  rx_ts_reg_off_upper;
@@ -4339,7 +4686,8 @@ struct hwrm_port_phy_qcaps_output {
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_2       0x2UL
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_3       0x3UL
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_4       0x4UL
-       #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST   PORT_PHY_QCAPS_RESP_PORT_CNT_4
+       #define PORT_PHY_QCAPS_RESP_PORT_CNT_12      0xcUL
+       #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST   PORT_PHY_QCAPS_RESP_PORT_CNT_12
        __le16  supported_speeds_force_mode;
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD     0x1UL
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB       0x2UL
@@ -4399,7 +4747,7 @@ struct hwrm_port_phy_qcaps_output {
        __le16  flags2;
        #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED     0x1UL
        #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED       0x2UL
-       u8      unused_0[1];
+       u8      internal_port_cnt;
        u8      valid;
 };
 
@@ -6221,12 +6569,13 @@ struct hwrm_vnic_rss_cfg_input {
        __le16  target_id;
        __le64  resp_addr;
        __le32  hash_type;
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4         0x1UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4     0x2UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4     0x4UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6         0x8UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6     0x10UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6     0x20UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4                0x1UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4            0x2UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4            0x4UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6                0x8UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6            0x10UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6            0x20UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL     0x40UL
        __le16  vnic_id;
        u8      ring_table_pair_index;
        u8      hash_mode_flags;
@@ -7898,6 +8247,7 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output {
        u8      valid;
 };
 
+/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
 struct hwrm_tunnel_dst_port_query_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -8909,6 +9259,50 @@ struct hwrm_dbg_qcfg_output {
        u8      valid;
 };
 
+/* hwrm_dbg_crashdump_medium_cfg_input (size:320b/40B) */
+struct hwrm_dbg_crashdump_medium_cfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  output_dest_flags;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR     0x1UL
+       __le16  pg_size_lvl;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_MASK      0x3UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_SFT       0
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_0       0x0UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_1       0x1UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2       0x2UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LAST       DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_MASK  0x1cUL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_SFT   2
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K   (0x0UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K   (0x1UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K  (0x2UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_2M   (0x3UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8M   (0x4UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G   (0x5UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_LAST   DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_MASK 0xffe0UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_SFT  5
+       __le32  size;
+       __le32  coredump_component_disable_flags;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_NVRAM     0x1UL
+       __le32  unused_0;
+       __le64  pbl;
+};
+
+/* hwrm_dbg_crashdump_medium_cfg_output (size:128b/16B) */
+struct hwrm_dbg_crashdump_medium_cfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      unused_1[7];
+       u8      valid;
+};
+
 /* coredump_segment_record (size:128b/16B) */
 struct coredump_segment_record {
        __le16  component_id;
@@ -9372,8 +9766,35 @@ struct hwrm_nvm_install_update_output {
        __le16  resp_len;
        __le64  installed_items;
        u8      result;
-       #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL
-       #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST   NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS                      0x0UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_FAILURE                      0xffUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_MALLOC_FAILURE               0xfdUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_INDEX_PARAMETER      0xfbUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TYPE_PARAMETER       0xf3UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PREREQUISITE         0xf2UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_FILE_HEADER          0xecUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_SIGNATURE            0xebUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_STREAM          0xeaUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_LENGTH          0xe9UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_MANIFEST             0xe8UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TRAILER              0xe7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_CHECKSUM             0xe6UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_ITEM_CHECKSUM        0xe5UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DATA_LENGTH          0xe4UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DIRECTIVE            0xe1UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_CHIP_REV         0xceUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_DEVICE_ID        0xcdUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_VENDOR    0xccUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_ID        0xcbUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_PLATFORM         0xc5UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_DUPLICATE_ITEM               0xc4UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ZERO_LENGTH_ITEM             0xc3UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_CHECKSUM_ERROR       0xb9UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_DATA_ERROR           0xb8UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_AUTHENTICATION_ERROR 0xb7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_NOT_FOUND               0xb0UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED                  0xa7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST                        NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED
        u8      problem_item;
        #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE    0x0UL
        #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 4852096..a0b321a 100644
 #include "bnxt_hwrm.h"
 #include "bnxt_ptp.h"
 
+static int bnxt_ptp_cfg_settime(struct bnxt *bp, u64 time)
+{
+       struct hwrm_func_ptp_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+       if (rc)
+               return rc;
+
+       req->enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME);
+       req->ptp_set_time = cpu_to_le64(time);
+       return hwrm_req_send(bp, req);
+}
+
 int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off)
 {
        unsigned int ptp_class;
@@ -48,6 +62,9 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
                                                ptp_info);
        u64 ns = timespec64_to_ns(ts);
 
+       if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+               return bnxt_ptp_cfg_settime(ptp->bp, ns);
+
        spin_lock_bh(&ptp->ptp_lock);
        timecounter_init(&ptp->tc, &ptp->cc, ns);
        spin_unlock_bh(&ptp->ptp_lock);
@@ -131,11 +148,47 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
        return 0;
 }
 
+/* Caller holds ptp_lock */
+void bnxt_ptp_update_current_time(struct bnxt *bp)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time);
+       WRITE_ONCE(ptp->old_time, ptp->current_time);
+}
+
+static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta)
+{
+       struct hwrm_port_mac_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(ptp->bp, req, HWRM_PORT_MAC_CFG);
+       if (rc)
+               return rc;
+
+       req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE);
+       req->ptp_adj_phase = cpu_to_le64(delta);
+
+       rc = hwrm_req_send(ptp->bp, req);
+       if (rc) {
+               netdev_err(ptp->bp->dev, "ptp adjphc failed. rc = %x\n", rc);
+       } else {
+               spin_lock_bh(&ptp->ptp_lock);
+               bnxt_ptp_update_current_time(ptp->bp);
+               spin_unlock_bh(&ptp->ptp_lock);
+       }
+
+       return rc;
+}
+
 static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
 {
        struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
                                                ptp_info);
 
+       if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+               return bnxt_ptp_adjphc(ptp, delta);
+
        spin_lock_bh(&ptp->ptp_lock);
        timecounter_adjtime(&ptp->tc, delta);
        spin_unlock_bh(&ptp->ptp_lock);
@@ -714,7 +767,70 @@ static bool bnxt_pps_config_ok(struct bnxt *bp)
        return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config;
 }
 
-int bnxt_ptp_init(struct bnxt *bp)
+static void bnxt_ptp_timecounter_init(struct bnxt *bp, bool init_tc)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       if (!ptp->ptp_clock) {
+               memset(&ptp->cc, 0, sizeof(ptp->cc));
+               ptp->cc.read = bnxt_cc_read;
+               ptp->cc.mask = CYCLECOUNTER_MASK(48);
+               ptp->cc.shift = 0;
+               ptp->cc.mult = 1;
+               ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
+       }
+       if (init_tc)
+               timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+}
+
+/* Caller holds ptp_lock */
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns)
+{
+       timecounter_init(&ptp->tc, &ptp->cc, ns);
+       /* For RTC, cycle_last must be in sync with the timecounter value. */
+       ptp->tc.cycle_last = ns & ptp->cc.mask;
+}
+
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg)
+{
+       struct timespec64 tsp;
+       u64 ns;
+       int rc;
+
+       if (!bp->ptp_cfg || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+               return -ENODEV;
+
+       if (!phc_cfg) {
+               ktime_get_real_ts64(&tsp);
+               ns = timespec64_to_ns(&tsp);
+               rc = bnxt_ptp_cfg_settime(bp, ns);
+               if (rc)
+                       return rc;
+       } else {
+               rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns);
+               if (rc)
+                       return rc;
+       }
+       spin_lock_bh(&bp->ptp_cfg->ptp_lock);
+       bnxt_ptp_rtc_timecounter_init(bp->ptp_cfg, ns);
+       spin_unlock_bh(&bp->ptp_cfg->ptp_lock);
+
+       return 0;
+}
+
+static void bnxt_ptp_free(struct bnxt *bp)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       if (ptp->ptp_clock) {
+               ptp_clock_unregister(ptp->ptp_clock);
+               ptp->ptp_clock = NULL;
+               kfree(ptp->ptp_info.pin_config);
+               ptp->ptp_info.pin_config = NULL;
+       }
+}
+
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
 {
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        int rc;
@@ -726,26 +842,23 @@ int bnxt_ptp_init(struct bnxt *bp)
        if (rc)
                return rc;
 
+       if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+               bnxt_ptp_timecounter_init(bp, false);
+               rc = bnxt_ptp_init_rtc(bp, phc_cfg);
+               if (rc)
+                       goto out;
+       }
+
        if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
                return 0;
 
-       if (ptp->ptp_clock) {
-               ptp_clock_unregister(ptp->ptp_clock);
-               ptp->ptp_clock = NULL;
-               kfree(ptp->ptp_info.pin_config);
-               ptp->ptp_info.pin_config = NULL;
-       }
+       bnxt_ptp_free(bp);
+
        atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
        spin_lock_init(&ptp->ptp_lock);
 
-       memset(&ptp->cc, 0, sizeof(ptp->cc));
-       ptp->cc.read = bnxt_cc_read;
-       ptp->cc.mask = CYCLECOUNTER_MASK(48);
-       ptp->cc.shift = 0;
-       ptp->cc.mult = 1;
-
-       ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
-       timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+       if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+               bnxt_ptp_timecounter_init(bp, true);
 
        ptp->ptp_info = bnxt_ptp_caps;
        if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
@@ -757,8 +870,8 @@ int bnxt_ptp_init(struct bnxt *bp)
                int err = PTR_ERR(ptp->ptp_clock);
 
                ptp->ptp_clock = NULL;
-               bnxt_unmap_ptp_regs(bp);
-               return err;
+               rc = err;
+               goto out;
        }
        if (bp->flags & BNXT_FLAG_CHIP_P5) {
                spin_lock_bh(&ptp->ptp_lock);
@@ -768,6 +881,11 @@ int bnxt_ptp_init(struct bnxt *bp)
                ptp_schedule_worker(ptp->ptp_clock, 0);
        }
        return 0;
+
+out:
+       bnxt_ptp_free(bp);
+       bnxt_unmap_ptp_regs(bp);
+       return rc;
 }
 
 void bnxt_ptp_clear(struct bnxt *bp)
index 7c528e1..373baf4 100644 (file)
@@ -131,12 +131,15 @@ do {                                              \
 #endif
 
 int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off);
+void bnxt_ptp_update_current_time(struct bnxt *bp);
 void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2);
 void bnxt_ptp_reapply_pps(struct bnxt *bp);
 int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
 int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
 int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
-int bnxt_ptp_init(struct bnxt *bp);
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns);
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg);
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg);
 void bnxt_ptp_clear(struct bnxt *bp);
 #endif
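
A note on the cyclecounter setup above: with mult = 1 and shift = 0, PHC
cycles are nanoseconds, so a timecounter read reduces to
base_ns + ((cycle_now - cycle_last) & mask). That is why
bnxt_ptp_rtc_timecounter_init() pins tc.cycle_last to ns & cc.mask right
after seeding the timecounter. A minimal host-side sketch of that
arithmetic (illustrative only, not driver code):

#include <stdint.h>
#include <stdio.h>

#define MASK48 ((1ULL << 48) - 1)	/* matches CYCLECOUNTER_MASK(48) */

/* With mult = 1 and shift = 0, a timecounter read is simply
 * ns = base_ns + ((cycle_now - cycle_last) & mask).
 */
static uint64_t tc_read(uint64_t base_ns, uint64_t cycle_last,
			uint64_t cycle_now)
{
	return base_ns + ((cycle_now - cycle_last) & MASK48);
}

int main(void)
{
	uint64_t ns = 1643300000000000000ULL;	/* arbitrary wall-clock ns */
	uint64_t last = ns & MASK48;		/* in sync, as the patch requires */
	uint64_t now = last + 1000;		/* PHC advanced by 1 us */

	printf("delta = %llu ns\n",
	       (unsigned long long)(tc_read(ns, last, now) - ns));
	return 0;
}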
index 87f1056..cfe0911 100644 (file)
@@ -1368,7 +1368,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
        if (!p->eee_enabled) {
                bcmgenet_eee_enable_set(dev, false);
        } else {
-               ret = phy_init_eee(dev->phydev, 0);
+               ret = phy_init_eee(dev->phydev, false);
                if (ret) {
                        netif_err(priv, hw, dev, "EEE initialization failed\n");
                        return ret;
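
The hunk above (and the matching fec, mvneta and lan743x hunks below)
tracks a tree-wide change that turns the second phy_init_eee() parameter,
the clock-stop enable, from an int into a bool; passing false keeps the
previous behavior of passing 0. Assumed shape of the prototype after the
conversion (the authoritative one lives in include/linux/phy.h):

int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable);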
index c78b99a..8014eb3 100644 (file)
@@ -2363,11 +2363,13 @@ static void gemini_port_save_mac_addr(struct gemini_ethernet_port *port)
 static int gemini_ethernet_port_probe(struct platform_device *pdev)
 {
        char *port_names[2] = { "ethernet0", "ethernet1" };
+       struct device_node *np = pdev->dev.of_node;
        struct gemini_ethernet_port *port;
        struct device *dev = &pdev->dev;
        struct gemini_ethernet *geth;
        struct net_device *netdev;
        struct device *parent;
+       u8 mac[ETH_ALEN];
        unsigned int id;
        int irq;
        int ret;
@@ -2473,6 +2475,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
        netif_napi_add(netdev, &port->napi, gmac_napi_poll,
                       DEFAULT_NAPI_WEIGHT);
 
+       ret = of_get_mac_address(np, mac);
+       if (!ret) {
+               dev_info(dev, "Setting macaddr from DT %pM\n", mac);
+               memcpy(port->mac_addr, mac, ETH_ALEN);
+       }
+
        if (is_valid_ether_addr((void *)port->mac_addr)) {
                eth_hw_addr_set(netdev, (u8 *)port->mac_addr);
        } else {
index 3fb39e3..653bde4 100644 (file)
@@ -21,7 +21,7 @@ void pnic_do_nway(struct net_device *dev)
        struct tulip_private *tp = netdev_priv(dev);
        void __iomem *ioaddr = tp->base_addr;
        u32 phy_reg = ioread32(ioaddr + 0xB8);
-       u32 new_csr6 = tp->csr6 & ~0x40C40200;
+       u32 new_csr6;
 
        if (phy_reg & 0x78000000) { /* Ignore baseT4 */
                if (phy_reg & 0x20000000)               dev->if_port = 5;
index 623d113..521f036 100644 (file)
@@ -100,6 +100,14 @@ static int dpaa2_mac_get_if_mode(struct fwnode_handle *dpmac_node,
        return err;
 }
 
+static struct phylink_pcs *dpaa2_mac_select_pcs(struct phylink_config *config,
+                                               phy_interface_t interface)
+{
+       struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+
+       return mac->pcs;
+}
+
 static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode,
                             const struct phylink_link_state *state)
 {
@@ -172,6 +180,7 @@ static void dpaa2_mac_link_down(struct phylink_config *config,
 
 static const struct phylink_mac_ops dpaa2_mac_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = dpaa2_mac_select_pcs,
        .mac_config = dpaa2_mac_config,
        .mac_link_up = dpaa2_mac_link_up,
        .mac_link_down = dpaa2_mac_link_down,
@@ -303,9 +312,6 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
        }
        mac->phylink = phylink;
 
-       if (mac->pcs)
-               phylink_set_pcs(mac->phylink, mac->pcs);
-
        err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0);
        if (err) {
                netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err);
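
The dpaa2 change above, like the enetc and mvneta changes below, migrates
from calling phylink_set_pcs() after phylink_create() to providing a
mac_select_pcs() callback, so phylink can ask the driver for the right PCS
per interface mode. A generic sketch of the pattern (driver name and
fields hypothetical):

static struct phylink_pcs *foo_select_pcs(struct phylink_config *config,
					  phy_interface_t interface)
{
	struct foo_priv *priv = container_of(config, struct foo_priv,
					     phylink_config);

	/* Return the PCS for this interface mode; NULL means the MAC
	 * manages the link without a separate PCS.
	 */
	return priv->pcs;
}

static const struct phylink_mac_ops foo_phylink_ops = {
	.validate	= phylink_generic_validate,
	.mac_select_pcs	= foo_select_pcs,	/* replaces phylink_set_pcs() */
	.mac_config	= foo_mac_config,
	/* ... */
};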
index ed16a5a..a0c75c7 100644 (file)
@@ -934,18 +934,21 @@ static void enetc_mdiobus_destroy(struct enetc_pf *pf)
        enetc_imdio_remove(pf);
 }
 
+static struct phylink_pcs *
+enetc_pl_mac_select_pcs(struct phylink_config *config, phy_interface_t iface)
+{
+       struct enetc_pf *pf = phylink_to_enetc_pf(config);
+
+       return pf->pcs;
+}
+
 static void enetc_pl_mac_config(struct phylink_config *config,
                                unsigned int mode,
                                const struct phylink_link_state *state)
 {
        struct enetc_pf *pf = phylink_to_enetc_pf(config);
-       struct enetc_ndev_priv *priv;
 
        enetc_mac_config(&pf->si->hw, state->interface);
-
-       priv = netdev_priv(pf->si->ndev);
-       if (pf->pcs)
-               phylink_set_pcs(priv->phylink, pf->pcs);
 }
 
 static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex)
@@ -1062,6 +1065,7 @@ static void enetc_pl_mac_link_down(struct phylink_config *config,
 
 static const struct phylink_mac_ops enetc_mac_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = enetc_pl_mac_select_pcs,
        .mac_config = enetc_pl_mac_config,
        .mac_link_up = enetc_pl_mac_link_up,
        .mac_link_down = enetc_pl_mac_link_down,
index 796133d..11227f5 100644 (file)
@@ -2797,7 +2797,7 @@ static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
        int ret = 0;
 
        if (enable) {
-               ret = phy_init_eee(ndev->phydev, 0);
+               ret = phy_init_eee(ndev->phydev, false);
                if (ret)
                        return ret;
 
index af99017..7d49c28 100644 (file)
@@ -101,7 +101,6 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
        u32 val, tempval;
        struct timespec64 ts;
        u64 ns;
-       val = 0;
 
        if (fep->pps_enable == enable)
                return 0;
index 266e562..d38d0c3 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_mdio.h>
+#include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mdio.h>
@@ -36,9 +37,10 @@ struct tgec_mdio_controller {
 } __packed;
 
 #define MDIO_STAT_ENC          BIT(6)
-#define MDIO_STAT_CLKDIV(x)    (((x>>1) & 0xff) << 8)
+#define MDIO_STAT_CLKDIV(x)    (((x) & 0x1ff) << 7)
 #define MDIO_STAT_BSY          BIT(0)
 #define MDIO_STAT_RD_ER                BIT(1)
+#define MDIO_STAT_PRE_DIS      BIT(5)
 #define MDIO_CTL_DEV_ADDR(x)   (x & 0x1f)
 #define MDIO_CTL_PORT_ADDR(x)  ((x & 0x1f) << 5)
 #define MDIO_CTL_PRE_DIS       BIT(10)
@@ -50,6 +52,8 @@ struct tgec_mdio_controller {
 
 struct mdio_fsl_priv {
        struct  tgec_mdio_controller __iomem *mdio_base;
+       struct  clk *enet_clk;
+       u32     mdc_freq;
        bool    is_little_endian;
        bool    has_a009885;
        bool    has_a011043;
@@ -254,6 +258,50 @@ irq_restore:
        return ret;
 }
 
+static int xgmac_mdio_set_mdc_freq(struct mii_bus *bus)
+{
+       struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+       struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+       struct device *dev = bus->parent;
+       u32 mdio_stat, div;
+
+       if (device_property_read_u32(dev, "clock-frequency", &priv->mdc_freq))
+               return 0;
+
+       priv->enet_clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(priv->enet_clk)) {
+               dev_err(dev, "Input clock unknown, not changing MDC frequency");
+               return PTR_ERR(priv->enet_clk);
+       }
+
+       div = ((clk_get_rate(priv->enet_clk) / priv->mdc_freq) - 1) / 2;
+       if (div < 5 || div > 0x1ff) {
+               dev_err(dev, "Requested MDC frequecy is out of range, ignoring");
+               return -EINVAL;
+       }
+
+       mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+       mdio_stat &= ~MDIO_STAT_CLKDIV(0x1ff);
+       mdio_stat |= MDIO_STAT_CLKDIV(div);
+       xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+       return 0;
+}
+
+static void xgmac_mdio_set_suppress_preamble(struct mii_bus *bus)
+{
+       struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+       struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+       struct device *dev = bus->parent;
+       u32 mdio_stat;
+
+       if (!device_property_read_bool(dev, "suppress-preamble"))
+               return;
+
+       mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+       mdio_stat |= MDIO_STAT_PRE_DIS;
+       xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+}
+
 static int xgmac_mdio_probe(struct platform_device *pdev)
 {
        struct fwnode_handle *fwnode;
@@ -273,7 +321,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv));
+       bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(struct mdio_fsl_priv));
        if (!bus)
                return -ENOMEM;
 
@@ -284,13 +332,11 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
        bus->probe_capabilities = MDIOBUS_C22_C45;
        snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res->start);
 
-       /* Set the PHY base address */
        priv = bus->priv;
-       priv->mdio_base = ioremap(res->start, resource_size(res));
-       if (!priv->mdio_base) {
-               ret = -ENOMEM;
-               goto err_ioremap;
-       }
+       priv->mdio_base = devm_ioremap(&pdev->dev, res->start,
+                                      resource_size(res));
+       if (!priv->mdio_base)
+               return -ENOMEM;
 
        /* For both ACPI and DT cases, endianness of MDIO controller
         * needs to be specified using "little-endian" property.
@@ -303,6 +349,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
        priv->has_a011043 = device_property_read_bool(&pdev->dev,
                                                      "fsl,erratum-a011043");
 
+       xgmac_mdio_set_suppress_preamble(bus);
+
+       ret = xgmac_mdio_set_mdc_freq(bus);
+       if (ret)
+               return ret;
+
        fwnode = pdev->dev.fwnode;
        if (is_of_node(fwnode))
                ret = of_mdiobus_register(bus, to_of_node(fwnode));
@@ -312,32 +364,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
                ret = -EINVAL;
        if (ret) {
                dev_err(&pdev->dev, "cannot register MDIO bus\n");
-               goto err_registration;
+               return ret;
        }
 
        platform_set_drvdata(pdev, bus);
 
        return 0;
-
-err_registration:
-       iounmap(priv->mdio_base);
-
-err_ioremap:
-       mdiobus_free(bus);
-
-       return ret;
-}
-
-static int xgmac_mdio_remove(struct platform_device *pdev)
-{
-       struct mii_bus *bus = platform_get_drvdata(pdev);
-       struct mdio_fsl_priv *priv = bus->priv;
-
-       mdiobus_unregister(bus);
-       iounmap(priv->mdio_base);
-       mdiobus_free(bus);
-
-       return 0;
 }
 
 static const struct of_device_id xgmac_mdio_match[] = {
@@ -364,7 +396,6 @@ static struct platform_driver xgmac_mdio_driver = {
                .acpi_match_table = xgmac_acpi_match,
        },
        .probe = xgmac_mdio_probe,
-       .remove = xgmac_mdio_remove,
 };
 
 module_platform_driver(xgmac_mdio_driver);
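
Worked example of the divider math in xgmac_mdio_set_mdc_freq(): with an
assumed 500 MHz input clock and a requested "clock-frequency" of 2.5 MHz,
div = ((500000000 / 2500000) - 1) / 2 = (200 - 1) / 2 = 99, which falls
inside the accepted [5, 0x1ff] window and is written into the 9-bit CLKDIV
field. A standalone sketch of the same computation:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t rate = 500000000ULL;	/* assumed input clock: 500 MHz */
	uint32_t mdc_freq = 2500000;	/* from the "clock-frequency" property */
	uint32_t div = ((rate / mdc_freq) - 1) / 2;

	if (div < 5 || div > 0x1ff)
		printf("requested MDC frequency out of range\n");
	else
		printf("CLKDIV = %u\n", div);	/* prints CLKDIV = 99 */
	return 0;
}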
index 83c8908..315a43e 100644 (file)
@@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
                        bytes_compl += buf->skb->len;
                        pkts_compl++;
                        dev_kfree_skb_any(buf->skb);
-               } else if (buf->type == MVNETA_TYPE_XDP_TX ||
-                          buf->type == MVNETA_TYPE_XDP_NDO) {
+               } else if ((buf->type == MVNETA_TYPE_XDP_TX ||
+                           buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
                        if (napi && buf->type == MVNETA_TYPE_XDP_TX)
                                xdp_return_frame_rx_napi(buf->xdpf);
                        else
@@ -2060,61 +2060,106 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
 
 static void
 mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
-                   struct xdp_buff *xdp, struct skb_shared_info *sinfo,
-                   int sync_len)
+                   struct xdp_buff *xdp, int sync_len)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        int i;
 
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
        for (i = 0; i < sinfo->nr_frags; i++)
                page_pool_put_full_page(rxq->page_pool,
                                        skb_frag_page(&sinfo->frags[i]), true);
+
+out:
        page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
                           sync_len, true);
 }
 
 static int
 mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
-                       struct xdp_frame *xdpf, bool dma_map)
+                       struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
 {
-       struct mvneta_tx_desc *tx_desc;
-       struct mvneta_tx_buf *buf;
-       dma_addr_t dma_addr;
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+       struct device *dev = pp->dev->dev.parent;
+       struct mvneta_tx_desc *tx_desc = NULL;
+       int i, num_frames = 1;
+       struct page *page;
+
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               num_frames += sinfo->nr_frags;
 
-       if (txq->count >= txq->tx_stop_threshold)
+       if (txq->count + num_frames >= txq->size)
                return MVNETA_XDP_DROPPED;
 
-       tx_desc = mvneta_txq_next_desc_get(txq);
+       for (i = 0; i < num_frames; i++) {
+               struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+               skb_frag_t *frag = NULL;
+               int len = xdpf->len;
+               dma_addr_t dma_addr;
 
-       buf = &txq->buf[txq->txq_put_index];
-       if (dma_map) {
-               /* ndo_xdp_xmit */
-               dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
-                                         xdpf->len, DMA_TO_DEVICE);
-               if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
-                       mvneta_txq_desc_put(txq);
-                       return MVNETA_XDP_DROPPED;
+               if (unlikely(i)) { /* paged area */
+                       frag = &sinfo->frags[i - 1];
+                       len = skb_frag_size(frag);
                }
-               buf->type = MVNETA_TYPE_XDP_NDO;
-       } else {
-               struct page *page = virt_to_page(xdpf->data);
 
-               dma_addr = page_pool_get_dma_addr(page) +
-                          sizeof(*xdpf) + xdpf->headroom;
-               dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
-                                          xdpf->len, DMA_BIDIRECTIONAL);
-               buf->type = MVNETA_TYPE_XDP_TX;
+               tx_desc = mvneta_txq_next_desc_get(txq);
+               if (dma_map) {
+                       /* ndo_xdp_xmit */
+                       void *data;
+
+                       data = unlikely(frag) ? skb_frag_address(frag)
+                                             : xdpf->data;
+                       dma_addr = dma_map_single(dev, data, len,
+                                                 DMA_TO_DEVICE);
+                       if (dma_mapping_error(dev, dma_addr)) {
+                               mvneta_txq_desc_put(txq);
+                               goto unmap;
+                       }
+
+                       buf->type = MVNETA_TYPE_XDP_NDO;
+               } else {
+                       page = unlikely(frag) ? skb_frag_page(frag)
+                                             : virt_to_page(xdpf->data);
+                       dma_addr = page_pool_get_dma_addr(page);
+                       if (unlikely(frag))
+                               dma_addr += skb_frag_off(frag);
+                       else
+                               dma_addr += sizeof(*xdpf) + xdpf->headroom;
+                       dma_sync_single_for_device(dev, dma_addr, len,
+                                                  DMA_BIDIRECTIONAL);
+                       buf->type = MVNETA_TYPE_XDP_TX;
+               }
+               buf->xdpf = unlikely(i) ? NULL : xdpf;
+
+               tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
+               tx_desc->buf_phys_addr = dma_addr;
+               tx_desc->data_size = len;
+               *nxmit_byte += len;
+
+               mvneta_txq_inc_put(txq);
        }
-       buf->xdpf = xdpf;
 
-       tx_desc->command = MVNETA_TXD_FLZ_DESC;
-       tx_desc->buf_phys_addr = dma_addr;
-       tx_desc->data_size = xdpf->len;
+       /* last descriptor */
+       if (likely(tx_desc))
+               tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
 
-       mvneta_txq_inc_put(txq);
-       txq->pending++;
-       txq->count++;
+       txq->pending += num_frames;
+       txq->count += num_frames;
 
        return MVNETA_XDP_TX;
+
+unmap:
+       for (i--; i >= 0; i--) {
+               mvneta_txq_desc_put(txq);
+               tx_desc = txq->descs + txq->next_desc_to_proc;
+               dma_unmap_single(dev, tx_desc->buf_phys_addr,
+                                tx_desc->data_size,
+                                DMA_TO_DEVICE);
+       }
+
+       return MVNETA_XDP_DROPPED;
 }
 
 static int
@@ -2123,8 +2168,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
        struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
        struct mvneta_tx_queue *txq;
        struct netdev_queue *nq;
+       int cpu, nxmit_byte = 0;
        struct xdp_frame *xdpf;
-       int cpu;
        u32 ret;
 
        xdpf = xdp_convert_buff_to_frame(xdp);
@@ -2136,10 +2181,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
        nq = netdev_get_tx_queue(pp->dev, txq->id);
 
        __netif_tx_lock(nq, cpu);
-       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
        if (ret == MVNETA_XDP_TX) {
                u64_stats_update_begin(&stats->syncp);
-               stats->es.ps.tx_bytes += xdpf->len;
+               stats->es.ps.tx_bytes += nxmit_byte;
                stats->es.ps.tx_packets++;
                stats->es.ps.xdp_tx++;
                u64_stats_update_end(&stats->syncp);
@@ -2178,11 +2223,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
 
        __netif_tx_lock(nq, cpu);
        for (i = 0; i < num_frame; i++) {
-               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
+                                             true);
                if (ret != MVNETA_XDP_TX)
                        break;
 
-               nxmit_byte += frames[i]->len;
                nxmit++;
        }
 
@@ -2205,7 +2250,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
               struct bpf_prog *prog, struct xdp_buff *xdp,
               u32 frame_sz, struct mvneta_stats *stats)
 {
-       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        unsigned int len, data_len, sync;
        u32 ret, act;
 
@@ -2226,7 +2270,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 
                err = xdp_do_redirect(pp->dev, xdp, prog);
                if (unlikely(err)) {
-                       mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+                       mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                        ret = MVNETA_XDP_DROPPED;
                } else {
                        ret = MVNETA_XDP_REDIR;
@@ -2237,7 +2281,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
        case XDP_TX:
                ret = mvneta_xdp_xmit_back(pp, xdp);
                if (ret != MVNETA_XDP_TX)
-                       mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+                       mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                break;
        default:
                bpf_warn_invalid_xdp_action(pp->dev, prog, act);
@@ -2246,7 +2290,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
                trace_xdp_exception(pp->dev, prog, act);
                fallthrough;
        case XDP_DROP:
-               mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+               mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                ret = MVNETA_XDP_DROPPED;
                stats->xdp_drop++;
                break;
@@ -2269,7 +2313,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
        int data_len = -MVNETA_MH_SIZE, len;
        struct net_device *dev = pp->dev;
        enum dma_data_direction dma_dir;
-       struct skb_shared_info *sinfo;
 
        if (*size > MVNETA_MAX_RX_BUF_SIZE) {
                len = MVNETA_MAX_RX_BUF_SIZE;
@@ -2289,11 +2332,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
 
        /* Prefetch header */
        prefetch(data);
+       xdp_buff_clear_frags_flag(xdp);
        xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE,
                         data_len, false);
-
-       sinfo = xdp_get_shared_info_from_buff(xdp);
-       sinfo->nr_frags = 0;
 }
 
 static void
@@ -2301,9 +2342,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                            struct mvneta_rx_desc *rx_desc,
                            struct mvneta_rx_queue *rxq,
                            struct xdp_buff *xdp, int *size,
-                           struct skb_shared_info *xdp_sinfo,
                            struct page *page)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        struct net_device *dev = pp->dev;
        enum dma_data_direction dma_dir;
        int data_len, len;
@@ -2321,25 +2362,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                                len, dma_dir);
        rx_desc->buf_phys_addr = 0;
 
-       if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) {
-               skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++];
+       if (!xdp_buff_has_frags(xdp))
+               sinfo->nr_frags = 0;
+
+       if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+               skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];
 
                skb_frag_off_set(frag, pp->rx_offset_correction);
                skb_frag_size_set(frag, data_len);
                __skb_frag_set_page(frag, page);
+
+               if (!xdp_buff_has_frags(xdp)) {
+                       sinfo->xdp_frags_size = *size;
+                       xdp_buff_set_frags_flag(xdp);
+               }
+               if (page_is_pfmemalloc(page))
+                       xdp_buff_set_frag_pfmemalloc(xdp);
        } else {
                page_pool_put_full_page(rxq->page_pool, page, true);
        }
-
-       /* last fragment */
-       if (len == *size) {
-               struct skb_shared_info *sinfo;
-
-               sinfo = xdp_get_shared_info_from_buff(xdp);
-               sinfo->nr_frags = xdp_sinfo->nr_frags;
-               memcpy(sinfo->frags, xdp_sinfo->frags,
-                      sinfo->nr_frags * sizeof(skb_frag_t));
-       }
        *size -= len;
 }
 
@@ -2348,8 +2389,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
                      struct xdp_buff *xdp, u32 desc_status)
 {
        struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-       int i, num_frags = sinfo->nr_frags;
        struct sk_buff *skb;
+       u8 num_frags;
+
+       if (unlikely(xdp_buff_has_frags(xdp)))
+               num_frags = sinfo->nr_frags;
 
        skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
        if (!skb)
@@ -2361,13 +2405,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
        skb_put(skb, xdp->data_end - xdp->data);
        skb->ip_summed = mvneta_rx_csum(pp, desc_status);
 
-       for (i = 0; i < num_frags; i++) {
-               skb_frag_t *frag = &sinfo->frags[i];
-
-               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-                               skb_frag_page(frag), skb_frag_off(frag),
-                               skb_frag_size(frag), PAGE_SIZE);
-       }
+       if (unlikely(xdp_buff_has_frags(xdp)))
+               xdp_update_skb_shared_info(skb, num_frags,
+                                          sinfo->xdp_frags_size,
+                                          num_frags * xdp->frame_sz,
+                                          xdp_buff_is_frag_pfmemalloc(xdp));
 
        return skb;
 }
@@ -2379,7 +2421,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 {
        int rx_proc = 0, rx_todo, refill, size = 0;
        struct net_device *dev = pp->dev;
-       struct skb_shared_info sinfo;
        struct mvneta_stats ps = {};
        struct bpf_prog *xdp_prog;
        u32 desc_status, frame_sz;
@@ -2388,8 +2429,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
        xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq);
        xdp_buf.data_hard_start = NULL;
 
-       sinfo.nr_frags = 0;
-
        /* Get number of received packets */
        rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
 
@@ -2431,7 +2470,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        }
 
                        mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
-                                                   &size, &sinfo, page);
+                                                   &size, page);
                } /* Middle or Last descriptor */
 
                if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2439,7 +2478,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        continue;
 
                if (size) {
-                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
                        goto next;
                }
 
@@ -2451,7 +2490,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                if (IS_ERR(skb)) {
                        struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 
-                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
 
                        u64_stats_update_begin(&stats->syncp);
                        stats->es.skb_alloc_error++;
@@ -2468,11 +2507,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                napi_gro_receive(napi, skb);
 next:
                xdp_buf.data_hard_start = NULL;
-               sinfo.nr_frags = 0;
        }
 
        if (xdp_buf.data_hard_start)
-               mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+               mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
 
        if (ps.xdp_redirect)
                xdp_do_flush_map();
@@ -3260,7 +3298,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
                return err;
        }
 
-       err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
+       err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
+                                PAGE_SIZE);
        if (err < 0)
                goto err_free_pp;
 
@@ -3740,6 +3779,7 @@ static void mvneta_percpu_disable(void *arg)
 static int mvneta_change_mtu(struct net_device *dev, int mtu)
 {
        struct mvneta_port *pp = netdev_priv(dev);
+       struct bpf_prog *prog = pp->xdp_prog;
        int ret;
 
        if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
@@ -3748,8 +3788,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
                mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
        }
 
-       if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
-               netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+       if (prog && !prog->aux->xdp_has_frags &&
+           mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n",
+                           mtu);
+
                return -EINVAL;
        }
 
@@ -3969,6 +4012,15 @@ static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = {
        .pcs_an_restart = mvneta_pcs_an_restart,
 };
 
+static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config,
+                                                phy_interface_t interface)
+{
+       struct net_device *ndev = to_net_dev(config->dev);
+       struct mvneta_port *pp = netdev_priv(ndev);
+
+       return &pp->phylink_pcs;
+}
+
 static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode,
                              phy_interface_t interface)
 {
@@ -4169,13 +4221,14 @@ static void mvneta_mac_link_up(struct phylink_config *config,
        mvneta_port_up(pp);
 
        if (phy && pp->eee_enabled) {
-               pp->eee_active = phy_init_eee(phy, 0) >= 0;
+               pp->eee_active = phy_init_eee(phy, false) >= 0;
                mvneta_set_eee(pp, pp->eee_active && pp->tx_lpi_enabled);
        }
 }
 
 static const struct phylink_mac_ops mvneta_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = mvneta_mac_select_pcs,
        .mac_prepare = mvneta_mac_prepare,
        .mac_config = mvneta_mac_config,
        .mac_finish = mvneta_mac_finish,
@@ -4490,8 +4543,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
        struct mvneta_port *pp = netdev_priv(dev);
        struct bpf_prog *old_prog;
 
-       if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-               NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
+       if (prog && !prog->aux->xdp_has_frags &&
+           dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
                return -EOPNOTSUPP;
        }
 
@@ -5321,26 +5375,62 @@ static int mvneta_probe(struct platform_device *pdev)
        if (!dev)
                return -ENOMEM;
 
-       dev->irq = irq_of_parse_and_map(dn, 0);
-       if (dev->irq == 0)
-               return -EINVAL;
+       dev->tx_queue_len = MVNETA_MAX_TXD;
+       dev->watchdog_timeo = 5 * HZ;
+       dev->netdev_ops = &mvneta_netdev_ops;
+       dev->ethtool_ops = &mvneta_eth_tool_ops;
+
+       pp = netdev_priv(dev);
+       spin_lock_init(&pp->lock);
+       pp->dn = dn;
+
+       pp->rxq_def = rxq_def;
+       pp->indir[0] = rxq_def;
 
        err = of_get_phy_mode(dn, &phy_mode);
        if (err) {
                dev_err(&pdev->dev, "incorrect phy-mode\n");
-               goto err_free_irq;
+               return err;
        }
 
+       pp->phy_interface = phy_mode;
+
        comphy = devm_of_phy_get(&pdev->dev, dn, NULL);
-       if (comphy == ERR_PTR(-EPROBE_DEFER)) {
-               err = -EPROBE_DEFER;
-               goto err_free_irq;
-       } else if (IS_ERR(comphy)) {
+       if (comphy == ERR_PTR(-EPROBE_DEFER))
+               return -EPROBE_DEFER;
+
+       if (IS_ERR(comphy))
                comphy = NULL;
+
+       pp->comphy = comphy;
+
+       pp->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(pp->base))
+               return PTR_ERR(pp->base);
+
+       /* Get special SoC configurations */
+       if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+               pp->neta_armada3700 = true;
+
+       dev->irq = irq_of_parse_and_map(dn, 0);
+       if (dev->irq == 0)
+               return -EINVAL;
+
+       pp->clk = devm_clk_get(&pdev->dev, "core");
+       if (IS_ERR(pp->clk))
+               pp->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(pp->clk)) {
+               err = PTR_ERR(pp->clk);
+               goto err_free_irq;
        }
 
-       pp = netdev_priv(dev);
-       spin_lock_init(&pp->lock);
+       clk_prepare_enable(pp->clk);
+
+       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+       if (!IS_ERR(pp->clk_bus))
+               clk_prepare_enable(pp->clk_bus);
+
+       pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
 
        pp->phylink_config.dev = &dev->dev;
        pp->phylink_config.type = PHYLINK_NETDEV;
@@ -5377,55 +5467,16 @@ static int mvneta_probe(struct platform_device *pdev)
                                 phy_mode, &mvneta_phylink_ops);
        if (IS_ERR(phylink)) {
                err = PTR_ERR(phylink);
-               goto err_free_irq;
-       }
-
-       dev->tx_queue_len = MVNETA_MAX_TXD;
-       dev->watchdog_timeo = 5 * HZ;
-       dev->netdev_ops = &mvneta_netdev_ops;
-
-       dev->ethtool_ops = &mvneta_eth_tool_ops;
-
-       pp->phylink = phylink;
-       pp->comphy = comphy;
-       pp->phy_interface = phy_mode;
-       pp->dn = dn;
-
-       pp->rxq_def = rxq_def;
-       pp->indir[0] = rxq_def;
-
-       /* Get special SoC configurations */
-       if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
-               pp->neta_armada3700 = true;
-
-       pp->clk = devm_clk_get(&pdev->dev, "core");
-       if (IS_ERR(pp->clk))
-               pp->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(pp->clk)) {
-               err = PTR_ERR(pp->clk);
-               goto err_free_phylink;
-       }
-
-       clk_prepare_enable(pp->clk);
-
-       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
-       if (!IS_ERR(pp->clk_bus))
-               clk_prepare_enable(pp->clk_bus);
-
-       pp->base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(pp->base)) {
-               err = PTR_ERR(pp->base);
                goto err_clk;
        }
 
-       pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
-       phylink_set_pcs(phylink, &pp->phylink_pcs);
+       pp->phylink = phylink;
 
        /* Alloc per-cpu port structure */
        pp->ports = alloc_percpu(struct mvneta_pcpu_port);
        if (!pp->ports) {
                err = -ENOMEM;
-               goto err_clk;
+               goto err_free_phylink;
        }
 
        /* Alloc per-cpu stats */
@@ -5569,12 +5620,12 @@ err_netdev:
        free_percpu(pp->stats);
 err_free_ports:
        free_percpu(pp->ports);
-err_clk:
-       clk_disable_unprepare(pp->clk_bus);
-       clk_disable_unprepare(pp->clk);
 err_free_phylink:
        if (pp->phylink)
                phylink_destroy(pp->phylink);
+err_clk:
+       clk_disable_unprepare(pp->clk_bus);
+       clk_disable_unprepare(pp->clk);
 err_free_irq:
        irq_dispose_mapping(dev->irq);
        return err;
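
The reworked mvneta_xdp_submit_frame() above follows the general
multi-buffer pattern: descriptor 0 carries xdpf->data/xdpf->len, and when
xdp_frame_has_frags() is true, one further descriptor per shared-info frag
follows. A condensed sketch of that walk using the same xdp helpers
(fill_desc() is a hypothetical stand-in for the driver's descriptor setup):

struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
int i, nr_frags = xdp_frame_has_frags(xdpf) ? sinfo->nr_frags : 0;

/* Descriptor 0: the linear part. */
fill_desc(xdpf->data, xdpf->len, true);

/* Descriptors 1..nr_frags: the paged frags. */
for (i = 0; i < nr_frags; i++) {
	skb_frag_t *frag = &sinfo->frags[i];

	fill_desc(skb_frag_address(frag), skb_frag_size(frag), false);
}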
index 89ca796..4cd0747 100644 (file)
@@ -1556,6 +1556,7 @@ static int mtk_star_probe(struct platform_device *pdev)
        return devm_register_netdev(dev, ndev);
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id mtk_star_of_match[] = {
        { .compatible = "mediatek,mt8516-eth", },
        { .compatible = "mediatek,mt8518-eth", },
@@ -1563,6 +1564,7 @@ static const struct of_device_id mtk_star_of_match[] = {
        { }
 };
 MODULE_DEVICE_TABLE(of, mtk_star_of_match);
+#endif
 
 static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops,
                         mtk_star_suspend, mtk_star_resume);
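
The new CONFIG_OF guard pairs with of_match_ptr(), which evaluates to NULL
on !CONFIG_OF builds; without the #ifdef the match table would be defined
but unreferenced there and trigger -Wunused-const-variable. Assumed pairing
in the driver struct:

static struct platform_driver mtk_star_driver = {
	.driver = {
		.name = "mtk-star-emac",
		.of_match_table = of_match_ptr(mtk_star_of_match),
	},
	/* ... */
};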
index 6dd4ae2..4e3de28 100644 (file)
@@ -18,6 +18,7 @@ struct mlxsw_env_module_info {
        int num_ports_mapped;
        int num_ports_up;
        enum ethtool_module_power_mode_policy power_mode_policy;
+       enum mlxsw_reg_pmtm_module_type type;
 };
 
 struct mlxsw_env {
@@ -27,14 +28,47 @@ struct mlxsw_env {
        struct mlxsw_env_module_info module_info[];
 };
 
-static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
-                                         bool *qsfp, bool *cmis)
+static int __mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+       int err;
+
+       switch (mlxsw_env->module_info[module].type) {
+       case MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR:
+               err = -EINVAL;
+               break;
+       default:
+               err = 0;
+       }
+
+       return err;
+}
+
+static int mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+       int err;
+
+       mutex_lock(&mlxsw_env->module_info_lock);
+       err = __mlxsw_env_validate_module_type(core, module);
+       mutex_unlock(&mlxsw_env->module_info_lock);
+
+       return err;
+}
+
+static int
+mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp,
+                              bool *cmis)
 {
        char mcia_pl[MLXSW_REG_MCIA_LEN];
        char *eeprom_tmp;
        u8 ident;
        int err;
 
+       err = mlxsw_env_validate_module_type(core, id);
+       if (err)
+               return err;
+
        mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
                            MLXSW_REG_MCIA_I2C_ADDR_LOW);
        err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
@@ -206,7 +240,8 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
        return 0;
 }
 
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+                             struct mlxsw_core *mlxsw_core, int module,
                              struct ethtool_modinfo *modinfo)
 {
        u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE];
@@ -215,6 +250,13 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
        unsigned int read_size;
        int err;
 
+       err = mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               netdev_err(netdev,
+                          "EEPROM is not equipped on port module type\n");
+               return err;
+       }
+
        err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset,
                                            module_info, false, &read_size);
        if (err)
@@ -356,6 +398,13 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
 {
        u32 bytes_read = 0;
        u16 device_addr;
+       int err;
+
+       err = mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type");
+               return err;
+       }
 
        /* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */
        device_addr = page->offset;
@@ -364,7 +413,6 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
                char mcia_pl[MLXSW_REG_MCIA_LEN];
                char *eeprom_tmp;
                u8 size;
-               int err;
 
                size = min_t(u8, page->length - bytes_read,
                             MLXSW_REG_MCIA_EEPROM_SIZE);
@@ -419,6 +467,12 @@ int mlxsw_env_reset_module(struct net_device *netdev,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               netdev_err(netdev, "Reset module is not supported on port module type\n");
+               goto out;
+       }
+
        if (mlxsw_env->module_info[module].num_ports_up) {
                netdev_err(netdev, "Cannot reset module when ports using it are administratively up\n");
                err = -EINVAL;
@@ -461,6 +515,12 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Power mode is not supported on port module type");
+               goto out;
+       }
+
        params->policy = mlxsw_env->module_info[module].power_mode_policy;
 
        mlxsw_reg_mcion_pack(mcion_pl, module);
@@ -571,6 +631,13 @@ mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Power mode set is not supported on port module type");
+               goto out;
+       }
+
        if (mlxsw_env->module_info[module].power_mode_policy == policy)
                goto out;
 
@@ -661,13 +728,12 @@ static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core,
        return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl);
 }
 
-static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core,
-                                             u8 module_count)
+static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core)
 {
        int i, err, sensor_index;
        bool has_temp_sensor;
 
-       for (i = 0; i < module_count; i++) {
+       for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
                err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i,
                                                       &has_temp_sensor);
                if (err)
@@ -876,12 +942,11 @@ mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env)
 }
 
 static int
-mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core,
-                                        u8 module_count)
+mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core)
 {
        int i, err;
 
-       for (i = 0; i < module_count; i++) {
+       for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
                char pmaos_pl[MLXSW_REG_PMAOS_LEN];
 
                mlxsw_reg_pmaos_pack(pmaos_pl, i);
@@ -999,6 +1064,28 @@ out_unlock:
 }
 EXPORT_SYMBOL(mlxsw_env_module_port_down);
 
+static int
+mlxsw_env_module_type_set(struct mlxsw_core *mlxsw_core)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+       int i;
+
+       for (i = 0; i < mlxsw_env->module_count; i++) {
+               char pmtm_pl[MLXSW_REG_PMTM_LEN];
+               int err;
+
+               mlxsw_reg_pmtm_pack(pmtm_pl, 0, i);
+               err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
+               if (err)
+                       return err;
+
+               mlxsw_env->module_info[i].type =
+                       mlxsw_reg_pmtm_module_type_get(pmtm_pl);
+       }
+
+       return 0;
+}
+
 int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
 {
        char mgpir_pl[MLXSW_REG_MGPIR_LEN];
@@ -1037,17 +1124,21 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
        if (err)
                goto err_module_plug_event_register;
 
-       err = mlxsw_env_module_oper_state_event_enable(mlxsw_core,
-                                                      env->module_count);
+       err = mlxsw_env_module_oper_state_event_enable(mlxsw_core);
        if (err)
                goto err_oper_state_event_enable;
 
-       err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count);
+       err = mlxsw_env_module_temp_event_enable(mlxsw_core);
        if (err)
                goto err_temp_event_enable;
 
+       err = mlxsw_env_module_type_set(mlxsw_core);
+       if (err)
+               goto err_type_set;
+
        return 0;
 
+err_type_set:
 err_temp_event_enable:
 err_oper_state_event_enable:
        mlxsw_env_module_plug_event_unregister(env);
index da121b1..ec6564e 100644 (file)
@@ -12,7 +12,8 @@ struct ethtool_eeprom;
 int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
                                         int off, int *temp);
 
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+                             struct mlxsw_core *mlxsw_core, int module,
                              struct ethtool_modinfo *modinfo);
 
 int mlxsw_env_get_module_eeprom(struct net_device *netdev,
index 10d13f5..9ac8ce0 100644 (file)
@@ -110,7 +110,8 @@ static int mlxsw_m_get_module_info(struct net_device *netdev,
        struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
        struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-       return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo);
+       return mlxsw_env_get_module_info(netdev, core, mlxsw_m_port->module,
+                                        modinfo);
 }
 
 static int
index 24cc650..aba5db4 100644 (file)
@@ -4482,6 +4482,8 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4          BIT(21)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4          BIT(22)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4      BIT(23)
+#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T             BIT(24)
+#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T            BIT(25)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR            BIT(27)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR            BIT(28)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR            BIT(29)
@@ -6062,6 +6064,58 @@ static inline void mlxsw_reg_pllp_unpack(char *payload, u8 *label_port,
        *slot_index = mlxsw_reg_pllp_slot_index_get(payload);
 }
 
+/* PMTM - Port Module Type Mapping Register
+ * ----------------------------------------
+ * The PMTM register allows query or configuration of module types.
+ * The register can only be set when the module is disabled by the PMAOS register.
+ */
+#define MLXSW_REG_PMTM_ID 0x5067
+#define MLXSW_REG_PMTM_LEN 0x10
+
+MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN);
+
+/* reg_pmtm_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, slot_index, 0x00, 24, 4);
+
+/* reg_pmtm_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8);
+
+enum mlxsw_reg_pmtm_module_type {
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_4_LANES = 0,
+       MLXSW_REG_PMTM_MODULE_TYPE_QSFP = 1,
+       MLXSW_REG_PMTM_MODULE_TYPE_SFP = 2,
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_SINGLE_LANE = 4,
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_2_LANES = 8,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP4X = 10,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP2X = 11,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP1X = 12,
+       MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14,
+       MLXSW_REG_PMTM_MODULE_TYPE_OSFP = 15,
+       MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD = 16,
+       MLXSW_REG_PMTM_MODULE_TYPE_DSFP = 17,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP8X = 18,
+       MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR = 19,
+};
+
+/* reg_pmtm_module_type
+ * Module type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 5);
+
+static inline void mlxsw_reg_pmtm_pack(char *payload, u8 slot_index, u8 module)
+{
+       MLXSW_REG_ZERO(pmtm, payload);
+       mlxsw_reg_pmtm_slot_index_set(payload, slot_index);
+       mlxsw_reg_pmtm_module_set(payload, module);
+}
+
 /* HTGT - Host Trap Group Table
  * ----------------------------
  * Configures the properties for forwarding to CPU.
@@ -12568,6 +12622,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(pddr),
        MLXSW_REG(pmmp),
        MLXSW_REG(pllp),
+       MLXSW_REG(pmtm),
        MLXSW_REG(htgt),
        MLXSW_REG(hpkt),
        MLXSW_REG(rgcr),
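
Reading a module's type with the new register is a pack/query/get round
trip; a sketch mirroring mlxsw_env_module_type_set() in core_env.c above
(mlxsw_core and module assumed in scope):

char pmtm_pl[MLXSW_REG_PMTM_LEN];
enum mlxsw_reg_pmtm_module_type type;
int err;

mlxsw_reg_pmtm_pack(pmtm_pl, 0 /* slot_index */, module);
err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
if (err)
	return err;

/* module_type is RW, but writable only while the module is disabled
 * via PMAOS; here it is only read back.
 */
type = mlxsw_reg_pmtm_module_type_get(pmtm_pl);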
index a9fff8a..d20e794 100644 (file)
@@ -213,7 +213,6 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp1_kvdl_part *part;
        bool need_update = true;
        unsigned int nr_entries;
-       size_t usage_size;
        u64 resource_size;
        int err;
 
@@ -225,8 +224,8 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
        }
 
        nr_entries = div_u64(resource_size, info->alloc_size);
-       usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
-       part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+       part = kzalloc(struct_size(part, usage, BITS_TO_LONGS(nr_entries)),
+                      GFP_KERNEL);
        if (!part)
                return ERR_PTR(-ENOMEM);
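
This conversion (like the mana and nfp ones below) replaces open-coded
flexible-array sizing with struct_size(), which saturates to SIZE_MAX on
arithmetic overflow so the allocation fails instead of returning an
undersized buffer. The equivalence, sketched:

struct part {
	/* ... fixed fields ... */
	unsigned long usage[];	/* flexible array member */
};

/* Before: the multiply or add can wrap. */
p = kzalloc(sizeof(*p) + nlongs * sizeof(unsigned long), GFP_KERNEL);

/* After: struct_size(p, usage, nlongs) computes the same size but
 * saturates on overflow, so kzalloc() fails cleanly.
 */
p = kzalloc(struct_size(p, usage, nlongs), GFP_KERNEL);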
 
index 2053071..8b5d7f8 100644 (file)
@@ -1034,13 +1034,10 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev,
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       int err;
-
-       err = mlxsw_env_get_module_info(mlxsw_sp->core,
-                                       mlxsw_sp_port->mapping.module,
-                                       modinfo);
 
-       return err;
+       return mlxsw_env_get_module_info(netdev, mlxsw_sp->core,
+                                        mlxsw_sp_port->mapping.module,
+                                        modinfo);
 }
 
 static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
@@ -1048,13 +1045,10 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       int err;
-
-       err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
-                                         mlxsw_sp_port->mapping.module, ee,
-                                         data);
 
-       return err;
+       return mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
+                                          mlxsw_sp_port->mapping.module, ee,
+                                          data);
 }
 
 static int
@@ -1273,12 +1267,22 @@ struct mlxsw_sp1_port_link_mode {
 
 static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
        {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+               .speed          = SPEED_100,
+       },
+       {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
                                  MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
                .mask_ethtool   = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
                .speed          = SPEED_1000,
        },
        {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+               .speed          = SPEED_1000,
+       },
+       {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
                                  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
                .mask_ethtool   = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
index 91a755e..5f1e7b8 100644 (file)
@@ -750,7 +750,7 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev,
        }
 
        if (eee->eee_enabled) {
-               ret = phy_init_eee(phydev, 0);
+               ret = phy_init_eee(phydev, false);
                if (ret) {
                        netif_err(adapter, drv, adapter->netdev,
                                  "EEE initialization failed\n");
index 636dfef..49b85ca 100644 (file)
@@ -663,7 +663,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
        struct gdma_context *gc = gd->gdma_context;
        struct hw_channel_context *hwc;
        u32 length = gmi->length;
-       u32 req_msg_size;
+       size_t req_msg_size;
        int err;
        int i;
 
@@ -674,7 +674,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
                return -EINVAL;
 
        hwc = gc->hwc.driver_data;
-       req_msg_size = sizeof(*req) + num_page * sizeof(u64);
+       req_msg_size = struct_size(req, page_addr_list, num_page);
        if (req_msg_size > hwc->max_req_msg_size)
                return -EINVAL;
 
index dfb4468..ce865e6 100644 (file)
@@ -356,7 +356,7 @@ __nfp_tun_add_route_to_cache(struct list_head *route_list,
                        return 0;
                }
 
-       entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC);
+       entry = kmalloc(struct_size(entry, ip_add, add_len), GFP_ATOMIC);
        if (!entry) {
                spin_unlock_bh(list_lock);
                return -ENOMEM;
index 3d61a8c..50007cc 100644 (file)
@@ -1,8 +1,7 @@
 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
 /* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
-/*
- * nfp_net_ctrl.h
+/* nfp_net_ctrl.h
  * Netronome network device driver: Control BAR layout
  * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
  *          Jason McMullan <jason.mcmullan@netronome.com>
 
 #include <linux/types.h>
 
-/**
- * Configuration BAR size.
+/* Configuration BAR size.
  *
  * The configuration BAR is 8K in size, but due to
  * THB-350, 32k needs to be reserved.
  */
 #define NFP_NET_CFG_BAR_SZ             (32 * 1024)
 
-/**
- * Offset in Freelist buffer where packet starts on RX
- */
+/* Offset in Freelist buffer where packet starts on RX */
 #define NFP_NET_RX_OFFSET              32
 
-/**
- * LSO parameters
+/* LSO parameters
  * %NFP_NET_LSO_MAX_HDR_SZ:    Maximum header size supported for LSO frames
  * %NFP_NET_LSO_MAX_SEGS:      Maximum number of segments LSO frame can produce
  */
 #define NFP_NET_LSO_MAX_HDR_SZ         255
 #define NFP_NET_LSO_MAX_SEGS           64
 
-/**
- * Prepend field types
- */
+/* Prepend field types */
 #define NFP_NET_META_FIELD_SIZE                4
 #define NFP_NET_META_HASH              1 /* next field carries hash type */
 #define NFP_NET_META_MARK              2
@@ -49,9 +42,7 @@
 
 #define NFP_META_PORT_ID_CTRL          ~0U
 
-/**
- * Hash type pre-pended when a RSS hash was computed
- */
+/* Hash type pre-pended when a RSS hash was computed */
 #define NFP_NET_RSS_NONE               0
 #define NFP_NET_RSS_IPV4               1
 #define NFP_NET_RSS_IPV6               2
 #define NFP_NET_RSS_IPV6_UDP           8
 #define NFP_NET_RSS_IPV6_EX_UDP                9
 
-/**
- * Ring counts
+/* Ring counts
  * %NFP_NET_TXR_MAX:        Maximum number of TX rings
  * %NFP_NET_RXR_MAX:        Maximum number of RX rings
  */
 #define NFP_NET_TXR_MAX                        64
 #define NFP_NET_RXR_MAX                        64
 
-/**
- * Read/Write config words (0x0000 - 0x002c)
+/* Read/Write config words (0x0000 - 0x002c)
  * %NFP_NET_CFG_CTRL:       Global control
  * %NFP_NET_CFG_UPDATE:      Indicate which fields are updated
  * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
 #define NFP_NET_CFG_LSC                        0x0020
 #define NFP_NET_CFG_MACADDR            0x0024
 
-/**
- * Read-only words (0x0030 - 0x0050):
+/* Read-only words (0x0030 - 0x0050):
  * %NFP_NET_CFG_VERSION:     Firmware version number
  * %NFP_NET_CFG_STS:        Status
  * %NFP_NET_CFG_CAP:        Capabilities (same bits as %NFP_NET_CFG_CTRL)
 #define NFP_NET_CFG_START_TXQ          0x0048
 #define NFP_NET_CFG_START_RXQ          0x004c
 
-/**
- * Prepend configuration
+/* Prepend configuration
  */
 #define NFP_NET_CFG_RX_OFFSET          0x0050
 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC          0       /* Prepend mode */
 
-/**
- * RSS capabilities
+/* RSS capabilities
  * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as
  *                             %NFP_NET_CFG_RSS_HFUNC)
  */
 #define NFP_NET_CFG_RSS_CAP            0x0054
 #define   NFP_NET_CFG_RSS_CAP_HFUNC      0xff000000
 
-/**
- * TLV area start
+/* TLV area start
  * %NFP_NET_CFG_TLV_BASE:      start anchor of the TLV area
  */
 #define NFP_NET_CFG_TLV_BASE           0x0058
 
-/**
- * VXLAN/UDP encap configuration
+/* VXLAN/UDP encap configuration
  * %NFP_NET_CFG_VXLAN_PORT:    Base address of table of tunnels' UDP dst ports
  * %NFP_NET_CFG_VXLAN_SZ:      Size of the UDP port table in bytes
  */
 #define NFP_NET_CFG_VXLAN_PORT         0x0060
 #define NFP_NET_CFG_VXLAN_SZ             0x0008
 
-/**
- * BPF section
+/* BPF section
  * %NFP_NET_CFG_BPF_ABI:       BPF ABI version
  * %NFP_NET_CFG_BPF_CAP:       BPF capabilities
  * %NFP_NET_CFG_BPF_MAX_LEN:   Maximum size of JITed BPF code in bytes
 #define   NFP_NET_CFG_BPF_CFG_MASK     7ULL
 #define   NFP_NET_CFG_BPF_ADDR_MASK    (~NFP_NET_CFG_BPF_CFG_MASK)
 
-/**
- * 40B reserved for future use (0x0098 - 0x00c0)
+/* 40B reserved for future use (0x0098 - 0x00c0)
  */
 #define NFP_NET_CFG_RESERVED           0x0098
 #define NFP_NET_CFG_RESERVED_SZ                0x0028
 
-/**
- * RSS configuration (0x0100 - 0x01ac):
+/* RSS configuration (0x0100 - 0x01ac):
  * Used only when NFP_NET_CFG_CTRL_RSS is enabled
  * %NFP_NET_CFG_RSS_CFG:     RSS configuration word
  * %NFP_NET_CFG_RSS_KEY:     RSS "secret" key
                                         NFP_NET_CFG_RSS_KEY_SZ)
 #define NFP_NET_CFG_RSS_ITBL_SZ                0x80
 
-/**
- * TX ring configuration (0x200 - 0x800)
+/* TX ring configuration (0x200 - 0x800)
  * %NFP_NET_CFG_TXR_BASE:    Base offset for TX ring configuration
  * %NFP_NET_CFG_TXR_ADDR:    Per TX ring DMA address (8B entries)
  * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries)
 #define NFP_NET_CFG_TXR_IRQ_MOD(_x)    (NFP_NET_CFG_TXR_BASE + 0x500 + \
                                         ((_x) * 0x4))
 
-/**
- * RX ring configuration (0x0800 - 0x0c00)
+/* RX ring configuration (0x0800 - 0x0c00)
  * %NFP_NET_CFG_RXR_BASE:    Base offset for RX ring configuration
  * %NFP_NET_CFG_RXR_ADDR:    Per RX ring DMA address (8B entries)
  * %NFP_NET_CFG_RXR_SZ:      Per RX ring ring size (1B entries)
 #define NFP_NET_CFG_RXR_IRQ_MOD(_x)    (NFP_NET_CFG_RXR_BASE + 0x300 + \
                                         ((_x) * 0x4))
 
-/**
- * Interrupt Control/Cause registers (0x0c00 - 0x0d00)
+/* Interrupt Control/Cause registers (0x0c00 - 0x0d00)
  * These registers are only used when MSI-X auto-masking is not
  * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set).  The array is indexed
  * by MSI-X entry; entries are 1B in size.  If an entry is zero, the
 #define   NFP_NET_CFG_ICR_RXTX         0x1
 #define   NFP_NET_CFG_ICR_LSC          0x2
 
-/**
- * General device stats (0x0d00 - 0x0d90)
+/* General device stats (0x0d00 - 0x0d90)
  * all counters are 64bit.
  */
 #define NFP_NET_CFG_STATS_BASE         0x0d00
 #define NFP_NET_CFG_STATS_APP3_FRAMES  (NFP_NET_CFG_STATS_BASE + 0xc0)
 #define NFP_NET_CFG_STATS_APP3_BYTES   (NFP_NET_CFG_STATS_BASE + 0xc8)
 
-/**
- * Per ring stats (0x1000 - 0x1800)
+/* Per ring stats (0x1000 - 0x1800)
  * options, 64bit per entry
  * %NFP_NET_CFG_TXR_STATS:   TX ring statistics (Packet and Byte count)
  * %NFP_NET_CFG_RXR_STATS:   RX ring statistics (Packet and Byte count)
 #define NFP_NET_CFG_RXR_STATS(_x)      (NFP_NET_CFG_RXR_STATS_BASE + \
                                         ((_x) * 0x10))
 
-/**
- * General use mailbox area (0x1800 - 0x19ff)
+/* General use mailbox area (0x1800 - 0x19ff)
  * 4B used for update command and 4B return code
  * followed by a max of 504B of variable length value
  */
 #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET      5
 #define NFP_NET_CFG_MBOX_CMD_TLV_CMSG                  6
 
-/**
- * VLAN filtering using general use mailbox
+/* VLAN filtering using general use mailbox
  * %NFP_NET_CFG_VLAN_FILTER:           Base address of VLAN filter mailbox
  * %NFP_NET_CFG_VLAN_FILTER_VID:       VLAN ID to filter
  * %NFP_NET_CFG_VLAN_FILTER_PROTO:     VLAN proto to filter
 #define  NFP_NET_CFG_VLAN_FILTER_PROTO  (NFP_NET_CFG_VLAN_FILTER + 2)
 #define NFP_NET_CFG_VLAN_FILTER_SZ      0x0004
 
-/**
- * TLV capabilities
+/* TLV capabilities
  * %NFP_NET_CFG_TLV_TYPE:      Offset of type within the TLV
  * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
  * %NFP_NET_CFG_TLV_LENGTH:    Offset of length within the TLV
 #define NFP_NET_CFG_TLV_HEADER_TYPE    0x7fff0000
 #define NFP_NET_CFG_TLV_HEADER_LENGTH  0x0000ffff
 
-/**
- * Capability TLV types
+/* Capability TLV types
  *
  * %NFP_NET_CFG_TLV_TYPE_UNKNOWN:
  * Special TLV type to catch bugs, should never be encountered.  Drivers should
 
 struct device;
 
-/**
- * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
+/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
  * @me_freq_mhz:       ME clock_freq (MHz)
  * @mbox_off:          vNIC mailbox area offset
  * @mbox_len:          vNIC mailbox area length
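
The nfp_net_ctrl.h hunks above all make the same fix: scripts/kernel-doc treats any comment opened with "/**" as kernel-doc, and W=1 builds warn when such a block does not document a named function or struct, so these free-standing register descriptions are demoted to plain "/* ... */" comments. A minimal sketch of the distinction; the names below are illustrative, not from the driver:

	/* Plain comment: ignored by scripts/kernel-doc; fine for #defines. */
	#define EXAMPLE_REG_CTRL	0x0000

	/**
	 * example_reg_read() - read a 32-bit device register
	 * @base: mapped base address of the register block
	 * @off: byte offset of the register to read
	 *
	 * Kernel-doc wants the "name() - summary" line and @arg tags above,
	 * and the block must sit directly before the item it documents.
	 */
	static inline u32 example_reg_read(void __iomem *base, u32 off)
	{
		return readl(base + off);
	}
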
index a3db0cb..786be58 100644
@@ -4,8 +4,7 @@
 #ifndef _NFP_NET_SRIOV_H_
 #define _NFP_NET_SRIOV_H_
 
-/**
- * SRIOV VF configuration.
+/* SRIOV VF configuration.
  * The configuration memory begins with a mailbox region for communication with
  * the firmware followed by individual VF entries.
  */
index ae4da18..df316b9 100644
@@ -132,8 +132,7 @@ void nfp_devlink_port_unregister(struct nfp_port *port);
 void nfp_devlink_port_type_eth_set(struct nfp_port *port);
 void nfp_devlink_port_type_clear(struct nfp_port *port);
 
-/**
- * Mac stats (0x0000 - 0x0200)
+/* Mac stats (0x0000 - 0x0200)
  * all counters are 64bit.
  */
 #define NFP_MAC_STATS_BASE                0x0000
index 10e7d8b..730fea2 100644
@@ -513,7 +513,7 @@ nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp,
        dma_size = BIT_ULL(dma_order);
        nseg = DIV_ROUND_UP(max_size, chunk_size);
 
-       chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL);
+       chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL);
        if (!chunks)
                return -ENOMEM;
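
The kcalloc() conversion above is behavior-preserving: kcalloc(n, size, flags) zeroes the buffer and returns NULL if n * size would overflow, which is exactly what kzalloc(array_size(...)) guaranteed, while stating the array intent directly. The general pattern, with an illustrative element type:

	struct chunk {
		void __iomem *addr;
		size_t len;
	};

	/* Overflow-checked, zero-initialized array allocation. */
	struct chunk *chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL);

	if (!chunks)
		return -ENOMEM;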
 
index 5e25411..602f4d4 100644
@@ -18,7 +18,7 @@ struct ionic_lif;
 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF    0x1002
 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF    0x1003
 
-#define DEVCMD_TIMEOUT  10
+#define DEVCMD_TIMEOUT                 5
 #define IONIC_ADMINQ_TIME_SLICE                msecs_to_jiffies(100)
 
 #define IONIC_PHC_UPDATE_NS    10000000000         /* 10s in nanoseconds */
@@ -78,6 +78,9 @@ void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
                                   u8 status, int err);
 
 int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_wait);
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+                                int err);
 int ionic_set_dma_mask(struct ionic *ionic);
 int ionic_setup(struct ionic *ionic);
 
@@ -89,4 +92,6 @@ int ionic_port_identify(struct ionic *ionic);
 int ionic_port_init(struct ionic *ionic);
 int ionic_port_reset(struct ionic *ionic);
 
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr);
+
 #endif /* _IONIC_H_ */
index 7e296fa..6ffc62c 100644
@@ -109,8 +109,8 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page)
 
 static void ionic_vf_dealloc_locked(struct ionic *ionic)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
        struct ionic_vf *v;
-       dma_addr_t dma = 0;
        int i;
 
        if (!ionic->vfs)
@@ -120,9 +120,8 @@ static void ionic_vf_dealloc_locked(struct ionic *ionic)
                v = &ionic->vfs[i];
 
                if (v->stats_pa) {
-                       (void)ionic_set_vf_config(ionic, i,
-                                                 IONIC_VF_ATTR_STATSADDR,
-                                                 (u8 *)&dma);
+                       vfc.stats_pa = 0;
+                       (void)ionic_set_vf_config(ionic, i, &vfc);
                        dma_unmap_single(ionic->dev, v->stats_pa,
                                         sizeof(v->stats), DMA_FROM_DEVICE);
                        v->stats_pa = 0;
@@ -143,6 +142,7 @@ static void ionic_vf_dealloc(struct ionic *ionic)
 
 static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
        struct ionic_vf *v;
        int err = 0;
        int i;
@@ -166,9 +166,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
                }
 
                ionic->num_vfs++;
+
                /* ignore failures from older FW, we just won't get stats */
-               (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR,
-                                         (u8 *)&v->stats_pa);
+               vfc.stats_pa = cpu_to_le64(v->stats_pa);
+               (void)ionic_set_vf_config(ionic, i, &vfc);
        }
 
 out:
@@ -331,6 +332,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_out_deregister_lifs;
        }
 
+       mod_timer(&ionic->watchdog_timer,
+                 round_jiffies(jiffies + ionic->watchdog_period));
+
        return 0;
 
 err_out_deregister_lifs:
@@ -348,7 +352,6 @@ err_out_port_reset:
 err_out_reset:
        ionic_reset(ionic);
 err_out_teardown:
-       del_timer_sync(&ionic->watchdog_timer);
        pci_clear_master(pdev);
        /* Don't fail the probe for these errors, keep
         * the hw interface around for inspection
index d57e80d..52a1b5c 100644
@@ -33,7 +33,8 @@ static void ionic_watchdog_cb(struct timer_list *t)
            !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
 
-       if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) {
+       if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state) &&
+           !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
                work = kzalloc(sizeof(*work), GFP_ATOMIC);
                if (!work) {
                        netdev_err(lif->netdev, "rxmode change dropped\n");
@@ -46,6 +47,24 @@ static void ionic_watchdog_cb(struct timer_list *t)
        }
 }
 
+static void ionic_watchdog_init(struct ionic *ionic)
+{
+       struct ionic_dev *idev = &ionic->idev;
+
+       timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
+       ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
+
+       /* set times to ensure the first check will proceed */
+       atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
+       idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
+       /* init as ready, so no transition if the first check succeeds */
+       idev->last_fw_hb = 0;
+       idev->fw_hb_ready = true;
+       idev->fw_status_ready = true;
+       idev->fw_generation = IONIC_FW_STS_F_GENERATION &
+                             ioread8(&idev->dev_info_regs->fw_status);
+}
+
 void ionic_init_devinfo(struct ionic *ionic)
 {
        struct ionic_dev *idev = &ionic->idev;
@@ -109,21 +128,7 @@ int ionic_dev_setup(struct ionic *ionic)
                return -EFAULT;
        }
 
-       timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
-       ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
-
-       /* set times to ensure the first check will proceed */
-       atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
-       idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
-       /* init as ready, so no transition if the first check succeeds */
-       idev->last_fw_hb = 0;
-       idev->fw_hb_ready = true;
-       idev->fw_status_ready = true;
-       idev->fw_generation = IONIC_FW_STS_F_GENERATION &
-                             ioread8(&idev->dev_info_regs->fw_status);
-
-       mod_timer(&ionic->watchdog_timer,
-                 round_jiffies(jiffies + ionic->watchdog_period));
+       ionic_watchdog_init(ionic);
 
        idev->db_pages = bar->vaddr;
        idev->phy_db_pages = bar->bus_addr;
@@ -132,10 +137,21 @@ int ionic_dev_setup(struct ionic *ionic)
 }
 
 /* Devcmd Interface */
+bool ionic_is_fw_running(struct ionic_dev *idev)
+{
+       u8 fw_status = ioread8(&idev->dev_info_regs->fw_status);
+
+       /* firmware is useful only if the running bit is set and
+        * fw_status != 0xff (bad PCI read)
+        */
+       return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
+}
+
 int ionic_heartbeat_check(struct ionic *ionic)
 {
-       struct ionic_dev *idev = &ionic->idev;
        unsigned long check_time, last_check_time;
+       struct ionic_dev *idev = &ionic->idev;
+       struct ionic_lif *lif = ionic->lif;
        bool fw_status_ready = true;
        bool fw_hb_ready;
        u8 fw_generation;
@@ -155,13 +171,10 @@ do_check_time:
                goto do_check_time;
        }
 
-       /* firmware is useful only if the running bit is set and
-        * fw_status != 0xff (bad PCI read)
-        * If fw_status is not ready don't bother with the generation.
-        */
        fw_status = ioread8(&idev->dev_info_regs->fw_status);
 
-       if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) {
+       /* If fw_status is not ready don't bother with the generation */
+       if (!ionic_is_fw_running(idev)) {
                fw_status_ready = false;
        } else {
                fw_generation = fw_status & IONIC_FW_STS_F_GENERATION;
@@ -176,31 +189,41 @@ do_check_time:
                         * the down, the next watchdog will see the fw is up
                         * and the generation value stable, so will trigger
                         * the fw-up activity.
+                        *
+                        * If we had already moved to FW_RESET from a RESET event,
+                        * it is possible that we never saw the fw_status go to 0,
+                        * so we fake the current idev->fw_status_ready here to
+                        * force the transition and get FW up again.
                         */
-                       fw_status_ready = false;
+                       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+                               idev->fw_status_ready = false;  /* go to running */
+                       else
+                               fw_status_ready = false;        /* go to down */
                }
        }
 
        /* is this a transition? */
        if (fw_status_ready != idev->fw_status_ready) {
-               struct ionic_lif *lif = ionic->lif;
                bool trigger = false;
 
-               idev->fw_status_ready = fw_status_ready;
-
-               if (!fw_status_ready) {
-                       dev_info(ionic->dev, "FW stopped %u\n", fw_status);
-                       if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
-                               trigger = true;
-               } else {
-                       dev_info(ionic->dev, "FW running %u\n", fw_status);
-                       if (lif && test_bit(IONIC_LIF_F_FW_RESET, lif->state))
-                               trigger = true;
+               if (!fw_status_ready && lif &&
+                   !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                   !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       dev_info(ionic->dev, "FW stopped 0x%02x\n", fw_status);
+                       trigger = true;
+
+               } else if (fw_status_ready && lif &&
+                          test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                          !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       dev_info(ionic->dev, "FW running 0x%02x\n", fw_status);
+                       trigger = true;
                }
 
                if (trigger) {
                        struct ionic_deferred_work *work;
 
+                       idev->fw_status_ready = fw_status_ready;
+
                        work = kzalloc(sizeof(*work), GFP_ATOMIC);
                        if (work) {
                                work->type = IONIC_DW_TYPE_LIF_RESET;
@@ -210,12 +233,14 @@ do_check_time:
                }
        }
 
-       if (!fw_status_ready)
+       if (!idev->fw_status_ready)
                return -ENXIO;
 
-       /* wait at least one watchdog period since the last heartbeat */
+       /* Because of some variability in the actual FW heartbeat, we
+        * wait longer than the DEVCMD_TIMEOUT before checking again.
+        */
        last_check_time = idev->last_hb_time;
-       if (time_before(check_time, last_check_time + ionic->watchdog_period))
+       if (time_before(check_time, last_check_time + DEVCMD_TIMEOUT * 2 * HZ))
                return 0;
 
        fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
@@ -392,60 +417,63 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type)
 }
 
 /* VF commands */
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data)
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+                       struct ionic_vf_setattr_cmd *vfc)
 {
        union ionic_dev_cmd cmd = {
                .vf_setattr.opcode = IONIC_CMD_VF_SETATTR,
-               .vf_setattr.attr = attr,
+               .vf_setattr.attr = vfc->attr,
                .vf_setattr.vf_index = cpu_to_le16(vf),
        };
        int err;
 
+       memcpy(cmd.vf_setattr.pad, vfc->pad, sizeof(vfc->pad));
+
+       mutex_lock(&ionic->dev_cmd_lock);
+       ionic_dev_cmd_go(&ionic->idev, &cmd);
+       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+       mutex_unlock(&ionic->dev_cmd_lock);
+
+       return err;
+}
+
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+                            struct ionic_vf_getattr_comp *comp)
+{
+       union ionic_dev_cmd cmd = {
+               .vf_getattr.opcode = IONIC_CMD_VF_GETATTR,
+               .vf_getattr.attr = attr,
+               .vf_getattr.vf_index = cpu_to_le16(vf),
+       };
+       int err;
+
+       if (vf >= ionic->num_vfs)
+               return -EINVAL;
+
        switch (attr) {
        case IONIC_VF_ATTR_SPOOFCHK:
-               cmd.vf_setattr.spoofchk = *data;
-               dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_TRUST:
-               cmd.vf_setattr.trust = *data;
-               dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_LINKSTATE:
-               cmd.vf_setattr.linkstate = *data;
-               dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_MAC:
-               ether_addr_copy(cmd.vf_setattr.macaddr, data);
-               dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
-                       __func__, vf, data);
-               break;
        case IONIC_VF_ATTR_VLAN:
-               cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
-                       __func__, vf, *(u16 *)data);
-               break;
        case IONIC_VF_ATTR_RATE:
-               cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
-                       __func__, vf, *(u32 *)data);
                break;
        case IONIC_VF_ATTR_STATSADDR:
-               cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n",
-                       __func__, vf, *(u64 *)data);
-               break;
        default:
                return -EINVAL;
        }
 
        mutex_lock(&ionic->dev_cmd_lock);
        ionic_dev_cmd_go(&ionic->idev, &cmd);
-       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+       err = ionic_dev_cmd_wait_nomsg(ionic, DEVCMD_TIMEOUT);
+       memcpy_fromio(comp, &ionic->idev.dev_cmd_regs->comp.vf_getattr,
+                     sizeof(*comp));
        mutex_unlock(&ionic->dev_cmd_lock);
 
+       if (err && comp->status != IONIC_RC_ENOSUPP)
+               ionic_dev_cmd_dev_err_print(ionic, cmd.vf_getattr.opcode,
+                                           comp->status, err);
+
        return err;
 }
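
With the reworked VF interface above, callers fill a struct ionic_vf_setattr_cmd on the stack instead of passing a raw byte pointer whose meaning depended on the attr value; endianness conversion moves to the caller and the pad bytes travel with the command. The shape of a call, mirroring the setters later in this patch:

	struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN };
	int err;

	vfc.vlanid = cpu_to_le16(vlan);
	err = ionic_set_vf_config(ionic, vf, &vfc);
	if (!err)
		ionic->vfs[vf].vlanid = vfc.vlanid;
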
 
index e5acf3b..563c302 100644
@@ -318,7 +318,10 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable);
 void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
 void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
 
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data);
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+                       struct ionic_vf_setattr_cmd *vfc);
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+                            struct ionic_vf_getattr_comp *comp);
 void ionic_dev_cmd_queue_identify(struct ionic_dev *idev,
                                  u16 lif_type, u8 qtype, u8 qver);
 void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
@@ -353,5 +356,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
 void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
                     unsigned int stop_index);
 int ionic_heartbeat_check(struct ionic *ionic);
+bool ionic_is_fw_running(struct ionic_dev *idev);
 
 #endif /* _IONIC_DEV_H_ */
index 2ff7be1..542e395 100644
@@ -1112,12 +1112,17 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
                ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
                break;
        case IONIC_EVENT_RESET:
-               work = kzalloc(sizeof(*work), GFP_ATOMIC);
-               if (!work) {
-                       netdev_err(lif->netdev, "Reset event dropped\n");
-               } else {
-                       work->type = IONIC_DW_TYPE_LIF_RESET;
-                       ionic_lif_deferred_enqueue(&lif->deferred, work);
+               if (lif->ionic->idev.fw_status_ready &&
+                   !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                   !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       work = kzalloc(sizeof(*work), GFP_ATOMIC);
+                       if (!work) {
+                               netdev_err(lif->netdev, "Reset event dropped\n");
+                               clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
+                       } else {
+                               work->type = IONIC_DW_TYPE_LIF_RESET;
+                               ionic_lif_deferred_enqueue(&lif->deferred, work);
+                       }
                }
                break;
        default:
@@ -1782,7 +1787,7 @@ static void ionic_lif_quiesce(struct ionic_lif *lif)
 
        err = ionic_adminq_post_wait(lif, &ctx);
        if (err)
-               netdev_err(lif->netdev, "lif quiesce failed %d\n", err);
+               netdev_dbg(lif->netdev, "lif quiesce failed %d\n", err);
 }
 
 static void ionic_txrx_disable(struct ionic_lif *lif)
@@ -2152,6 +2157,76 @@ static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd
        }
 }
 
+static int ionic_update_cached_vf_config(struct ionic *ionic, int vf)
+{
+       struct ionic_vf_getattr_comp comp = { 0 };
+       int err;
+       u8 attr;
+
+       attr = IONIC_VF_ATTR_VLAN;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].vlanid = comp.vlanid;
+
+       attr = IONIC_VF_ATTR_SPOOFCHK;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].spoofchk = comp.spoofchk;
+
+       attr = IONIC_VF_ATTR_LINKSTATE;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err) {
+               switch (comp.linkstate) {
+               case IONIC_VF_LINK_STATUS_UP:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_ENABLE;
+                       break;
+               case IONIC_VF_LINK_STATUS_DOWN:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_DISABLE;
+                       break;
+               case IONIC_VF_LINK_STATUS_AUTO:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_AUTO;
+                       break;
+               default:
+                       dev_warn(ionic->dev, "Unexpected link state %u\n", comp.linkstate);
+                       break;
+               }
+       }
+
+       attr = IONIC_VF_ATTR_RATE;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].maxrate = comp.maxrate;
+
+       attr = IONIC_VF_ATTR_TRUST;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].trusted = comp.trust;
+
+       attr = IONIC_VF_ATTR_MAC;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ether_addr_copy(ionic->vfs[vf].macaddr, comp.macaddr);
+
+err_out:
+       if (err)
+               dev_err(ionic->dev, "Failed to get %s for VF %d\n",
+                       ionic_vf_attr_to_str(attr), vf);
+
+       return err;
+}
+
 static int ionic_get_vf_config(struct net_device *netdev,
                               int vf, struct ifla_vf_info *ivf)
 {
@@ -2167,14 +2242,18 @@ static int ionic_get_vf_config(struct net_device *netdev,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ivf->vf           = vf;
-               ivf->vlan         = le16_to_cpu(ionic->vfs[vf].vlanid);
-               ivf->qos          = 0;
-               ivf->spoofchk     = ionic->vfs[vf].spoofchk;
-               ivf->linkstate    = ionic->vfs[vf].linkstate;
-               ivf->max_tx_rate  = le32_to_cpu(ionic->vfs[vf].maxrate);
-               ivf->trusted      = ionic->vfs[vf].trusted;
-               ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+               ivf->vf = vf;
+               ivf->qos = 0;
+
+               ret = ionic_update_cached_vf_config(ionic, vf);
+               if (!ret) {
+                       ivf->vlan         = le16_to_cpu(ionic->vfs[vf].vlanid);
+                       ivf->spoofchk     = ionic->vfs[vf].spoofchk;
+                       ivf->linkstate    = ionic->vfs[vf].linkstate;
+                       ivf->max_tx_rate  = le32_to_cpu(ionic->vfs[vf].maxrate);
+                       ivf->trusted      = ionic->vfs[vf].trusted;
+                       ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+               }
        }
 
        up_read(&ionic->vf_op_lock);
@@ -2220,6 +2299,7 @@ static int ionic_get_vf_stats(struct net_device *netdev, int vf,
 
 static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_MAC };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2235,7 +2315,11 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac);
+               ether_addr_copy(vfc.macaddr, mac);
+               dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
+                       __func__, vf, vfc.macaddr);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ether_addr_copy(ionic->vfs[vf].macaddr, mac);
        }
@@ -2247,6 +2331,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
                             u8 qos, __be16 proto)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2269,8 +2354,11 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_VLAN, (u8 *)&vlan);
+               vfc.vlanid = cpu_to_le16(vlan);
+               dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
+                       __func__, vf, le16_to_cpu(vfc.vlanid));
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ionic->vfs[vf].vlanid = cpu_to_le16(vlan);
        }
@@ -2282,6 +2370,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 static int ionic_set_vf_rate(struct net_device *netdev, int vf,
                             int tx_min, int tx_max)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_RATE };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2298,8 +2387,11 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_RATE, (u8 *)&tx_max);
+               vfc.maxrate = cpu_to_le32(tx_max);
+               dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
+                       __func__, vf, le32_to_cpu(vfc.maxrate));
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        lif->ionic->vfs[vf].maxrate = cpu_to_le32(tx_max);
        }
@@ -2310,9 +2402,9 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
 
 static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_SPOOFCHK };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data = set;  /* convert to u8 for config */
        int ret;
 
        if (!netif_device_present(netdev))
@@ -2323,10 +2415,13 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_SPOOFCHK, &data);
+               vfc.spoofchk = set;
+               dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
+                       __func__, vf, vfc.spoofchk);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
-                       ionic->vfs[vf].spoofchk = data;
+                       ionic->vfs[vf].spoofchk = set;
        }
 
        up_write(&ionic->vf_op_lock);
@@ -2335,9 +2430,9 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
 
 static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_TRUST };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data = set;  /* convert to u8 for config */
        int ret;
 
        if (!netif_device_present(netdev))
@@ -2348,10 +2443,13 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_TRUST, &data);
+               vfc.trust = set;
+               dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
+                       __func__, vf, vfc.trust);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
-                       ionic->vfs[vf].trusted = data;
+                       ionic->vfs[vf].trusted = set;
        }
 
        up_write(&ionic->vf_op_lock);
@@ -2360,20 +2458,21 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
 
 static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_LINKSTATE };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data;
+       u8 vfls;
        int ret;
 
        switch (set) {
        case IFLA_VF_LINK_STATE_ENABLE:
-               data = IONIC_VF_LINK_STATUS_UP;
+               vfls = IONIC_VF_LINK_STATUS_UP;
                break;
        case IFLA_VF_LINK_STATE_DISABLE:
-               data = IONIC_VF_LINK_STATUS_DOWN;
+               vfls = IONIC_VF_LINK_STATUS_DOWN;
                break;
        case IFLA_VF_LINK_STATE_AUTO:
-               data = IONIC_VF_LINK_STATUS_AUTO;
+               vfls = IONIC_VF_LINK_STATUS_AUTO;
                break;
        default:
                return -EINVAL;
@@ -2387,8 +2486,11 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_LINKSTATE, &data);
+               vfc.linkstate = vfls;
+               dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
+                       __func__, vf, vfc.linkstate);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ionic->vfs[vf].linkstate = set;
        }
@@ -2835,6 +2937,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
 
        mutex_unlock(&lif->queue_lock);
 
+       clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
        dev_info(ionic->dev, "FW Down: LIFs stopped\n");
 }
 
@@ -2934,8 +3037,6 @@ void ionic_lif_free(struct ionic_lif *lif)
        /* unmap doorbell page */
        ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
        lif->kern_dbpage = NULL;
-       kfree(lif->dbid_inuse);
-       lif->dbid_inuse = NULL;
 
        mutex_destroy(&lif->config_lock);
        mutex_destroy(&lif->queue_lock);
@@ -3135,22 +3236,12 @@ int ionic_lif_init(struct ionic_lif *lif)
                return -EINVAL;
        }
 
-       lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
-       if (!lif->dbid_inuse) {
-               dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
-               return -ENOMEM;
-       }
-
-       /* first doorbell id reserved for kernel (dbid aka pid == zero) */
-       set_bit(0, lif->dbid_inuse);
        lif->kern_pid = 0;
-
        dbpage_num = ionic_db_page_num(lif, lif->kern_pid);
        lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num);
        if (!lif->kern_dbpage) {
                dev_err(dev, "Cannot map dbpage, aborting\n");
-               err = -ENOMEM;
-               goto err_out_free_dbid;
+               return -ENOMEM;
        }
 
        err = ionic_lif_adminq_init(lif);
@@ -3186,15 +3277,13 @@ int ionic_lif_init(struct ionic_lif *lif)
        return 0;
 
 err_out_notifyq_deinit:
+       napi_disable(&lif->adminqcq->napi);
        ionic_lif_qcq_deinit(lif, lif->notifyqcq);
 err_out_adminq_deinit:
        ionic_lif_qcq_deinit(lif, lif->adminqcq);
        ionic_lif_reset(lif);
        ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
        lif->kern_dbpage = NULL;
-err_out_free_dbid:
-       kfree(lif->dbid_inuse);
-       lif->dbid_inuse = NULL;
 
        return err;
 }
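
Taken together, the lif changes introduce IONIC_LIF_F_FW_STOPPING as a latch between "FW reported down" and "LIFs actually stopped": test_and_set_bit() ensures that only the first observer of the outage (heartbeat loss or an IONIC_EVENT_RESET notification) queues the IONIC_DW_TYPE_LIF_RESET work, and ionic_lif_handle_fw_down() clears the flag once teardown completes so a later FW-up transition can be recognized. In outline (the helper name is illustrative):

	/* first observer wins; later callers see the bit already set */
	if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
	    !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state))
		queue_lif_reset_work(lif);

	/* ...and at the end of ionic_lif_handle_fw_down(): */
	clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
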
index 9f7ab2f..a53984b 100644
@@ -135,6 +135,7 @@ enum ionic_lif_state_flags {
        IONIC_LIF_F_LINK_CHECK_REQUESTED,
        IONIC_LIF_F_FILTER_SYNC_NEEDED,
        IONIC_LIF_F_FW_RESET,
+       IONIC_LIF_F_FW_STOPPING,
        IONIC_LIF_F_SPLIT_INTR,
        IONIC_LIF_F_BROKEN,
        IONIC_LIF_F_TX_DIM_INTR,
@@ -213,7 +214,6 @@ struct ionic_lif {
        u32 rx_coalesce_hw;             /* what the hw is using */
        u32 tx_coalesce_usecs;          /* what the user asked for */
        u32 tx_coalesce_hw;             /* what the hw is using */
-       unsigned long *dbid_inuse;
        unsigned int dbid_count;
 
        struct ionic_phc *phc;
index 875f4ec..4029b4e 100644
@@ -188,6 +188,28 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
        }
 }
 
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr)
+{
+       switch (attr) {
+       case IONIC_VF_ATTR_SPOOFCHK:
+               return "IONIC_VF_ATTR_SPOOFCHK";
+       case IONIC_VF_ATTR_TRUST:
+               return "IONIC_VF_ATTR_TRUST";
+       case IONIC_VF_ATTR_LINKSTATE:
+               return "IONIC_VF_ATTR_LINKSTATE";
+       case IONIC_VF_ATTR_MAC:
+               return "IONIC_VF_ATTR_MAC";
+       case IONIC_VF_ATTR_VLAN:
+               return "IONIC_VF_ATTR_VLAN";
+       case IONIC_VF_ATTR_RATE:
+               return "IONIC_VF_ATTR_RATE";
+       case IONIC_VF_ATTR_STATSADDR:
+               return "IONIC_VF_ATTR_STATSADDR";
+       default:
+               return "IONIC_VF_ATTR_UNKNOWN";
+       }
+}
+
 static void ionic_adminq_flush(struct ionic_lif *lif)
 {
        struct ionic_desc_info *desc_info;
@@ -215,9 +237,13 @@ static void ionic_adminq_flush(struct ionic_lif *lif)
 void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
                                   u8 status, int err)
 {
+       const char *stat_str;
+
+       stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+                                        ionic_error_to_str(status);
+
        netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n",
-                  ionic_opcode_to_str(opcode), opcode,
-                  ionic_error_to_str(status), err);
+                  ionic_opcode_to_str(opcode), opcode, stat_str, err);
 }
 
 static int ionic_adminq_check_err(struct ionic_lif *lif,
@@ -318,6 +344,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
                if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                        netdev_err(netdev, "Posting of %s (%d) failed: %d\n",
                                   name, ctx->cmd.cmd.opcode, err);
+               ctx->comp.comp.status = IONIC_RC_ERROR;
                return err;
        }
 
@@ -331,11 +358,15 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
                if (remaining)
                        break;
 
-               /* interrupt the wait if FW stopped */
-               if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
+               /* force a check of FW status and break out if FW reset */
+               (void)ionic_heartbeat_check(lif->ionic);
+               if ((test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                    !lif->ionic->idev.fw_status_ready) ||
+                   test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
                        if (do_msg)
-                               netdev_err(netdev, "%s (%d) interrupted, FW in reset\n",
-                                          name, ctx->cmd.cmd.opcode);
+                               netdev_warn(netdev, "%s (%d) interrupted, FW in reset\n",
+                                           name, ctx->cmd.cmd.opcode);
+                       ctx->comp.comp.status = IONIC_RC_ERROR;
                        return -ENXIO;
                }
 
@@ -370,21 +401,34 @@ int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *
 
 static void ionic_dev_cmd_clean(struct ionic *ionic)
 {
-       union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
+       struct ionic_dev *idev = &ionic->idev;
 
-       iowrite32(0, &regs->doorbell);
-       memset_io(&regs->cmd, 0, sizeof(regs->cmd));
+       iowrite32(0, &idev->dev_cmd_regs->doorbell);
+       memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd));
 }
 
-int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+                                int err)
+{
+       const char *stat_str;
+
+       stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+                                        ionic_error_to_str(status);
+
+       dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
+               ionic_opcode_to_str(opcode), opcode, stat_str, err);
+}
+
+static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
+                               const bool do_msg)
 {
        struct ionic_dev *idev = &ionic->idev;
        unsigned long start_time;
        unsigned long max_wait;
        unsigned long duration;
+       int done = 0;
+       bool fw_up;
        int opcode;
-       int hb = 0;
-       int done;
        int err;
 
        /* Wait for dev cmd to complete, retrying if we get EAGAIN,
@@ -394,31 +438,24 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
 try_again:
        opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode);
        start_time = jiffies;
-       do {
+       for (fw_up = ionic_is_fw_running(idev);
+            !done && fw_up && time_before(jiffies, max_wait);
+            fw_up = ionic_is_fw_running(idev)) {
                done = ionic_dev_cmd_done(idev);
                if (done)
                        break;
                usleep_range(100, 200);
-
-               /* Don't check the heartbeat on FW_CONTROL commands as they are
-                * notorious for interrupting the firmware's heartbeat update.
-                */
-               if (opcode != IONIC_CMD_FW_CONTROL)
-                       hb = ionic_heartbeat_check(ionic);
-       } while (!done && !hb && time_before(jiffies, max_wait));
+       }
        duration = jiffies - start_time;
 
        dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n",
                ionic_opcode_to_str(opcode), opcode,
                done, duration / HZ, duration);
 
-       if (!done && hb) {
-               /* It is possible (but unlikely) that FW was busy and missed a
-                * heartbeat check but is still alive and will process this
-                * request, so don't clean the dev_cmd in this case.
-                */
-               dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
-                       ionic_opcode_to_str(opcode), opcode);
+       if (!done && !fw_up) {
+               ionic_dev_cmd_clean(ionic);
+               dev_warn(ionic->dev, "DEVCMD %s (%d) interrupted - FW is down\n",
+                        ionic_opcode_to_str(opcode), opcode);
                return -ENXIO;
        }
 
@@ -444,9 +481,9 @@ try_again:
                }
 
                if (!(opcode == IONIC_CMD_FW_CONTROL && err == IONIC_RC_EAGAIN))
-                       dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
-                               ionic_opcode_to_str(opcode), opcode,
-                               ionic_error_to_str(err), err);
+                       if (do_msg)
+                               ionic_dev_cmd_dev_err_print(ionic, opcode, err,
+                                                           ionic_error_to_errno(err));
 
                return ionic_error_to_errno(err);
        }
@@ -454,6 +491,16 @@ try_again:
        return 0;
 }
 
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+{
+       return __ionic_dev_cmd_wait(ionic, max_seconds, true);
+}
+
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_seconds)
+{
+       return __ionic_dev_cmd_wait(ionic, max_seconds, false);
+}
+
 int ionic_setup(struct ionic *ionic)
 {
        int err;
@@ -540,6 +587,9 @@ int ionic_reset(struct ionic *ionic)
        struct ionic_dev *idev = &ionic->idev;
        int err;
 
+       if (!ionic_is_fw_running(idev))
+               return 0;
+
        mutex_lock(&ionic->dev_cmd_lock);
        ionic_dev_cmd_reset(idev);
        err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
@@ -612,15 +662,17 @@ int ionic_port_init(struct ionic *ionic)
 int ionic_port_reset(struct ionic *ionic)
 {
        struct ionic_dev *idev = &ionic->idev;
-       int err;
+       int err = 0;
 
        if (!idev->port_info)
                return 0;
 
-       mutex_lock(&ionic->dev_cmd_lock);
-       ionic_dev_cmd_port_reset(idev);
-       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
-       mutex_unlock(&ionic->dev_cmd_lock);
+       if (ionic_is_fw_running(idev)) {
+               mutex_lock(&ionic->dev_cmd_lock);
+               ionic_dev_cmd_port_reset(idev);
+               err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+               mutex_unlock(&ionic->dev_cmd_lock);
+       }
 
        dma_free_coherent(ionic->dev, idev->port_info_sz,
                          idev->port_info, idev->port_info_pa);
@@ -628,9 +680,6 @@ int ionic_port_reset(struct ionic *ionic)
        idev->port_info = NULL;
        idev->port_info_pa = 0;
 
-       if (err)
-               dev_err(ionic->dev, "Failed to reset port\n");
-
        return err;
 }
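
ionic_reset() and ionic_port_reset() now share the same teardown rule: when ionic_is_fw_running() reports the firmware dead (a 0xff status read or a clear RUNNING bit), the device command is skipped and only the local resources are released, since nothing on the other end will answer. The guard, as it appears in ionic_reset() above:

	/* Nothing to tell the device if FW is not listening. */
	if (!ionic_is_fw_running(idev))
		return 0;

	mutex_lock(&ionic->dev_cmd_lock);
	ionic_dev_cmd_reset(idev);
	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
	mutex_unlock(&ionic->dev_cmd_lock);
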
 
index f6e785f..b736337 100644
@@ -376,10 +376,24 @@ static int ionic_lif_filter_add(struct ionic_lif *lif,
 
                spin_unlock_bh(&lif->rx_filters.lock);
 
-               if (err == -ENOSPC) {
-                       if (le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
-                               lif->max_vlans = lif->nvlans;
+               /* store the max_vlans limit that we found */
+               if (err == -ENOSPC &&
+                   le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
+                       lif->max_vlans = lif->nvlans;
+
+               /* Prevent unnecessary error messages on recoverable
+                * errors as the filter will get retried on the next
+                * sync attempt.
+                */
+               switch (err) {
+               case -ENOSPC:
+               case -ENXIO:
+               case -ETIMEDOUT:
+               case -EAGAIN:
+               case -EBUSY:
                        return 0;
+               default:
+                       break;
                }
 
                ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
@@ -494,9 +508,22 @@ static int ionic_lif_filter_del(struct ionic_lif *lif,
        spin_unlock_bh(&lif->rx_filters.lock);
 
        if (state != IONIC_FILTER_STATE_NEW) {
-               err = ionic_adminq_post_wait(lif, &ctx);
-               if (err && err != -EEXIST)
+               err = ionic_adminq_post_wait_nomsg(lif, &ctx);
+
+               switch (err) {
+                       /* ignore these errors */
+               case -EEXIST:
+               case -ENXIO:
+               case -ETIMEDOUT:
+               case -EAGAIN:
+               case -EBUSY:
+               case 0:
+                       break;
+               default:
+                       ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
+                                                     ctx.comp.comp.status, err);
                        return err;
+               }
        }
 
        return 0;
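
Both filter paths now treat transient failures uniformly: results that the next periodic filter sync will retry anyway (-ENOSPC, -ENXIO, -ETIMEDOUT, -EAGAIN and -EBUSY) are quietly treated as success for the moment, and only unexpected errors reach ionic_adminq_netdev_err_print(), which keeps recoverable conditions from flooding the log.
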
index 94384f5..d197a70 100644
@@ -669,27 +669,37 @@ dma_fail:
        return -EIO;
 }
 
+static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
+                                    struct ionic_desc_info *desc_info)
+{
+       struct ionic_buf_info *buf_info = desc_info->bufs;
+       struct device *dev = q->dev;
+       unsigned int i;
+
+       if (!desc_info->nbufs)
+               return;
+
+       dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+                        buf_info->len, DMA_TO_DEVICE);
+       buf_info++;
+       for (i = 1; i < desc_info->nbufs; i++, buf_info++)
+               dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+                              buf_info->len, DMA_TO_DEVICE);
+
+       desc_info->nbufs = 0;
+}
+
 static void ionic_tx_clean(struct ionic_queue *q,
                           struct ionic_desc_info *desc_info,
                           struct ionic_cq_info *cq_info,
                           void *cb_arg)
 {
-       struct ionic_buf_info *buf_info = desc_info->bufs;
        struct ionic_tx_stats *stats = q_to_tx_stats(q);
        struct ionic_qcq *qcq = q_to_qcq(q);
        struct sk_buff *skb = cb_arg;
-       struct device *dev = q->dev;
-       unsigned int i;
        u16 qi;
 
-       if (desc_info->nbufs) {
-               dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
-                                buf_info->len, DMA_TO_DEVICE);
-               buf_info++;
-               for (i = 1; i < desc_info->nbufs; i++, buf_info++)
-                       dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
-                                      buf_info->len, DMA_TO_DEVICE);
-       }
+       ionic_tx_desc_unmap_bufs(q, desc_info);
 
        if (!skb)
                return;
@@ -931,8 +941,11 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
                err = ionic_tx_tcp_inner_pseudo_csum(skb);
        else
                err = ionic_tx_tcp_pseudo_csum(skb);
-       if (err)
+       if (err) {
+               /* clean up mapping from ionic_tx_map_skb */
+               ionic_tx_desc_unmap_bufs(q, desc_info);
                return err;
+       }
 
        if (encap)
                hdrlen = skb_inner_transport_header(skb) - skb->data +
@@ -1003,8 +1016,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
        return 0;
 }
 
-static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
-                             struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
+                              struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_desc *desc = desc_info->txq_desc;
        struct ionic_buf_info *buf_info = desc_info->bufs;
@@ -1038,12 +1051,10 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
                stats->crc32_csum++;
        else
                stats->csum++;
-
-       return 0;
 }
 
-static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
-                                struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
+                                 struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_desc *desc = desc_info->txq_desc;
        struct ionic_buf_info *buf_info = desc_info->bufs;
@@ -1074,12 +1085,10 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
        desc->csum_offset = 0;
 
        stats->csum_none++;
-
-       return 0;
 }
 
-static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
-                             struct ionic_desc_info *desc_info)
+static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
+                              struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc;
        struct ionic_buf_info *buf_info = &desc_info->bufs[1];
@@ -1093,31 +1102,24 @@ static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
        }
 
        stats->frags += skb_shinfo(skb)->nr_frags;
-
-       return 0;
 }
 
 static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 {
        struct ionic_desc_info *desc_info = &q->info[q->head_idx];
        struct ionic_tx_stats *stats = q_to_tx_stats(q);
-       int err;
 
        if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
                return -EIO;
 
        /* set up the initial descriptor */
        if (skb->ip_summed == CHECKSUM_PARTIAL)
-               err = ionic_tx_calc_csum(q, skb, desc_info);
+               ionic_tx_calc_csum(q, skb, desc_info);
        else
-               err = ionic_tx_calc_no_csum(q, skb, desc_info);
-       if (err)
-               return err;
+               ionic_tx_calc_no_csum(q, skb, desc_info);
 
        /* add frags */
-       err = ionic_tx_skb_frags(q, skb, desc_info);
-       if (err)
-               return err;
+       ionic_tx_skb_frags(q, skb, desc_info);
 
        skb_tx_timestamp(skb);
        stats->pkts++;
index 19e2621..3c3d150 100644
@@ -2684,7 +2684,26 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
        if (enable && tp->aspm_manageable) {
                RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
                RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
+
+               switch (tp->mac_version) {
+               case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+                       /* reset ephy tx/rx disable timer */
+                       r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0);
+                       /* chip can trigger L1.2 */
+                       r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, BIT(2));
+                       break;
+               default:
+                       break;
+               }
        } else {
+               switch (tp->mac_version) {
+               case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+                       r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0);
+                       break;
+               default:
+                       break;
+               }
+
                RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
                RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        }
@@ -4843,8 +4862,6 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
                rtl8169_down(tp);
 }
 
-#ifdef CONFIG_PM
-
 static int rtl8169_runtime_resume(struct device *dev)
 {
        struct rtl8169_private *tp = dev_get_drvdata(dev);
@@ -4860,7 +4877,7 @@ static int rtl8169_runtime_resume(struct device *dev)
        return 0;
 }
 
-static int __maybe_unused rtl8169_suspend(struct device *device)
+static int rtl8169_suspend(struct device *device)
 {
        struct rtl8169_private *tp = dev_get_drvdata(device);
 
@@ -4873,7 +4890,7 @@ static int __maybe_unused rtl8169_suspend(struct device *device)
        return 0;
 }
 
-static int __maybe_unused rtl8169_resume(struct device *device)
+static int rtl8169_resume(struct device *device)
 {
        struct rtl8169_private *tp = dev_get_drvdata(device);
 
@@ -4915,13 +4932,11 @@ static int rtl8169_runtime_idle(struct device *device)
 }
 
 static const struct dev_pm_ops rtl8169_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
-       SET_RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
-                          rtl8169_runtime_idle)
+       SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
+       RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
+                      rtl8169_runtime_idle)
 };
 
-#endif /* CONFIG_PM */
-
 static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 {
        /* WoL fails with 8168b when the receiver is disabled. */
@@ -5255,6 +5270,16 @@ done:
        rtl_rar_set(tp, mac_addr);
 }
 
+/* register is set if system vendor successfully tested ASPM 1.2 */
+static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
+{
+       if (tp->mac_version >= RTL_GIGA_MAC_VER_60 &&
+           r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
+               return true;
+
+       return false;
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct rtl8169_private *tp;
@@ -5333,7 +5358,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
         * Chips from RTL8168h partially have issues with L1.2, but seem
         * to work fine with L1 and L1.1.
         */
-       if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
+       if (rtl_aspm_is_safe(tp))
+               rc = 0;
+       else if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
                rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
        else
                rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
@@ -5460,9 +5487,7 @@ static struct pci_driver rtl8169_pci_driver = {
        .probe          = rtl_init_one,
        .remove         = rtl_remove_one,
        .shutdown       = rtl_shutdown,
-#ifdef CONFIG_PM
-       .driver.pm      = &rtl8169_pm_ops,
-#endif
+       .driver.pm      = pm_ptr(&rtl8169_pm_ops),
 };
 
 module_pci_driver(rtl8169_pci_driver);
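
The r8169 conversion drops the CONFIG_PM preprocessor guards entirely: SYSTEM_SLEEP_PM_OPS()/RUNTIME_PM_OPS(), unlike their SET_* predecessors, do not leave the callbacks unreferenced in !CONFIG_PM builds (so no __maybe_unused is needed), and pm_ptr() collapses the ops pointer to NULL when CONFIG_PM is off, letting the compiler discard the dead callbacks instead of hiding them behind #ifdef. A simplified sketch; the real macro lives in include/linux/pm.h and example_pm_ops is illustrative:

	/* Roughly: the pointer if CONFIG_PM is enabled, NULL otherwise. */
	#define pm_ptr(_ptr)	(IS_ENABLED(CONFIG_PM) ? (_ptr) : NULL)

	static struct pci_driver example_pci_driver = {
		.name		= "example",
		.driver.pm	= pm_ptr(&example_pm_ops),
	};
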
index 32161a5..77a0d9d 100644
@@ -127,7 +127,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv)
        /* MAC core supports the EEE feature. */
        if (priv->hw_cap.eee) {
                /* Check if the PHY supports EEE */
-               if (phy_init_eee(ndev->phydev, 1))
+               if (phy_init_eee(ndev->phydev, true))
                        return false;
 
                priv->eee_active = 1;
index 639a753..eb7e41d 100644
@@ -938,105 +938,15 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
                        priv->pause, tx_cnt);
 }
 
-static void stmmac_validate(struct phylink_config *config,
-                           unsigned long *supported,
-                           struct phylink_link_state *state)
+static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config,
+                                                phy_interface_t interface)
 {
        struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, };
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-       int tx_cnt = priv->plat->tx_queues_to_use;
-       int max_speed = priv->plat->max_speed;
-
-       phylink_set(mac_supported, 10baseT_Half);
-       phylink_set(mac_supported, 10baseT_Full);
-       phylink_set(mac_supported, 100baseT_Half);
-       phylink_set(mac_supported, 100baseT_Full);
-       phylink_set(mac_supported, 1000baseT_Half);
-       phylink_set(mac_supported, 1000baseT_Full);
-       phylink_set(mac_supported, 1000baseKX_Full);
-
-       phylink_set(mac_supported, Autoneg);
-       phylink_set(mac_supported, Pause);
-       phylink_set(mac_supported, Asym_Pause);
-       phylink_set_port_modes(mac_supported);
-
-       /* Cut down 1G if asked to */
-       if ((max_speed > 0) && (max_speed < 1000)) {
-               phylink_set(mask, 1000baseT_Full);
-               phylink_set(mask, 1000baseX_Full);
-       } else if (priv->plat->has_gmac4) {
-               if (!max_speed || max_speed >= 2500) {
-                       phylink_set(mac_supported, 2500baseT_Full);
-                       phylink_set(mac_supported, 2500baseX_Full);
-               }
-       } else if (priv->plat->has_xgmac) {
-               if (!max_speed || (max_speed >= 2500)) {
-                       phylink_set(mac_supported, 2500baseT_Full);
-                       phylink_set(mac_supported, 2500baseX_Full);
-               }
-               if (!max_speed || (max_speed >= 5000)) {
-                       phylink_set(mac_supported, 5000baseT_Full);
-               }
-               if (!max_speed || (max_speed >= 10000)) {
-                       phylink_set(mac_supported, 10000baseSR_Full);
-                       phylink_set(mac_supported, 10000baseLR_Full);
-                       phylink_set(mac_supported, 10000baseER_Full);
-                       phylink_set(mac_supported, 10000baseLRM_Full);
-                       phylink_set(mac_supported, 10000baseT_Full);
-                       phylink_set(mac_supported, 10000baseKX4_Full);
-                       phylink_set(mac_supported, 10000baseKR_Full);
-               }
-               if (!max_speed || (max_speed >= 25000)) {
-                       phylink_set(mac_supported, 25000baseCR_Full);
-                       phylink_set(mac_supported, 25000baseKR_Full);
-                       phylink_set(mac_supported, 25000baseSR_Full);
-               }
-               if (!max_speed || (max_speed >= 40000)) {
-                       phylink_set(mac_supported, 40000baseKR4_Full);
-                       phylink_set(mac_supported, 40000baseCR4_Full);
-                       phylink_set(mac_supported, 40000baseSR4_Full);
-                       phylink_set(mac_supported, 40000baseLR4_Full);
-               }
-               if (!max_speed || (max_speed >= 50000)) {
-                       phylink_set(mac_supported, 50000baseCR2_Full);
-                       phylink_set(mac_supported, 50000baseKR2_Full);
-                       phylink_set(mac_supported, 50000baseSR2_Full);
-                       phylink_set(mac_supported, 50000baseKR_Full);
-                       phylink_set(mac_supported, 50000baseSR_Full);
-                       phylink_set(mac_supported, 50000baseCR_Full);
-                       phylink_set(mac_supported, 50000baseLR_ER_FR_Full);
-                       phylink_set(mac_supported, 50000baseDR_Full);
-               }
-               if (!max_speed || (max_speed >= 100000)) {
-                       phylink_set(mac_supported, 100000baseKR4_Full);
-                       phylink_set(mac_supported, 100000baseSR4_Full);
-                       phylink_set(mac_supported, 100000baseCR4_Full);
-                       phylink_set(mac_supported, 100000baseLR4_ER4_Full);
-                       phylink_set(mac_supported, 100000baseKR2_Full);
-                       phylink_set(mac_supported, 100000baseSR2_Full);
-                       phylink_set(mac_supported, 100000baseCR2_Full);
-                       phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full);
-                       phylink_set(mac_supported, 100000baseDR2_Full);
-               }
-       }
-
-       /* Half-Duplex can only work with single queue */
-       if (tx_cnt > 1) {
-               phylink_set(mask, 10baseT_Half);
-               phylink_set(mask, 100baseT_Half);
-               phylink_set(mask, 1000baseT_Half);
-       }
-
-       linkmode_and(supported, supported, mac_supported);
-       linkmode_andnot(supported, supported, mask);
 
-       linkmode_and(state->advertising, state->advertising, mac_supported);
-       linkmode_andnot(state->advertising, state->advertising, mask);
+       if (!priv->hw->xpcs)
+               return NULL;
 
-       /* If PCS is supported, check which modes it supports. */
-       if (priv->hw->xpcs)
-               xpcs_validate(priv->hw->xpcs, supported, state);
+       return &priv->hw->xpcs->pcs;
 }
 
 static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
@@ -1175,7 +1085,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 }
 
 static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
-       .validate = stmmac_validate,
+       .validate = phylink_generic_validate,
+       .mac_select_pcs = stmmac_mac_select_pcs,
        .mac_config = stmmac_mac_config,
        .mac_link_down = stmmac_mac_link_down,
        .mac_link_up = stmmac_mac_link_up,
@@ -1255,12 +1166,12 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 {
        struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
        struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
+       int max_speed = priv->plat->max_speed;
        int mode = priv->plat->phy_interface;
        struct phylink *phylink;
 
        priv->phylink_config.dev = &priv->dev->dev;
        priv->phylink_config.type = PHYLINK_NETDEV;
-       priv->phylink_config.pcs_poll = true;
        if (priv->plat->mdio_bus_data)
                priv->phylink_config.ovr_an_inband =
                        mdio_bus_data->xpcs_an_inband;
@@ -1268,14 +1179,50 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
        if (!fwnode)
                fwnode = dev_fwnode(priv->device);
 
+       /* Set the platform/firmware specified interface mode */
+       __set_bit(mode, priv->phylink_config.supported_interfaces);
+
+       /* If we have an xpcs, it defines which PHY interfaces are supported. */
+       if (priv->hw->xpcs)
+               xpcs_get_interfaces(priv->hw->xpcs,
+                                   priv->phylink_config.supported_interfaces);
+
+       priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+               MAC_10 | MAC_100;
+
+       if (!max_speed || max_speed >= 1000)
+               priv->phylink_config.mac_capabilities |= MAC_1000;
+
+       if (priv->plat->has_gmac4) {
+               if (!max_speed || max_speed >= 2500)
+                       priv->phylink_config.mac_capabilities |= MAC_2500FD;
+       } else if (priv->plat->has_xgmac) {
+               if (!max_speed || max_speed >= 2500)
+                       priv->phylink_config.mac_capabilities |= MAC_2500FD;
+               if (!max_speed || max_speed >= 5000)
+                       priv->phylink_config.mac_capabilities |= MAC_5000FD;
+               if (!max_speed || max_speed >= 10000)
+                       priv->phylink_config.mac_capabilities |= MAC_10000FD;
+               if (!max_speed || max_speed >= 25000)
+                       priv->phylink_config.mac_capabilities |= MAC_25000FD;
+               if (!max_speed || max_speed >= 40000)
+                       priv->phylink_config.mac_capabilities |= MAC_40000FD;
+               if (!max_speed || max_speed >= 50000)
+                       priv->phylink_config.mac_capabilities |= MAC_50000FD;
+               if (!max_speed || max_speed >= 100000)
+                       priv->phylink_config.mac_capabilities |= MAC_100000FD;
+       }
+
+       /* Half-Duplex can only work with single queue */
+       if (priv->plat->tx_queues_to_use > 1)
+               priv->phylink_config.mac_capabilities &=
+                       ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+
        phylink = phylink_create(&priv->phylink_config, fwnode,
                                 mode, &stmmac_phylink_mac_ops);
        if (IS_ERR(phylink))
                return PTR_ERR(phylink);
 
-       if (priv->hw->xpcs)
-               phylink_set_pcs(phylink, &priv->hw->xpcs->pcs);
-
        priv->phylink = phylink;
        return 0;
 }
index 5b4d153..4010896 100644 (file)
@@ -386,6 +386,7 @@ struct axidma_bd {
  * @phylink:   Pointer to phylink instance
  * @phylink_config: phylink configuration settings
  * @pcs_phy:   Reference to PCS/PMA PHY if used
+ * @pcs:       phylink pcs structure for PCS PHY
  * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core
  * @axi_clk:   AXI4-Lite bus clock
  * @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks)
@@ -434,6 +435,7 @@ struct axienet_local {
        struct phylink_config phylink_config;
 
        struct mdio_device *pcs_phy;
+       struct phylink_pcs pcs;
 
        bool switch_x_sgmii;
 
index 377c94e..de0a637 100644 (file)
@@ -1537,78 +1537,78 @@ static const struct ethtool_ops axienet_ethtool_ops = {
        .nway_reset     = axienet_ethtools_nway_reset,
 };
 
-static void axienet_mac_pcs_get_state(struct phylink_config *config,
-                                     struct phylink_link_state *state)
+static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct axienet_local *lp = netdev_priv(ndev);
+       return container_of(pcs, struct axienet_local, pcs);
+}
 
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               phylink_mii_c22_pcs_get_state(lp->pcs_phy, state);
-               break;
-       default:
-               break;
-       }
+static void axienet_pcs_get_state(struct phylink_pcs *pcs,
+                                 struct phylink_link_state *state)
+{
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+
+       phylink_mii_c22_pcs_get_state(pcs_phy, state);
 }
 
-static void axienet_mac_an_restart(struct phylink_config *config)
+static void axienet_pcs_an_restart(struct phylink_pcs *pcs)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct axienet_local *lp = netdev_priv(ndev);
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
 
-       phylink_mii_c22_pcs_an_restart(lp->pcs_phy);
+       phylink_mii_c22_pcs_an_restart(pcs_phy);
 }
 
-static int axienet_mac_prepare(struct phylink_config *config, unsigned int mode,
-                              phy_interface_t iface)
+static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+                             phy_interface_t interface,
+                             const unsigned long *advertising,
+                             bool permit_pause_to_mac)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+       struct net_device *ndev = pcs_to_axienet_local(pcs)->ndev;
        struct axienet_local *lp = netdev_priv(ndev);
        int ret;
 
-       switch (iface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               if (!lp->switch_x_sgmii)
-                       return 0;
-
-               ret = mdiobus_write(lp->pcs_phy->bus,
-                                   lp->pcs_phy->addr,
-                                   XLNX_MII_STD_SELECT_REG,
-                                   iface == PHY_INTERFACE_MODE_SGMII ?
+       if (lp->switch_x_sgmii) {
+               ret = mdiodev_write(pcs_phy, XLNX_MII_STD_SELECT_REG,
+                                   interface == PHY_INTERFACE_MODE_SGMII ?
                                        XLNX_MII_STD_SELECT_SGMII : 0);
-               if (ret < 0)
-                       netdev_warn(ndev, "Failed to switch PHY interface: %d\n",
+               if (ret < 0) {
+                       netdev_warn(ndev,
+                                   "Failed to switch PHY interface: %d\n",
                                    ret);
-               return ret;
-       default:
-               return 0;
+                       return ret;
+               }
        }
+
+       ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising);
+       if (ret < 0)
+               netdev_warn(ndev, "Failed to configure PCS: %d\n", ret);
+
+       return ret;
 }
 
-static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
-                              const struct phylink_link_state *state)
+static const struct phylink_pcs_ops axienet_pcs_ops = {
+       .pcs_get_state = axienet_pcs_get_state,
+       .pcs_config = axienet_pcs_config,
+       .pcs_an_restart = axienet_pcs_an_restart,
+};
+
+static struct phylink_pcs *axienet_mac_select_pcs(struct phylink_config *config,
+                                                 phy_interface_t interface)
 {
        struct net_device *ndev = to_net_dev(config->dev);
        struct axienet_local *lp = netdev_priv(ndev);
-       int ret;
 
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode,
-                                                state->interface,
-                                                state->advertising);
-               if (ret < 0)
-                       netdev_warn(ndev, "Failed to configure PCS: %d\n",
-                                   ret);
-               break;
+       if (interface == PHY_INTERFACE_MODE_1000BASEX ||
+           interface == PHY_INTERFACE_MODE_SGMII)
+               return &lp->pcs;
 
-       default:
-               break;
-       }
+       return NULL;
+}
+
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+                              const struct phylink_link_state *state)
+{
+       /* nothing meaningful to do */
 }
 
 static void axienet_mac_link_down(struct phylink_config *config,
@@ -1663,9 +1663,7 @@ static void axienet_mac_link_up(struct phylink_config *config,
 
 static const struct phylink_mac_ops axienet_phylink_ops = {
        .validate = phylink_generic_validate,
-       .mac_pcs_get_state = axienet_mac_pcs_get_state,
-       .mac_an_restart = axienet_mac_an_restart,
-       .mac_prepare = axienet_mac_prepare,
+       .mac_select_pcs = axienet_mac_select_pcs,
        .mac_config = axienet_mac_config,
        .mac_link_down = axienet_mac_link_down,
        .mac_link_up = axienet_mac_link_up,
@@ -2079,12 +2077,12 @@ static int axienet_probe(struct platform_device *pdev)
                        ret = -EPROBE_DEFER;
                        goto cleanup_mdio;
                }
-               lp->phylink_config.pcs_poll = true;
+               lp->pcs.ops = &axienet_pcs_ops;
+               lp->pcs.poll = true;
        }
 
        lp->phylink_config.dev = &ndev->dev;
        lp->phylink_config.type = PHYLINK_NETDEV;
-       lp->phylink_config.legacy_pre_march2020 = true;
        lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
                MAC_10FD | MAC_100FD | MAC_1000FD;
 
index cd6742e..61418d4 100644 (file)
@@ -632,35 +632,43 @@ static void xpcs_resolve_pma(struct dw_xpcs *xpcs,
        }
 }
 
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
-                  struct phylink_link_state *state)
+static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
+                        const struct phylink_link_state *state)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported) = { 0, };
        const struct xpcs_compat *compat;
+       struct dw_xpcs *xpcs;
        int i;
 
-       /* phylink expects us to report all supported modes with
-        * PHY_INTERFACE_MODE_NA, just don't limit the supported and
-        * advertising masks and exit.
-        */
-       if (state->interface == PHY_INTERFACE_MODE_NA)
-               return;
-
-       linkmode_zero(xpcs_supported);
-
+       xpcs = phylink_pcs_to_xpcs(pcs);
        compat = xpcs_find_compat(xpcs->id, state->interface);
 
-       /* Populate the supported link modes for this
-        * PHY interface type
+       /* Populate the supported link modes for this PHY interface type.
+        * FIXME: what about the port modes and autoneg bit? This masks
+        * all those away.
         */
        if (compat)
                for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
                        set_bit(compat->supported[i], xpcs_supported);
 
        linkmode_and(supported, supported, xpcs_supported);
-       linkmode_and(state->advertising, state->advertising, xpcs_supported);
+
+       return 0;
+}
+
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
+{
+       int i, j;
+
+       for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
+               const struct xpcs_compat *compat = &xpcs->id->compat[i];
+
+               for (j = 0; j < compat->num_interfaces; j++)
+                       if (compat->interface[j] < PHY_INTERFACE_MODE_MAX)
+                               __set_bit(compat->interface[j], interfaces);
+       }
 }
-EXPORT_SYMBOL_GPL(xpcs_validate);
+EXPORT_SYMBOL_GPL(xpcs_get_interfaces);
 
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
 {
@@ -1106,6 +1114,7 @@ static const struct xpcs_id xpcs_id_list[] = {
 };
 
 static const struct phylink_pcs_ops xpcs_phylink_ops = {
+       .pcs_validate = xpcs_validate,
        .pcs_config = xpcs_config,
        .pcs_get_state = xpcs_get_state,
        .pcs_link_up = xpcs_link_up,
index 5b6c0d1..f504fe5 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/consumer.h>
+#include <linux/phylink.h>
+#include <linux/sfp.h>
 #include <dt-bindings/net/qca-ar803x.h>
 
 #define AT803X_SPECIFIC_FUNCTION_CONTROL       0x10
@@ -51,6 +53,8 @@
 #define AT803X_INTR_ENABLE_PAGE_RECEIVED       BIT(12)
 #define AT803X_INTR_ENABLE_LINK_FAIL           BIT(11)
 #define AT803X_INTR_ENABLE_LINK_SUCCESS                BIT(10)
+#define AT803X_INTR_ENABLE_LINK_FAIL_BX                BIT(8)
+#define AT803X_INTR_ENABLE_LINK_SUCCESS_BX     BIT(7)
 #define AT803X_INTR_ENABLE_WIRESPEED_DOWNGRADE BIT(5)
 #define AT803X_INTR_ENABLE_POLARITY_CHANGED    BIT(1)
 #define AT803X_INTR_ENABLE_WOL                 BIT(0)
 #define AT803X_DEBUG_DATA                      0x1E
 
 #define AT803X_MODE_CFG_MASK                   0x0F
-#define AT803X_MODE_CFG_SGMII                  0x01
+#define AT803X_MODE_CFG_BASET_RGMII            0x00
+#define AT803X_MODE_CFG_BASET_SGMII            0x01
+#define AT803X_MODE_CFG_BX1000_RGMII_50OHM     0x02
+#define AT803X_MODE_CFG_BX1000_RGMII_75OHM     0x03
+#define AT803X_MODE_CFG_BX1000_CONV_50OHM      0x04
+#define AT803X_MODE_CFG_BX1000_CONV_75OHM      0x05
+#define AT803X_MODE_CFG_FX100_RGMII_50OHM      0x06
+#define AT803X_MODE_CFG_FX100_CONV_50OHM       0x07
+#define AT803X_MODE_CFG_RGMII_AUTO_MDET                0x0B
+#define AT803X_MODE_CFG_FX100_RGMII_75OHM      0x0E
+#define AT803X_MODE_CFG_FX100_CONV_75OHM       0x0F
 
 #define AT803X_PSSR                            0x11    /* PHY-Specific Status Register */
 #define AT803X_PSSR_MR_AN_COMPLETE             0x0200
@@ -283,6 +297,8 @@ struct at803x_priv {
        u16 clk_25m_mask;
        u8 smarteee_lpi_tw_1g;
        u8 smarteee_lpi_tw_100m;
+       bool is_fiber;
+       bool is_1000basex;
        struct regulator_dev *vddio_rdev;
        struct regulator_dev *vddh_rdev;
        struct regulator *vddio;
@@ -650,6 +666,55 @@ static int at8031_register_regulators(struct phy_device *phydev)
        return 0;
 }
 
+static int at803x_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+       struct phy_device *phydev = upstream;
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_support);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support);
+       phy_interface_t iface;
+
+       linkmode_zero(phy_support);
+       phylink_set(phy_support, 1000baseX_Full);
+       phylink_set(phy_support, 1000baseT_Full);
+       phylink_set(phy_support, Autoneg);
+       phylink_set(phy_support, Pause);
+       phylink_set(phy_support, Asym_Pause);
+
+       linkmode_zero(sfp_support);
+       sfp_parse_support(phydev->sfp_bus, id, sfp_support);
+       /* Some modules advertise 10G modes in addition to the modes we
+        * support. Mask out the unsupported modes so that the correct
+        * interface is picked.
+        */
+       linkmode_and(sfp_support, phy_support, sfp_support);
+
+       if (linkmode_empty(sfp_support)) {
+               dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n");
+               return -EINVAL;
+       }
+
+       iface = sfp_select_interface(phydev->sfp_bus, sfp_support);
+
+       /* Only 1000Base-X is supported by AR8031/8033 as the downstream SerDes
+        * interface for use with SFP modules.
+        * However, some copper modules that are detected as preferring an
+        * SGMII interface do default to, and work in, 1000Base-X mode, so
+        * just print a warning and allow such modules: they may have some
+        * chance of working.
+        */
+       if (iface == PHY_INTERFACE_MODE_SGMII)
+               dev_warn(&phydev->mdio.dev, "module may not function if 1000Base-X not supported\n");
+       else if (iface != PHY_INTERFACE_MODE_1000BASEX)
+               return -EINVAL;
+
+       return 0;
+}
+
+static const struct sfp_upstream_ops at803x_sfp_ops = {
+       .attach = phy_sfp_attach,
+       .detach = phy_sfp_detach,
+       .module_insert = at803x_sfp_insert,
+};
+
 static int at803x_parse_dt(struct phy_device *phydev)
 {
        struct device_node *node = phydev->mdio.dev.of_node;
@@ -757,6 +822,11 @@ static int at803x_parse_dt(struct phy_device *phydev)
                        phydev_err(phydev, "failed to get VDDIO regulator\n");
                        return PTR_ERR(priv->vddio);
                }
+
+               /* Only AR8031/8033 support 1000Base-X for SFP modules */
+               ret = phy_sfp_probe(phydev, &at803x_sfp_ops);
+               if (ret < 0)
+                       return ret;
        }
 
        return 0;
@@ -784,16 +854,24 @@ static int at803x_probe(struct phy_device *phydev)
                        return ret;
        }
 
-       /* Some bootloaders leave the fiber page selected.
-        * Switch to the copper page, as otherwise we read
-        * the PHY capabilities from the fiber side.
-        */
        if (phydev->drv->phy_id == ATH8031_PHY_ID) {
-               phy_lock_mdio_bus(phydev);
-               ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
-               phy_unlock_mdio_bus(phydev);
-               if (ret)
+               int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+               int mode_cfg;
+
+               if (ccr < 0)
                        goto err;
+               mode_cfg = ccr & AT803X_MODE_CFG_MASK;
+
+               switch (mode_cfg) {
+               case AT803X_MODE_CFG_BX1000_RGMII_50OHM:
+               case AT803X_MODE_CFG_BX1000_RGMII_75OHM:
+                       priv->is_1000basex = true;
+                       fallthrough;
+               case AT803X_MODE_CFG_FX100_RGMII_50OHM:
+               case AT803X_MODE_CFG_FX100_RGMII_75OHM:
+                       priv->is_fiber = true;
+                       break;
+               }
        }
 
        return 0;
@@ -815,6 +893,7 @@ static void at803x_remove(struct phy_device *phydev)
 
 static int at803x_get_features(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err;
 
        err = genphy_read_abilities(phydev);
@@ -841,12 +920,13 @@ static int at803x_get_features(struct phy_device *phydev)
         * As a result of that, ESTATUS_1000_XFULL is set
         * to 1 even when operating in copper TP mode.
         *
-        * Remove this mode from the supported link modes,
-        * as this driver currently only supports copper
-        * operation.
+        * Remove this mode from the supported link modes
+        * when not operating in 1000BaseX mode.
         */
-       linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
-                          phydev->supported);
+       if (!priv->is_1000basex)
+               linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+                                  phydev->supported);
+
        return 0;
 }
 
@@ -910,8 +990,27 @@ static int at8031_pll_config(struct phy_device *phydev)
 
 static int at803x_config_init(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int ret;
 
+       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+               /* Some bootloaders leave the fiber page selected.
+                * Switch to the appropriate page (fiber or copper), as otherwise we
+                * read the PHY capabilities from the wrong page.
+                */
+               phy_lock_mdio_bus(phydev);
+               ret = at803x_write_page(phydev,
+                                       priv->is_fiber ? AT803X_PAGE_FIBER :
+                                                        AT803X_PAGE_COPPER);
+               phy_unlock_mdio_bus(phydev);
+               if (ret)
+                       return ret;
+
+               ret = at8031_pll_config(phydev);
+               if (ret < 0)
+                       return ret;
+       }
+
        /* The RX and TX delay default is:
         *   after HW reset: RX delay enabled and TX delay disabled
         *   after SW reset: RX delay enabled, while TX delay retains the
@@ -941,12 +1040,6 @@ static int at803x_config_init(struct phy_device *phydev)
        if (ret < 0)
                return ret;
 
-       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
-               ret = at8031_pll_config(phydev);
-               if (ret < 0)
-                       return ret;
-       }
-
        /* Ar803x extended next page bit is enabled by default. Cisco
         * multigig switches read this bit and attempt to negotiate 10Gbps
         * rates even if the next page bit is disabled. This is incorrect
@@ -967,6 +1060,7 @@ static int at803x_ack_interrupt(struct phy_device *phydev)
 
 static int at803x_config_intr(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err;
        int value;
 
@@ -983,6 +1077,10 @@ static int at803x_config_intr(struct phy_device *phydev)
                value |= AT803X_INTR_ENABLE_DUPLEX_CHANGED;
                value |= AT803X_INTR_ENABLE_LINK_FAIL;
                value |= AT803X_INTR_ENABLE_LINK_SUCCESS;
+               if (priv->is_fiber) {
+                       value |= AT803X_INTR_ENABLE_LINK_FAIL_BX;
+                       value |= AT803X_INTR_ENABLE_LINK_SUCCESS_BX;
+               }
 
                err = phy_write(phydev, AT803X_INTR_ENABLE, value);
        } else {
@@ -1115,8 +1213,12 @@ static int at803x_read_specific_status(struct phy_device *phydev)
 
 static int at803x_read_status(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err, old_link = phydev->link;
 
+       if (priv->is_1000basex)
+               return genphy_c37_read_status(phydev);
+
        /* Update the link, but return if there was an error */
        err = genphy_update_link(phydev);
        if (err)
@@ -1170,6 +1272,7 @@ static int at803x_config_mdix(struct phy_device *phydev, u8 ctrl)
 
 static int at803x_config_aneg(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int ret;
 
        ret = at803x_config_mdix(phydev, phydev->mdix_ctrl);
@@ -1186,6 +1289,9 @@ static int at803x_config_aneg(struct phy_device *phydev)
                        return ret;
        }
 
+       if (priv->is_1000basex)
+               return genphy_c37_config_aneg(phydev);
+
        /* Do not restart auto-negotiation by setting ret to 0 by default,
         * when calling __genphy_config_aneg later.
         */
index 4514d35..9b72334 100644 (file)
@@ -858,7 +858,6 @@ static int marvell_phy_init(struct usbnet *dev)
                reg = asix_mdio_read(dev->net, dev->mii.phy_id,
                        MII_MARVELL_LED_CTRL);
                netdev_dbg(dev->net, "MII_MARVELL_LED_CTRL (2) = 0x%04x\n", reg);
-               reg &= 0xfc0f;
        }
 
        return 0;
index 30d2912..6335d7a 100644 (file)
@@ -456,7 +456,7 @@ static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = {
 
 int st_nci_vendor_cmds_init(struct nci_dev *ndev)
 {
-       return nfc_set_vendor_cmds(ndev->nfc_dev, st_nci_vendor_cmds,
+       return nci_set_vendor_cmds(ndev, st_nci_vendor_cmds,
                                   sizeof(st_nci_vendor_cmds));
 }
 EXPORT_SYMBOL(st_nci_vendor_cmds_init);
index 7488286..bfa418d 100644 (file)
@@ -358,7 +358,7 @@ int st21nfca_vendor_cmds_init(struct nfc_hci_dev *hdev)
        struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
 
        init_completion(&info->vendor_info.req_completion);
-       return nfc_set_vendor_cmds(hdev->ndev, st21nfca_vendor_cmds,
-                                  sizeof(st21nfca_vendor_cmds));
+       return nfc_hci_set_vendor_cmds(hdev, st21nfca_vendor_cmds,
+                                      sizeof(st21nfca_vendor_cmds));
 }
 EXPORT_SYMBOL(st21nfca_vendor_cmds_init);
index 41b92dc..9233bfe 100644 (file)
@@ -14,7 +14,7 @@ static ssize_t clock_name_show(struct device *dev,
                               struct device_attribute *attr, char *page)
 {
        struct ptp_clock *ptp = dev_get_drvdata(dev);
-       return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
+       return sysfs_emit(page, "%s\n", ptp->info->name);
 }
 static DEVICE_ATTR_RO(clock_name);
 
@@ -387,7 +387,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr,
 
        mutex_unlock(&ptp->pincfg_mux);
 
-       return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan);
+       return sysfs_emit(page, "%u %u\n", func, chan);
 }
 
 static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
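sysfs_emit() (from <linux/sysfs.h>) is the preferred replacement here: it knows the sysfs buffer is exactly one page, so callers cannot get the size wrong the way the two snprintf() calls above disagreed (PAGE_SIZE-1 vs PAGE_SIZE). A minimal sketch of a show() callback, assuming a hypothetical "foo" attribute:

static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
                        char *buf)
{
        /* sysfs_emit() warns on a non-page-aligned buffer and never
         * writes past PAGE_SIZE.
         */
        return sysfs_emit(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(foo);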
index fa517ae..8c92c97 100644 (file)
@@ -194,6 +194,17 @@ struct bpf_map {
        struct work_struct work;
        struct mutex freeze_mutex;
        atomic64_t writecnt;
+       /* 'Ownership' of program-containing map is claimed by the first program
+        * that is going to use this map or by the first program whose FD is
+        * stored in the map to make sure that all callers and callees have the
+        * same prog type, JITed flag and xdp_has_frags flag.
+        */
+       struct {
+               spinlock_t lock;
+               enum bpf_prog_type type;
+               bool jited;
+               bool xdp_has_frags;
+       } owner;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -578,7 +589,6 @@ struct bpf_verifier_ops {
                                 const struct btf_type *t, int off, int size,
                                 enum bpf_access_type atype,
                                 u32 *next_btf_id);
-       bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
 };
 
 struct bpf_prog_offload_ops {
@@ -939,6 +949,7 @@ struct bpf_prog_aux {
        bool func_proto_unreliable;
        bool sleepable;
        bool tail_call_reachable;
+       bool xdp_has_frags;
        struct hlist_node tramp_hlist;
        /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
        const struct btf_type *attach_func_proto;
@@ -999,16 +1010,6 @@ struct bpf_prog_aux {
 };
 
 struct bpf_array_aux {
-       /* 'Ownership' of prog array is claimed by the first program that
-        * is going to use this map or by the first program which FD is
-        * stored in the map to make sure that all callers and callees have
-        * the same prog type and JITed flag.
-        */
-       struct {
-               spinlock_t lock;
-               enum bpf_prog_type type;
-               bool jited;
-       } owner;
        /* Programs with direct jumps into programs part of this array. */
        struct list_head poke_progs;
        struct bpf_map *map;
@@ -1183,7 +1184,14 @@ struct bpf_event_entry {
        struct rcu_head rcu;
 };
 
-bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+static inline bool map_type_contains_progs(struct bpf_map *map)
+{
+       return map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+              map->map_type == BPF_MAP_TYPE_DEVMAP ||
+              map->map_type == BPF_MAP_TYPE_CPUMAP;
+}
+
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp);
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -1251,6 +1259,7 @@ struct bpf_run_ctx {};
 struct bpf_cg_run_ctx {
        struct bpf_run_ctx run_ctx;
        const struct bpf_prog_array_item *prog_item;
+       int retval;
 };
 
 struct bpf_trace_run_ctx {
@@ -1283,19 +1292,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
 
 typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
 
-static __always_inline u32
+static __always_inline int
 BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
                            const void *ctx, bpf_prog_run_fn run_prog,
-                           u32 *ret_flags)
+                           int retval, u32 *ret_flags)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       u32 ret = 1;
        u32 func_ret;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1304,27 +1313,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
                func_ret = run_prog(prog, ctx);
-               ret &= (func_ret & 1);
+               if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
                *(ret_flags) |= (func_ret >> 1);
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
-static __always_inline u32
+static __always_inline int
 BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-                     const void *ctx, bpf_prog_run_fn run_prog)
+                     const void *ctx, bpf_prog_run_fn run_prog,
+                     int retval)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       u32 ret = 1;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1332,13 +1343,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
        old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
-               ret &= run_prog(prog, ctx);
+               if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
 static __always_inline u32
@@ -1391,19 +1403,21 @@ out:
  *   0: NET_XMIT_SUCCESS  skb should be transmitted
  *   1: NET_XMIT_DROP     skb should be dropped and cn
  *   2: NET_XMIT_CN       skb should be transmitted and cn
- *   3: -EPERM            skb should be dropped
+ *   3: -err              skb should be dropped
  */
 #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)                \
        ({                                              \
                u32 _flags = 0;                         \
                bool _cn;                               \
                u32 _ret;                               \
-               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
                _cn = _flags & BPF_RET_SET_CN;          \
-               if (_ret)                               \
+               if (_ret && !IS_ERR_VALUE((long)_ret))  \
+                       _ret = -EFAULT;                 \
+               if (!_ret)                              \
                        _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
                else                                    \
-                       _ret = (_cn ? NET_XMIT_DROP : -EPERM);          \
+                       _ret = (_cn ? NET_XMIT_DROP : _ret);            \
                _ret;                                   \
        })
 
@@ -1724,7 +1738,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
                                const union bpf_attr *kattr,
                                union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                    const struct bpf_prog *prog,
                    struct bpf_insn_access_aux *info);
@@ -1976,12 +1989,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
        return -ENOTSUPP;
 }
 
-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
-                                                 struct module *owner)
-{
-       return false;
-}
-
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
@@ -2076,6 +2083,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr);
+
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
@@ -2129,6 +2139,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
        return -EOPNOTSUPP;
 }
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                                         union bpf_attr __user *uattr)
+{
+       return -EINVAL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
index e999317..7a7be8c 100644 (file)
@@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 
 int check_ptr_off_reg(struct bpf_verifier_env *env,
                      const struct bpf_reg_state *reg, int regno);
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                            u32 regno);
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size);
 
@@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type)
        return type & ~BPF_BASE_TYPE_MASK;
 }
 
+static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+}
+
 #endif /* _LINUX_BPF_VERIFIER_H */
index 0c74348..b12cfe3 100644 (file)
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
 
+enum btf_kfunc_type {
+       BTF_KFUNC_TYPE_CHECK,
+       BTF_KFUNC_TYPE_ACQUIRE,
+       BTF_KFUNC_TYPE_RELEASE,
+       BTF_KFUNC_TYPE_RET_NULL,
+       BTF_KFUNC_TYPE_MAX,
+};
+
 struct btf;
 struct btf_member;
 struct btf_type;
 union bpf_attr;
 struct btf_show;
+struct btf_id_set;
+
+struct btf_kfunc_id_set {
+       struct module *owner;
+       union {
+               struct {
+                       struct btf_id_set *check_set;
+                       struct btf_id_set *acquire_set;
+                       struct btf_id_set *release_set;
+                       struct btf_id_set *ret_null_set;
+               };
+               struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
+       };
+};
 
 extern const struct file_operations btf_fops;
 
@@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
 struct btf *btf_parse_vmlinux(void);
 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+                              enum bpf_prog_type prog_type,
+                              enum btf_kfunc_type type, u32 kfunc_btf_id);
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                             const struct btf_kfunc_id_set *s);
 #else
 static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
                                                    u32 type_id)
@@ -318,50 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
 {
        return NULL;
 }
-#endif
-
-struct kfunc_btf_id_set {
-       struct list_head list;
-       struct btf_id_set *set;
-       struct module *owner;
-};
-
-struct kfunc_btf_id_list {
-       struct list_head list;
-       struct mutex mutex;
-};
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                              struct kfunc_btf_id_set *s);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                struct kfunc_btf_id_set *s);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-                             struct module *owner);
-
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-#else
-static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                            struct kfunc_btf_id_set *s)
-{
-}
-static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                              struct kfunc_btf_id_set *s)
+static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
+                                            enum bpf_prog_type prog_type,
+                                            enum btf_kfunc_type type,
+                                            u32 kfunc_btf_id)
 {
+       return false;
 }
-static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
-                                           u32 kfunc_id, struct module *owner)
+static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                                           const struct btf_kfunc_id_set *s)
 {
-       return false;
+       return 0;
 }
-
-static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
-static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
 #endif
 
-#define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
-       struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
-                                        THIS_MODULE }
-
 #endif
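register_btf_kfunc_id_set() above replaces the old list-based kfunc_btf_id_set machinery: a subsystem or module now registers one btf_kfunc_id_set per program type, carrying the check set plus optional acquire/release/ret_null sets. A hedged sketch of a module registering a hypothetical kfunc for XDP programs:

#include <linux/btf.h>
#include <linux/btf_ids.h>

BTF_SET_START(foo_check_kfunc_ids)
BTF_ID(func, bpf_foo_lookup)            /* hypothetical kfunc */
BTF_SET_END(foo_check_kfunc_ids)

static const struct btf_kfunc_id_set foo_kfunc_set = {
        .owner          = THIS_MODULE,
        .check_set      = &foo_check_kfunc_ids,
};

static int __init foo_init(void)
{
        return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &foo_kfunc_set);
}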
index 919c0fd..bc5d9cc 100644 (file)
@@ -11,6 +11,7 @@ struct btf_id_set {
 #ifdef CONFIG_DEBUG_INFO_BTF
 
 #include <linux/compiler.h> /* for __PASTE */
+#include <linux/compiler_attributes.h> /* for __maybe_unused */
 
 /*
  * Following macros help to define lists of BTF IDs placed
@@ -146,14 +147,14 @@ extern struct btf_id_set name;
 
 #else
 
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
 #define BTF_SET_END(name)
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
index 71fa57b..d23e999 100644 (file)
@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
        s32             level;
        s32             optname;
        s32             optlen;
-       s32             retval;
+       /* for retval in struct bpf_cg_run_ctx */
+       struct task_struct *current_task;
+       /* Temporary "register" for indirect stores to ppos. */
+       u64             tmp_reg;
 };
 
 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
index a59d25f..1e0f8a3 100644 (file)
@@ -371,19 +371,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
        return NULL;
 }
 
-static inline struct inet6_request_sock *
-                       inet6_rsk(const struct request_sock *rsk)
-{
-       return NULL;
-}
-
 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 {
        return NULL;
 }
 
 #define inet6_rcv_saddr(__sk)  NULL
-#define tcp_twsk_ipv6only(__sk)                0
 #define inet_v6_ipv6only(__sk)         0
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif /* _IPV6_H */
index f8397f3..15e0e02 100644 (file)
@@ -66,11 +66,6 @@ static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr,
                linkmode_clear_bit(nr, addr);
 }
 
-static inline void linkmode_change_bit(int nr, volatile unsigned long *addr)
-{
-       __change_bit(nr, addr);
-}
-
 static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr)
 {
        return test_bit(nr, addr);
index 12ea29e..b8a1a17 100644 (file)
@@ -388,23 +388,6 @@ mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa)
 }
 
 /**
- * mii_lpa_to_linkmode_adv_sgmii
- * @advertising: pointer to destination link mode.
- * @lpa: value of the MII_LPA register
- *
- * A small helper function that translates MII_ADVERTISE bits
- * to linkmode advertisement settings when in SGMII mode.
- * Clears the old value of advertising.
- */
-static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising,
-                                                u32 lpa)
-{
-       linkmode_zero(lp_advertising);
-
-       mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa);
-}
-
-/**
  * mii_adv_mod_linkmode_adv_t
  * @advertising:pointer to destination link mode.
  * @adv: value of the MII_ADVERTISE register
index 1ec6318..bda1c38 100644 (file)
@@ -135,15 +135,6 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
        extack->cookie_len = sizeof(cookie);
 }
 
-static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack,
-                                           u32 cookie)
-{
-       if (!extack)
-               return;
-       memcpy(extack->cookie, &cookie, sizeof(cookie));
-       extack->cookie_len = sizeof(cookie);
-}
-
 void netlink_kernel_release(struct sock *sk);
 int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
 int netlink_change_ngroups(struct sock *sk, unsigned int groups);
index add077a..266eb26 100644 (file)
@@ -31,8 +31,7 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
                  phy_interface_t interface, int speed, int duplex);
 int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
                   unsigned int mode);
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
-                  struct phylink_link_state *state);
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
                    int enable);
 struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
index 8a636e6..a27bcc4 100644 (file)
@@ -557,6 +557,7 @@ struct skb_shared_info {
         * Warning : all fields before dataref are cleared in __alloc_skb()
         */
        atomic_t        dataref;
+       unsigned int    xdp_frags_size;
 
        /* Intermediate layers must ensure that destructor_arg
         * remains valid until skb destructor */
@@ -3898,11 +3899,6 @@ static inline ktime_t net_timedelta(ktime_t t)
        return ktime_sub(ktime_get_real(), t);
 }
 
-static inline ktime_t net_invalid_timestamp(void)
-{
-       return 0;
-}
-
 static inline u8 skb_metadata_len(const struct sk_buff *skb)
 {
        return skb_shinfo(skb)->meta_len;
index ae66dad..254a265 100644 (file)
@@ -23,11 +23,6 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
        return (struct udphdr *)skb_transport_header(skb);
 }
 
-static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
-{
-       return (struct udphdr *)skb_inner_transport_header(skb);
-}
-
 #define UDP_HTABLE_SIZE_MIN            (CONFIG_BASE_SMALL ? 128 : 256)
 
 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
index 526e495..cb628c5 100644 (file)
@@ -187,18 +187,12 @@ typedef struct {
 
 typedef struct ax25_route {
        struct ax25_route       *next;
-       refcount_t              refcount;
        ax25_address            callsign;
        struct net_device       *dev;
        ax25_digi               *digipeat;
        char                    ip_mode;
 } ax25_route;
 
-static inline void ax25_hold_route(ax25_route *ax25_rt)
-{
-       refcount_inc(&ax25_rt->refcount);
-}
-
 void __ax25_put_route(ax25_route *ax25_rt);
 
 extern rwlock_t ax25_route_lock;
@@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void)
        read_unlock(&ax25_route_lock);
 }
 
-static inline void ax25_put_route(ax25_route *ax25_rt)
-{
-       if (refcount_dec_and_test(&ax25_rt->refcount))
-               __ax25_put_route(ax25_rt);
-}
-
 typedef struct {
        char                    slave;                  /* slave_mode?   */
        struct timer_list       slave_timer;            /* timeout timer */
index 83cfd2d..7dead85 100644 (file)
@@ -699,20 +699,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
 }
 
 /* Caller must hold rcu_read_lock() for read */
-static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
-                                              const u8 *mac)
-{
-       struct list_head *iter;
-       struct slave *tmp;
-
-       bond_for_each_slave_rcu(bond, tmp, iter)
-               if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
-                       return tmp;
-
-       return NULL;
-}
-
-/* Caller must hold rcu_read_lock() for read */
 static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
 {
        struct list_head *iter;
index dfd919b..463ae5d 100644 (file)
@@ -65,13 +65,13 @@ struct inet_timewait_sock {
        /* these three are in inet_sock */
        __be16                  tw_sport;
        /* And these are ours. */
-       unsigned int            tw_kill         : 1,
-                               tw_transparent  : 1,
+       unsigned int            tw_transparent  : 1,
                                tw_flowlabel    : 20,
-                               tw_pad          : 2,    /* 2 bits hole */
+                               tw_pad          : 3,    /* 3 bits hole */
                                tw_tos          : 8;
        u32                     tw_txhash;
        u32                     tw_priority;
+       u32                     tw_bslot; /* bind bucket slot */
        struct timer_list       tw_timer;
        struct inet_bind_bucket *tw_tb;
 };
@@ -110,8 +110,6 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
 
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
-
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
 {
diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
new file mode 100644 (file)
index 0000000..a473b56
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_BPF_H
+#define _NF_CONNTRACK_BPF_H
+
+#include <linux/btf.h>
+#include <linux/kconfig.h>
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_conntrack_bpf(void);
+
+#else
+
+static inline int register_nf_conntrack_bpf(void)
+{
+       return 0;
+}
+
+#endif
+
+#endif /* _NF_CONNTRACK_BPF_H */
index 7855764..f068786 100644 (file)
@@ -31,18 +31,16 @@ struct ping_group_range {
 struct inet_hashinfo;
 
 struct inet_timewait_death_row {
-       atomic_t                tw_count;
-       char                    tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)];
+       refcount_t              tw_refcount;
 
-       struct inet_hashinfo    *hashinfo;
+       struct inet_hashinfo    *hashinfo ____cacheline_aligned_in_smp;
        int                     sysctl_max_tw_buckets;
 };
 
 struct tcp_fastopen_context;
 
 struct netns_ipv4 {
-       /* Please keep tcp_death_row at first field in netns_ipv4 */
-       struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp;
+       struct inet_timewait_death_row *tcp_death_row;
 
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *forw_hdr;
@@ -70,11 +68,9 @@ struct netns_ipv4 {
        struct hlist_head       *fib_table_hash;
        struct sock             *fibnl;
 
-       struct sock  * __percpu *icmp_sk;
        struct sock             *mc_autojoin_sk;
 
        struct inet_peer_base   *peers;
-       struct sock  * __percpu *tcp_sk;
        struct fqdir            *fqdir;
 
        u8 sysctl_icmp_echo_ignore_all;
@@ -87,6 +83,7 @@ struct netns_ipv4 {
 
        u32 ip_rt_min_pmtu;
        int ip_rt_mtu_expires;
+       int ip_rt_min_advmss;
 
        struct local_ports ip_local_ports;
 
index a4b5503..30cdfc4 100644 (file)
@@ -88,7 +88,6 @@ struct netns_ipv6 {
        struct fib6_table       *fib6_local_tbl;
        struct fib_rules_ops    *fib6_rules_ops;
 #endif
-       struct sock * __percpu  *icmp_sk;
        struct sock             *ndisc_sk;
        struct sock             *tcp_sk;
        struct sock             *igmp_sk;
index 9e7b21c..44a3553 100644 (file)
@@ -63,12 +63,6 @@ static inline psched_time_t psched_get_time(void)
        return PSCHED_NS2TICKS(ktime_get_ns());
 }
 
-static inline psched_tdiff_t
-psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
-{
-       return min(tv1 - tv2, bound);
-}
-
 struct qdisc_watchdog {
        u64             last_expires;
        struct hrtimer  timer;
index 472843e..9bab396 100644 (file)
@@ -518,11 +518,6 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
        BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }
 
-static inline int qdisc_qlen_cpu(const struct Qdisc *q)
-{
-       return this_cpu_ptr(q->cpu_qstats)->qlen;
-}
-
 static inline int qdisc_qlen(const struct Qdisc *q)
 {
        return q->q.qlen;
index 9185e45..a3c5311 100644 (file)
@@ -70,49 +70,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
        return 0;
 }
 
-/* Slow-path computation of checksum. Socket is locked. */
-static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
-{
-       const struct udp_sock *up = udp_sk(skb->sk);
-       int cscov = up->len;
-       __wsum csum = 0;
-
-       if (up->pcflag & UDPLITE_SEND_CC) {
-               /*
-                * Sender has set `partial coverage' option on UDP-Lite socket.
-                * The special case "up->pcslen == 0" signifies full coverage.
-                */
-               if (up->pcslen < up->len) {
-                       if (0 < up->pcslen)
-                               cscov = up->pcslen;
-                       udp_hdr(skb)->len = htons(up->pcslen);
-               }
-               /*
-                * NOTE: Causes for the error case  `up->pcslen > up->len':
-                *        (i)  Application error (will not be penalized).
-                *       (ii)  Payload too big for send buffer: data is split
-                *             into several packets, each with its own header.
-                *             In this case (e.g. last segment), coverage may
-                *             exceed packet length.
-                *       Since packets with coverage length > packet length are
-                *       illegal, we fall back to the defaults here.
-                */
-       }
-
-       skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */
-
-       skb_queue_walk(&sk->sk_write_queue, skb) {
-               const int off = skb_transport_offset(skb);
-               const int len = skb->len - off;
-
-               csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
-
-               if ((cscov -= len) <= 0)
-                       break;
-       }
-       return csum;
-}
-
 /* Fast-path computation of checksum. Socket may not be locked. */
 static inline __wsum udplite_csum(struct sk_buff *skb)
 {
index 8f0812e..b7721c3 100644 (file)
@@ -60,12 +60,20 @@ struct xdp_rxq_info {
        u32 reg_state;
        struct xdp_mem_info mem;
        unsigned int napi_id;
+       u32 frag_size;
 } ____cacheline_aligned; /* perf critical, avoid false-sharing */
 
 struct xdp_txq_info {
        struct net_device *dev;
 };
 
+enum xdp_buff_flags {
+       XDP_FLAGS_HAS_FRAGS             = BIT(0), /* non-linear xdp buff */
+       XDP_FLAGS_FRAGS_PF_MEMALLOC     = BIT(1), /* xdp paged memory is under
+                                                  * pressure
+                                                  */
+};
+
 struct xdp_buff {
        void *data;
        void *data_end;
@@ -74,13 +82,40 @@ struct xdp_buff {
        struct xdp_rxq_info *rxq;
        struct xdp_txq_info *txq;
        u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
+       u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+{
+       return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
+{
+       xdp->flags |= XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
+{
+       xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+       return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
+static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+       xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
 static __always_inline void
 xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
 {
        xdp->frame_sz = frame_sz;
        xdp->rxq = rxq;
+       xdp->flags = 0;
 }
 
 static __always_inline void
@@ -111,6 +146,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
        return (struct skb_shared_info *)xdp_data_hard_end(xdp);
 }
 
+static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+{
+       unsigned int len = xdp->data_end - xdp->data;
+       struct skb_shared_info *sinfo;
+
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       len += sinfo->xdp_frags_size;
+out:
+       return len;
+}
+
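xdp_get_buff_len() is the in-kernel counterpart of the bpf_xdp_get_buff_len() helper documented in the uapi hunk below: the linear length plus, for multi-buffer packets, the bytes accounted in the shared info. A sketch of a driver-side length check (hypothetical helper):

/* Hypothetical check: total packet length after an XDP program ran;
 * frags are counted only when XDP_FLAGS_HAS_FRAGS is set. */
static bool example_xdp_too_long(struct xdp_buff *xdp, unsigned int max_len)
{
	return xdp_get_buff_len(xdp) > max_len;
}
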
 struct xdp_frame {
        void *data;
        u16 len;
@@ -122,8 +171,19 @@ struct xdp_frame {
         */
        struct xdp_mem_info mem;
        struct net_device *dev_rx; /* used by cpumap */
+       u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+{
+       return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+{
+       return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
 #define XDP_BULK_QUEUE_SIZE    16
 struct xdp_frame_bulk {
        int count;
@@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
        frame->dev_rx = NULL;
 }
 
+static inline void
+xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
+                          unsigned int size, unsigned int truesize,
+                          bool pfmemalloc)
+{
+       skb_shinfo(skb)->nr_frags = nr_frags;
+
+       skb->len += size;
+       skb->data_len += size;
+       skb->truesize += truesize;
+       skb->pfmemalloc |= pfmemalloc;
+}
+
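xdp_update_skb_shared_info() centralizes the bookkeeping a driver does when turning a multi-buffer xdp_buff into an skb: the frag count plus the len/data_len/truesize/pfmemalloc updates. A sketch of the expected call pattern (hypothetical driver function; note that nr_frags must be read before build_skb(), which zeroes the head of the shared info area):

static struct sk_buff *example_build_skb(struct xdp_buff *xdp)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	unsigned int frags_size = sinfo->xdp_frags_size;
	u8 nr_frags = sinfo->nr_frags;
	struct sk_buff *skb;

	skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (!skb)
		return NULL;

	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	__skb_put(skb, xdp->data_end - xdp->data);

	if (xdp_buff_has_frags(xdp))
		xdp_update_skb_shared_info(skb, nr_frags, frags_size,
					   nr_frags * xdp->frame_sz,
					   xdp_buff_is_frag_pfmemalloc(xdp));
	return skb;
}
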
 /* Avoids inlining WARN macro in fast-path */
 void xdp_warn(const char *msg, const char *func, const int line);
 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
@@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
        xdp->data_end = frame->data + frame->len;
        xdp->data_meta = frame->data - frame->metasize;
        xdp->frame_sz = frame->frame_sz;
+       xdp->flags = frame->flags;
 }
 
 static inline
@@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp,
        xdp_frame->headroom = headroom - sizeof(*xdp_frame);
        xdp_frame->metasize = metasize;
        xdp_frame->frame_sz = xdp->frame_sz;
+       xdp_frame->flags = xdp->flags;
 
        return 0;
 }
@@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
        return xdp_frame;
 }
 
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                 struct xdp_buff *xdp);
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
@@ -246,14 +323,37 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
 static inline void xdp_release_frame(struct xdp_frame *xdpf)
 {
        struct xdp_mem_info *mem = &xdpf->mem;
+       struct skb_shared_info *sinfo;
+       int i;
 
        /* Curr only page_pool needs this */
-       if (mem->type == MEM_TYPE_PAGE_POOL)
-               __xdp_release_frame(xdpf->data, mem);
+       if (mem->type != MEM_TYPE_PAGE_POOL)
+               return;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_release_frame(page_address(page), mem);
+       }
+out:
+       __xdp_release_frame(xdpf->data, mem);
+}
+
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                      struct net_device *dev, u32 queue_index,
+                      unsigned int napi_id, u32 frag_size);
+static inline int
+xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                struct net_device *dev, u32 queue_index,
+                unsigned int napi_id)
+{
+       return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
 }
 
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
-                    struct net_device *dev, u32 queue_index, unsigned int napi_id);
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
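
xdp_rxq_info_reg() is now a wrapper passing frag_size == 0, so legacy drivers keep working unchanged, while multi-buffer capable drivers call __xdp_rxq_info_reg() with a non-zero fragment size. A sketch of the two paths, assuming a hypothetical struct example_rq with xdp_rxq/index/frags_capable members:

static int example_rxq_setup(struct example_rq *rq, struct net_device *dev,
			     unsigned int napi_id)
{
	/* Legacy path: no frags support, frag_size stays 0. */
	if (!rq->frags_capable)
		return xdp_rxq_info_reg(&rq->xdp_rxq, dev, rq->index, napi_id);

	/* Multi-buffer path: advertise the per-buffer size. */
	return __xdp_rxq_info_reg(&rq->xdp_rxq, dev, rq->index, napi_id,
				  PAGE_SIZE);
}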
index b0383d3..16a7574 100644 (file)
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *                     *ctx_out*, *data_in* and *data_out* must be NULL.
  *                     *repeat* must be zero.
  *
+ *             BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
  *             is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE                (1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in the BPF_PROG_LOAD command, the loaded
+ * program fully supports xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS    (1U << 5)
+
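A program opts into multi-buffer handling at load time via prog_flags; the flag ends up in prog->aux->xdp_has_frags (see the syscall.c hunk below). A raw-syscall sketch of a loader (hypothetical function; most loaders would go through libbpf instead):

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

static int load_xdp_frags_prog(const struct bpf_insn *insns, __u32 insn_cnt)
{
	union bpf_attr attr = {
		.prog_type  = BPF_PROG_TYPE_XDP,
		.insns      = (__u64)(unsigned long)insns,
		.insn_cnt   = insn_cnt,
		.license    = (__u64)(unsigned long)"GPL",
		.prog_flags = BPF_F_XDP_HAS_FRAGS,	/* opt in to xdp frags */
	};

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
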
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1775,6 +1782,8 @@ union bpf_attr {
  *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ *     Description
+ *             Get the current pid and tgid.
  *     Return
  *             A 64-bit integer containing the current tgid and pid, and
  *             created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
  *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ *     Description
+ *             Get the current uid and gid.
  *     Return
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
  *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ *     Description
+ *             Get the current task.
  *     Return
  *             A pointer to the current task struct.
  *
@@ -2369,6 +2382,8 @@ union bpf_attr {
  *             indicate that the hash is outdated and to trigger a
  *             recalculation the next time the kernel tries to access this
  *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *     Return
+ *             void.
  *
  * long bpf_get_numa_node_id(void)
  *     Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
  *             A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Description
+ *             Get the owner UID of the socket associated to *skb*.
  *     Return
  *             The owner UID of the socket associated to *skb*. If the socket
  *             is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
  *             The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ *     Description
+ *             Get the current cgroup id based on the cgroup within which
+ *             the current task is running.
  *     Return
  *             A 64-bit integer containing the current cgroup id based
  *             on the cgroup within which the current task is running.
@@ -5018,6 +5038,44 @@ union bpf_attr {
  *
  *     Return
  *             The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *     Description
+ *             Get the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *     Description
+ *             Set the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *     Description
+ *             Get the total size of a given xdp buff (linear and paged area).
+ *     Return
+ *             The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             This helper is provided as an easy way to load data from an
+ *             xdp buffer. It can be used to load *len* bytes at *offset* from
+ *             the frame associated to *xdp_md* into the buffer pointed to by
+ *             *buf*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             Store *len* bytes from buffer *buf* into the frame
+ *             associated to *xdp_md*, at *offset*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5206,6 +5264,11 @@ union bpf_attr {
        FN(get_func_arg),               \
        FN(get_func_ret),               \
        FN(get_func_arg_cnt),           \
+       FN(get_retval),                 \
+       FN(set_retval),                 \
+       FN(xdp_get_buff_len),           \
+       FN(xdp_load_bytes),             \
+       FN(xdp_store_bytes),            \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
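
Taken together, the new helpers let a frags-aware program read and write bytes that may span the linear and paged areas without knowing the frag geometry. A program-side sketch (hypothetical program; SEC() and the helper declarations are assumed to come from a libbpf new enough to know these helpers):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp.frags")
int xdp_scan(struct xdp_md *ctx)
{
	__u8 hdr[16];

	if (bpf_xdp_get_buff_len(ctx) < sizeof(hdr))
		return XDP_DROP;

	/* May copy across the linear/paged boundary. */
	if (bpf_xdp_load_bytes(ctx, 0, hdr, sizeof(hdr)))
		return XDP_DROP;

	hdr[0] ^= 1;	/* toy rewrite */
	if (bpf_xdp_store_bytes(ctx, 0, hdr, sizeof(hdr)))
		return XDP_DROP;
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";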
index c7a5be3..7f145ae 100644 (file)
@@ -837,13 +837,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
 static void *prog_fd_array_get_ptr(struct bpf_map *map,
                                   struct file *map_file, int fd)
 {
-       struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_prog *prog = bpf_prog_get(fd);
 
        if (IS_ERR(prog))
                return prog;
 
-       if (!bpf_prog_array_compatible(array, prog)) {
+       if (!bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return ERR_PTR(-EINVAL);
        }
@@ -1071,7 +1070,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
        INIT_WORK(&aux->work, prog_array_map_clear_deferred);
        INIT_LIST_HEAD(&aux->poke_progs);
        mutex_init(&aux->poke_mutex);
-       spin_lock_init(&aux->owner.lock);
 
        map = array_map_alloc(attr);
        if (IS_ERR(map)) {
index e16dafe..a1c44c1 100644 (file)
 DEFINE_IDR(btf_idr);
 DEFINE_SPINLOCK(btf_idr_lock);
 
+enum btf_kfunc_hook {
+       BTF_KFUNC_HOOK_XDP,
+       BTF_KFUNC_HOOK_TC,
+       BTF_KFUNC_HOOK_STRUCT_OPS,
+       BTF_KFUNC_HOOK_MAX,
+};
+
+enum {
+       BTF_KFUNC_SET_MAX_CNT = 32,
+};
+
+struct btf_kfunc_set_tab {
+       struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+};
+
 struct btf {
        void *data;
        struct btf_type **types;
@@ -212,6 +227,7 @@ struct btf {
        refcount_t refcnt;
        u32 id;
        struct rcu_head rcu;
+       struct btf_kfunc_set_tab *kfunc_set_tab;
 
        /* split BTF support */
        struct btf *base_btf;
@@ -1531,8 +1547,30 @@ static void btf_free_id(struct btf *btf)
        spin_unlock_irqrestore(&btf_idr_lock, flags);
 }
 
+static void btf_free_kfunc_set_tab(struct btf *btf)
+{
+       struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
+       int hook, type;
+
+       if (!tab)
+               return;
+       /* For module BTF, we directly assign the sets being registered, so
+        * there is nothing to free except kfunc_set_tab.
+        */
+       if (btf_is_module(btf))
+               goto free_tab;
+       for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
+               for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
+                       kfree(tab->sets[hook][type]);
+       }
+free_tab:
+       kfree(tab);
+       btf->kfunc_set_tab = NULL;
+}
+
 static void btf_free(struct btf *btf)
 {
+       btf_free_kfunc_set_tab(btf);
        kvfree(btf->types);
        kvfree(btf->resolved_sizes);
        kvfree(btf->resolved_ids);
@@ -5616,17 +5654,45 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
        return true;
 }
 
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+                                 const struct btf_param *arg,
+                                 const struct bpf_reg_state *reg)
+{
+       int len, sfx_len = sizeof("__sz") - 1;
+       const struct btf_type *t;
+       const char *param_name;
+
+       t = btf_type_skip_modifiers(btf, arg->type, NULL);
+       if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+               return false;
+
+       /* In the future, this can be ported to use BTF tagging */
+       param_name = btf_name_by_offset(btf, arg->name_off);
+       if (str_is_empty(param_name))
+               return false;
+       len = strlen(param_name);
+       if (len < sfx_len)
+               return false;
+       param_name += len - sfx_len;
+       if (strncmp(param_name, "__sz", sfx_len))
+               return false;
+
+       return true;
+}
+
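The mem, len pair is inferred purely from BTF parameter names: a scalar argument whose name ends in "__sz" sizes the pointer argument immediately before it, which may then be a plain void pointer. A sketch of a kfunc written against the convention (illustrative function; the selftests add similar bpf_kfunc_call_test_* variants):

/* 'mem__sz' names the size of 'mem', so the verifier checks the range
 * [mem, mem + mem__sz) instead of requiring a struct type. */
noinline void bpf_example_memzero(void *mem, int mem__sz)
{
	memset(mem, 0, mem__sz);
}
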
 static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                                    const struct btf *btf, u32 func_id,
                                    struct bpf_reg_state *regs,
                                    bool ptr_to_mem_ok)
 {
        struct bpf_verifier_log *log = &env->log;
+       u32 i, nargs, ref_id, ref_obj_id = 0;
        bool is_kfunc = btf_is_kernel(btf);
        const char *func_name, *ref_tname;
        const struct btf_type *t, *ref_t;
        const struct btf_param *args;
-       u32 i, nargs, ref_id;
+       int ref_regno = 0;
+       bool rel = false;
 
        t = btf_type_by_id(btf, func_id);
        if (!t || !btf_type_is_func(t)) {
@@ -5704,6 +5770,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                        if (reg->type == PTR_TO_BTF_ID) {
                                reg_btf = reg->btf;
                                reg_ref_id = reg->btf_id;
+                               /* Ensure only one argument is referenced PTR_TO_BTF_ID */
+                               if (reg->ref_obj_id) {
+                                       if (ref_obj_id) {
+                                               bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+                                                       regno, reg->ref_obj_id, ref_obj_id);
+                                               return -EFAULT;
+                                       }
+                                       ref_regno = regno;
+                                       ref_obj_id = reg->ref_obj_id;
+                               }
                        } else {
                                reg_btf = btf_vmlinux;
                                reg_ref_id = *reg2btf_ids[reg->type];
@@ -5727,17 +5803,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                        u32 type_size;
 
                        if (is_kfunc) {
+                               bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+
                                /* Permit pointer to mem, but only when argument
                                 * type is pointer to scalar, or struct composed
                                 * (recursively) of scalars.
+                                * When arg_mem_size is true, the pointer can be
+                                * void *.
                                 */
                                if (!btf_type_is_scalar(ref_t) &&
-                                   !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) {
+                                   !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
+                                   (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
                                        bpf_log(log,
-                                               "arg#%d pointer type %s %s must point to scalar or struct with scalar\n",
-                                               i, btf_type_str(ref_t), ref_tname);
+                                               "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+                                               i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
                                        return -EINVAL;
                                }
+
+                               /* Check for mem, len pair */
+                               if (arg_mem_size) {
+                                       if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
+                                               bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
+                                                       i, i + 1);
+                                               return -EINVAL;
+                                       }
+                                       i++;
+                                       continue;
+                               }
                        }
 
                        resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
@@ -5758,7 +5850,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                }
        }
 
-       return 0;
+       /* Either both are set, or neither */
+       WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
+       if (is_kfunc) {
+               rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
+                                               BTF_KFUNC_TYPE_RELEASE, func_id);
+               /* We already made sure ref_obj_id is set only for one argument */
+               if (rel && !ref_obj_id) {
+                       bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+                               func_name);
+                       return -EINVAL;
+               }
+               /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to
+                * other kfuncs works
+                */
+       }
+       /* returns argument register number > 0 in case of reference release kfunc */
+       return rel ? ref_regno : 0;
 }
 
 /* Compare BTF of a function with given bpf_reg_state.
@@ -6200,12 +6308,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
        return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
 }
 
+enum {
+       BTF_MODULE_F_LIVE = (1 << 0),
+};
+
 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 struct btf_module {
        struct list_head list;
        struct module *module;
        struct btf *btf;
        struct bin_attribute *sysfs_attr;
+       int flags;
 };
 
 static LIST_HEAD(btf_modules);
@@ -6233,7 +6346,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
        int err = 0;
 
        if (mod->btf_data_size == 0 ||
-           (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+           (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
+            op != MODULE_STATE_GOING))
                goto out;
 
        switch (op) {
@@ -6292,6 +6406,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
                }
 
                break;
+       case MODULE_STATE_LIVE:
+               mutex_lock(&btf_module_mutex);
+               list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+                       if (btf_mod->module != module)
+                               continue;
+
+                       btf_mod->flags |= BTF_MODULE_F_LIVE;
+                       break;
+               }
+               mutex_unlock(&btf_module_mutex);
+               break;
        case MODULE_STATE_GOING:
                mutex_lock(&btf_module_mutex);
                list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
@@ -6338,7 +6463,12 @@ struct module *btf_try_get_module(const struct btf *btf)
                if (btf_mod->btf != btf)
                        continue;
 
-               if (try_module_get(btf_mod->module))
+               /* We must only consider modules whose __init routine has
+                * finished, hence we check for the BTF_MODULE_F_LIVE flag,
+                * which is set from the notifier callback for
+                * MODULE_STATE_LIVE.
+                */
+               if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
                        res = btf_mod->module;
 
                break;
@@ -6349,6 +6479,36 @@ struct module *btf_try_get_module(const struct btf *btf)
        return res;
 }
 
+/* Returns the struct btf corresponding to the given struct module
+ *
+ * This function can return NULL or ERR_PTR. Note that the caller must
+ * release the reference on the struct btf iff btf_is_module() is true.
+ */
+static struct btf *btf_get_module_btf(const struct module *module)
+{
+       struct btf *btf = NULL;
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       struct btf_module *btf_mod, *tmp;
+#endif
+
+       if (!module)
+               return bpf_get_btf_vmlinux();
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       mutex_lock(&btf_module_mutex);
+       list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+               if (btf_mod->module != module)
+                       continue;
+
+               btf_get(btf_mod->btf);
+               btf = btf_mod->btf;
+               break;
+       }
+       mutex_unlock(&btf_module_mutex);
+#endif
+
+       return btf;
+}
+
 BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
 {
        struct btf *btf;
@@ -6416,53 +6576,181 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
 BTF_TRACING_TYPE_xxx
 #undef BTF_TRACING_TYPE
 
-/* BTF ID set registration API for modules */
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+/* Kernel Function (kfunc) BTF ID set registration API */
 
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                              struct kfunc_btf_id_set *s)
+static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+                                   enum btf_kfunc_type type,
+                                   struct btf_id_set *add_set, bool vmlinux_set)
 {
-       mutex_lock(&l->mutex);
-       list_add(&s->list, &l->list);
-       mutex_unlock(&l->mutex);
+       struct btf_kfunc_set_tab *tab;
+       struct btf_id_set *set;
+       u32 set_cnt;
+       int ret;
+
+       if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       if (!add_set->cnt)
+               return 0;
+
+       tab = btf->kfunc_set_tab;
+       if (!tab) {
+               tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
+               if (!tab)
+                       return -ENOMEM;
+               btf->kfunc_set_tab = tab;
+       }
+
+       set = tab->sets[hook][type];
+       /* Warn when register_btf_kfunc_id_set is called twice for the same hook
+        * for module sets.
+        */
+       if (WARN_ON_ONCE(set && !vmlinux_set)) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       /* We don't need to allocate, concatenate, and sort module sets, because
+        * only one is allowed per hook. Hence, we can directly assign the
+        * pointer and return.
+        */
+       if (!vmlinux_set) {
+               tab->sets[hook][type] = add_set;
+               return 0;
+       }
+
+       /* In case of vmlinux sets, there may be more than one set being
+        * registered per hook. To create a unified set, we allocate a new set
+        * and concatenate all individual sets being registered. While each set
+        * is individually sorted, they may become unsorted when concatenated,
+        * hence the final set must be sorted again for binary search via the
+        * btf_id_set_contains function to keep working.
+        */
+       set_cnt = set ? set->cnt : 0;
+
+       if (set_cnt > U32_MAX - add_set->cnt) {
+               ret = -EOVERFLOW;
+               goto end;
+       }
+
+       if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
+               ret = -E2BIG;
+               goto end;
+       }
+
+       /* Grow set */
+       set = krealloc(tab->sets[hook][type],
+                      offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+                      GFP_KERNEL | __GFP_NOWARN);
+       if (!set) {
+               ret = -ENOMEM;
+               goto end;
+       }
+
+       /* For newly allocated set, initialize set->cnt to 0 */
+       if (!tab->sets[hook][type])
+               set->cnt = 0;
+       tab->sets[hook][type] = set;
+
+       /* Concatenate the two sets */
+       memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+       set->cnt += add_set->cnt;
+
+       sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+
+       return 0;
+end:
+       btf_free_kfunc_set_tab(btf);
+       return ret;
 }
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
 
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                struct kfunc_btf_id_set *s)
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+                                 const struct btf_kfunc_id_set *kset)
 {
-       mutex_lock(&l->mutex);
-       list_del_init(&s->list);
-       mutex_unlock(&l->mutex);
+       bool vmlinux_set = !btf_is_module(btf);
+       int type, ret = 0;
+
+       for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
+               if (!kset->sets[type])
+                       continue;
+
+               ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
+               if (ret)
+                       break;
+       }
+       return ret;
 }
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
 
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-                             struct module *owner)
+static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+                                       enum btf_kfunc_hook hook,
+                                       enum btf_kfunc_type type,
+                                       u32 kfunc_btf_id)
 {
-       struct kfunc_btf_id_set *s;
+       struct btf_id_set *set;
 
-       mutex_lock(&klist->mutex);
-       list_for_each_entry(s, &klist->list, list) {
-               if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
-                       mutex_unlock(&klist->mutex);
-                       return true;
-               }
+       if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
+               return false;
+       if (!btf->kfunc_set_tab)
+               return false;
+       set = btf->kfunc_set_tab->sets[hook][type];
+       if (!set)
+               return false;
+       return btf_id_set_contains(set, kfunc_btf_id);
+}
+
+static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
+{
+       switch (prog_type) {
+       case BPF_PROG_TYPE_XDP:
+               return BTF_KFUNC_HOOK_XDP;
+       case BPF_PROG_TYPE_SCHED_CLS:
+               return BTF_KFUNC_HOOK_TC;
+       case BPF_PROG_TYPE_STRUCT_OPS:
+               return BTF_KFUNC_HOOK_STRUCT_OPS;
+       default:
+               return BTF_KFUNC_HOOK_MAX;
        }
-       mutex_unlock(&klist->mutex);
-       return false;
 }
 
-#define DEFINE_KFUNC_BTF_ID_LIST(name)                                         \
-       struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list),           \
-                                         __MUTEX_INITIALIZER(name.mutex) };   \
-       EXPORT_SYMBOL_GPL(name)
+/* Caution:
+ * Reference to the module (obtained using btf_try_get_module) corresponding to
+ * the struct btf *MUST* be held when calling this function from verifier
+ * context. This is usually true as we stash references in prog's kfunc_btf_tab;
+ * keeping the reference for the duration of the call provides the necessary
+ * protection for looking up a well-formed btf->kfunc_set_tab.
+ */
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+                              enum bpf_prog_type prog_type,
+                              enum btf_kfunc_type type, u32 kfunc_btf_id)
+{
+       enum btf_kfunc_hook hook;
 
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+       hook = bpf_prog_type_to_kfunc_hook(prog_type);
+       return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
+}
 
-#endif
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                             const struct btf_kfunc_id_set *kset)
+{
+       enum btf_kfunc_hook hook;
+       struct btf *btf;
+       int ret;
+
+       btf = btf_get_module_btf(kset->owner);
+       if (IS_ERR_OR_NULL(btf))
+               return btf ? PTR_ERR(btf) : -ENOENT;
+
+       hook = bpf_prog_type_to_kfunc_hook(prog_type);
+       ret = btf_populate_kfunc_set(btf, hook, kset);
+       /* reference is only taken for module BTF */
+       if (btf_is_module(btf))
+               btf_put(btf);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
 
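
With the list-based API gone, a subsystem describes its kfuncs as BTF ID sets keyed by kfunc type and registers them once from an initcall or module init. A sketch of the new flow (illustrative kfunc and set names; BTF_SET_START/BTF_ID/BTF_SET_END are the existing macros from linux/btf_ids.h):

BTF_SET_START(example_check_kfunc_ids)
BTF_ID(func, bpf_example_memzero)
BTF_SET_END(example_check_kfunc_ids)

static const struct btf_kfunc_id_set example_kfunc_set = {
	.owner     = THIS_MODULE,
	.check_set = &example_check_kfunc_ids,
};

static int __init example_kfunc_init(void)
{
	/* For a module this stashes the set in the module's BTF; for
	 * built-in code it is merged into the vmlinux-wide sets. */
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
					 &example_kfunc_set);
}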
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
                              const struct btf *targ_btf, __u32 targ_id)
index 514b468..279ebbe 100644 (file)
@@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  *   NET_XMIT_DROP       (1)   - drop packet and notify TCP to call cwr
  *   NET_XMIT_CN         (2)   - continue with packet output and notify TCP
  *                               to call cwr
- *   -EPERM                    - drop packet
+ *   -err                      - drop packet
  *
  * For ingress packets, this function will return -EPERM if any
  * attached program was found and if it returned != 1 during execution.
@@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
                        cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
        } else {
                ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-                                           __bpf_prog_run_save_cb);
-               ret = (ret == 1 ? 0 : -EPERM);
+                                           __bpf_prog_run_save_cb, 0);
+               if (ret && !IS_ERR_VALUE((long)ret))
+                       ret = -EFAULT;
        }
        bpf_restore_data_end(skb, saved_data_end);
        __skb_pull(skb, offset);
@@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
                               enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       int ret;
 
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        };
        struct sockaddr_storage unspec;
        struct cgroup *cgrp;
-       int ret;
 
        /* Check socket family since not all sockets represent network
         * endpoint (e.g. AF_UNIX).
@@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        }
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-                                         bpf_prog_run, flags);
-
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+                                          bpf_prog_run, 0, flags);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       int ret;
 
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-                                   bpf_prog_run);
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
                .major = major,
                .minor = minor,
        };
-       int allow;
+       int ret;
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-                                     bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
-       return !allow;
+       return ret;
 }
 
+BPF_CALL_0(bpf_get_retval)
+{
+       struct bpf_cg_run_ctx *ctx =
+               container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+       return ctx->retval;
+}
+
+static const struct bpf_func_proto bpf_get_retval_proto = {
+       .func           = bpf_get_retval,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+};
+
+BPF_CALL_1(bpf_set_retval, int, retval)
+{
+       struct bpf_cg_run_ctx *ctx =
+               container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+       ctx->retval = retval;
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_set_retval_proto = {
+       .func           = bpf_set_retval,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_current_cgroup_id_proto;
        case BPF_FUNC_perf_event_output:
                return &bpf_event_output_data_proto;
+       case BPF_FUNC_get_retval:
+               return &bpf_get_retval_proto;
+       case BPF_FUNC_set_retval:
+               return &bpf_set_retval_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
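
From the program side, the conversion replaces the old "return 1 to allow" contract with an errno-style value that can now be set explicitly. A sketch of a cgroup program using the new helpers (hypothetical program and option level; declarations assumed from libbpf's bpf_helpers.h):

#include <linux/bpf.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>

#define SOL_CUSTOM 280	/* hypothetical level, for illustration only */

SEC("cgroup/setsockopt")
int setsockopt_guard(struct bpf_sockopt *ctx)
{
	if (ctx->level == SOL_CUSTOM) {
		/* Reject with a specific errno rather than the old
		 * blanket -EPERM. */
		bpf_set_retval(-ENOPROTOOPT);
		return 0;
	}
	return 1;	/* allow; the kernel handles the option */
}

char _license[] SEC("license") = "GPL";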
@@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
        kfree(ctx.cur_val);
@@ -1350,7 +1380,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
                kfree(ctx.new_val);
        }
 
-       return ret == 1 ? 0 : -EPERM;
+       return ret;
 }
 
 #ifdef CONFIG_NET
@@ -1452,13 +1482,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, 0);
        release_sock(sk);
 
-       if (!ret) {
-               ret = -EPERM;
+       if (ret)
                goto out;
-       }
 
        if (ctx.optlen == -1) {
                /* optlen set to -1, bypass kernel */
@@ -1518,7 +1546,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
+               .current_task = current,
        };
        int ret;
 
@@ -1562,27 +1590,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, retval);
        release_sock(sk);
 
-       if (!ret) {
-               ret = -EPERM;
+       if (ret < 0)
                goto out;
-       }
 
        if (ctx.optlen > max_optlen || ctx.optlen < 0) {
                ret = -EFAULT;
                goto out;
        }
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval) {
-               ret = -EFAULT;
-               goto out;
-       }
-
        if (ctx.optlen != 0) {
                if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
                    put_user(ctx.optlen, optlen)) {
@@ -1591,8 +1609,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                }
        }
 
-       ret = ctx.retval;
-
 out:
        sockopt_free_buf(&ctx, &buf);
        return ret;
@@ -1607,10 +1623,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
                .optlen = *optlen,
                .optval = optval,
                .optval_end = optval + *optlen,
+               .current_task = current,
        };
        int ret;
 
@@ -1623,25 +1639,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
         */
 
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
-       if (!ret)
-               return -EPERM;
+                                   &ctx, bpf_prog_run, retval);
+       if (ret < 0)
+               return ret;
 
        if (ctx.optlen > *optlen)
                return -EFAULT;
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval)
-               return -EFAULT;
-
        /* BPF programs can shrink the buffer, export the modifications.
         */
        if (ctx.optlen != 0)
                *optlen = ctx.optlen;
 
-       return ctx.retval;
+       return ret;
 }
 #endif
 
@@ -2057,10 +2067,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
                        *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
                break;
        case offsetof(struct bpf_sockopt, retval):
-               if (type == BPF_WRITE)
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
-               else
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+               BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+               if (type == BPF_WRITE) {
+                       int treg = BPF_REG_9;
+
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             treg, treg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             treg, si->src_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+                       *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+               } else {
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             si->dst_reg, si->src_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+               }
                break;
        case offsetof(struct bpf_sockopt, optval):
                *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
index de3e5bc..0a1cfd8 100644 (file)
@@ -1829,28 +1829,30 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
 }
 #endif
 
-bool bpf_prog_array_compatible(struct bpf_array *array,
-                              const struct bpf_prog *fp)
+bool bpf_prog_map_compatible(struct bpf_map *map,
+                            const struct bpf_prog *fp)
 {
        bool ret;
 
        if (fp->kprobe_override)
                return false;
 
-       spin_lock(&array->aux->owner.lock);
-
-       if (!array->aux->owner.type) {
+       spin_lock(&map->owner.lock);
+       if (!map->owner.type) {
                /* There's no owner yet where we could check for
                 * compatibility.
                 */
-               array->aux->owner.type  = fp->type;
-               array->aux->owner.jited = fp->jited;
+               map->owner.type  = fp->type;
+               map->owner.jited = fp->jited;
+               map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
                ret = true;
        } else {
-               ret = array->aux->owner.type  == fp->type &&
-                     array->aux->owner.jited == fp->jited;
+               ret = map->owner.type  == fp->type &&
+                     map->owner.jited == fp->jited &&
+                     map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
        }
-       spin_unlock(&array->aux->owner.lock);
+       spin_unlock(&map->owner.lock);
+
        return ret;
 }
 
@@ -1862,13 +1864,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
        mutex_lock(&aux->used_maps_mutex);
        for (i = 0; i < aux->used_map_cnt; i++) {
                struct bpf_map *map = aux->used_maps[i];
-               struct bpf_array *array;
 
-               if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+               if (!map_type_contains_progs(map))
                        continue;
 
-               array = container_of(map, struct bpf_array, map);
-               if (!bpf_prog_array_compatible(array, fp)) {
+               if (!bpf_prog_map_compatible(map, fp)) {
                        ret = -EINVAL;
                        goto out;
                }
index b3e6b94..650e5d2 100644 (file)
@@ -397,7 +397,8 @@ static int cpu_map_kthread_run(void *data)
        return 0;
 }
 
-static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu,
+                                     struct bpf_map *map, int fd)
 {
        struct bpf_prog *prog;
 
@@ -405,7 +406,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
        if (IS_ERR(prog))
                return PTR_ERR(prog);
 
-       if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+       if (prog->expected_attach_type != BPF_XDP_CPUMAP ||
+           !bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return -EINVAL;
        }
@@ -457,7 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
        rcpu->map_id = map->id;
        rcpu->value.qsize  = value->qsize;
 
-       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
                goto free_ptr_ring;
 
        /* Setup kthread */
index fe019db..038f6d7 100644 (file)
@@ -858,7 +858,8 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
                                             BPF_PROG_TYPE_XDP, false);
                if (IS_ERR(prog))
                        goto err_put_dev;
-               if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+               if (prog->expected_attach_type != BPF_XDP_DEVMAP ||
+                   !bpf_prog_map_compatible(&dtab->map, prog))
                        goto err_put_prog;
        }
 
index fa4505f..72ce1ed 100644 (file)
@@ -556,16 +556,14 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
 
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
-       const struct bpf_map *map = filp->private_data;
-       const struct bpf_array *array;
+       struct bpf_map *map = filp->private_data;
        u32 type = 0, jited = 0;
 
-       if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
-               array = container_of(map, struct bpf_array, map);
-               spin_lock(&array->aux->owner.lock);
-               type  = array->aux->owner.type;
-               jited = array->aux->owner.jited;
-               spin_unlock(&array->aux->owner.lock);
+       if (map_type_contains_progs(map)) {
+               spin_lock(&map->owner.lock);
+               type  = map->owner.type;
+               jited = map->owner.jited;
+               spin_unlock(&map->owner.lock);
        }
 
        seq_printf(m,
@@ -874,6 +872,7 @@ static int map_create(union bpf_attr *attr)
        atomic64_set(&map->refcnt, 1);
        atomic64_set(&map->usercnt, 1);
        mutex_init(&map->freeze_mutex);
+       spin_lock_init(&map->owner.lock);
 
        map->spin_lock_off = -EINVAL;
        map->timer_off = -EINVAL;
@@ -2217,7 +2216,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
                                 BPF_F_ANY_ALIGNMENT |
                                 BPF_F_TEST_STATE_FREQ |
                                 BPF_F_SLEEPABLE |
-                                BPF_F_TEST_RND_HI32))
+                                BPF_F_TEST_RND_HI32 |
+                                BPF_F_XDP_HAS_FRAGS))
                return -EINVAL;
 
        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -2303,6 +2303,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
        prog->aux->dst_prog = dst_prog;
        prog->aux->offload_requested = !!attr->prog_ifindex;
        prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
+       prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
 
        err = security_bpf_prog_alloc(prog->aux);
        if (err)
@@ -3318,6 +3319,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
        case BPF_FLOW_DISSECTOR:
        case BPF_SK_LOOKUP:
                return netns_bpf_prog_query(attr, uattr);
+       case BPF_SK_SKB_STREAM_PARSER:
+       case BPF_SK_SKB_STREAM_VERDICT:
+       case BPF_SK_MSG_VERDICT:
+       case BPF_SK_SKB_VERDICT:
+               return sock_map_bpf_prog_query(attr, uattr);
        default:
                return -EINVAL;
        }
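
The new cases route BPF_PROG_QUERY for the sockmap attach types to sock_map_bpf_prog_query(), where the query target is the map fd rather than a cgroup or netns fd. A syscall-level sketch (hypothetical fds and sizes):

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

static int query_sockmap_progs(int map_fd)
{
	__u32 prog_ids[4];
	union bpf_attr attr = {};
	int err;

	attr.query.target_fd   = map_fd;	/* sockmap/sockhash fd */
	attr.query.attach_type = BPF_SK_SKB_STREAM_VERDICT;
	attr.query.prog_ids    = (__u64)(unsigned long)prog_ids;
	attr.query.prog_cnt    = 4;

	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr));
	/* On success, attr.query.prog_cnt holds the number of ids filled. */
	return err;
}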
index a39eede..dcf065e 100644 (file)
@@ -452,7 +452,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 {
        return base_type(type) == PTR_TO_SOCKET ||
                base_type(type) == PTR_TO_TCP_SOCK ||
-               base_type(type) == PTR_TO_MEM;
+               base_type(type) == PTR_TO_MEM ||
+               base_type(type) == PTR_TO_BTF_ID;
 }
 
 static bool type_is_rdonly_mem(u32 type)
@@ -1743,7 +1744,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 }
 
 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
-                                        s16 offset, struct module **btf_modp)
+                                        s16 offset)
 {
        struct bpf_kfunc_btf kf_btf = { .offset = offset };
        struct bpf_kfunc_btf_tab *tab;
@@ -1797,8 +1798,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
                sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
                     kfunc_btf_cmp_by_off, NULL);
        }
-       if (btf_modp)
-               *btf_modp = b->module;
        return b->btf;
 }
 
@@ -1815,8 +1814,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
 }
 
 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
-                                      u32 func_id, s16 offset,
-                                      struct module **btf_modp)
+                                      u32 func_id, s16 offset)
 {
        if (offset) {
                if (offset < 0) {
@@ -1827,7 +1825,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
                        return ERR_PTR(-EINVAL);
                }
 
-               return __find_kfunc_desc_btf(env, offset, btf_modp);
+               return __find_kfunc_desc_btf(env, offset);
        }
        return btf_vmlinux ?: ERR_PTR(-ENOENT);
 }
@@ -1890,7 +1888,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
                prog_aux->kfunc_btf_tab = btf_tab;
        }
 
-       desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+       desc_btf = find_kfunc_desc_btf(env, func_id, offset);
        if (IS_ERR(desc_btf)) {
                verbose(env, "failed to find BTF for kernel function\n");
                return PTR_ERR(desc_btf);
@@ -2351,7 +2349,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
        if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
                return NULL;
 
-       desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+       desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
        if (IS_ERR(desc_btf))
                return "<error>";
 
@@ -3498,11 +3496,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 
 #define MAX_PACKET_OFF 0xffff
 
-static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
-{
-       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
-}
-
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
                                       const struct bpf_call_arg_meta *meta,
                                       enum bpf_access_type t)
@@ -4877,6 +4870,62 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
        }
 }
 
+static int check_mem_size_reg(struct bpf_verifier_env *env,
+                             struct bpf_reg_state *reg, u32 regno,
+                             bool zero_size_allowed,
+                             struct bpf_call_arg_meta *meta)
+{
+       int err;
+
+       /* This is used to refine r0 return value bounds for helpers
+        * that enforce this value as an upper bound on return values.
+        * See do_refine_retval_range() for helpers that can refine
+        * the return value. C type of helper is u32 so we pull register
+        * bound from umax_value however, if negative verifier errors
+        * out. Only upper bounds can be learned because retval is an
+        * int type and negative retvals are allowed.
+        */
+       if (meta)
+               meta->msize_max_value = reg->umax_value;
+
+       /* The register is SCALAR_VALUE; the access check
+        * happens using its boundaries.
+        */
+       if (!tnum_is_const(reg->var_off))
+               /* For unprivileged variable accesses, disable raw
+                * mode so that the program is required to
+                * initialize all the memory that the helper could
+                * just partially fill up.
+                */
+               meta = NULL;
+
+       if (reg->smin_value < 0) {
+               verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
+                       regno);
+               return -EACCES;
+       }
+
+       if (reg->umin_value == 0) {
+               err = check_helper_mem_access(env, regno - 1, 0,
+                                             zero_size_allowed,
+                                             meta);
+               if (err)
+                       return err;
+       }
+
+       if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+               verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+                       regno);
+               return -EACCES;
+       }
+       err = check_helper_mem_access(env, regno - 1,
+                                     reg->umax_value,
+                                     zero_size_allowed, meta);
+       if (!err)
+               err = mark_chain_precision(env, regno);
+       return err;
+}
+
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size)
 {
@@ -4900,6 +4949,28 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
        return check_helper_mem_access(env, regno, mem_size, true, NULL);
 }
 
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                            u32 regno)
+{
+       struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
+       bool may_be_null = type_may_be_null(mem_reg->type);
+       struct bpf_reg_state saved_reg;
+       int err;
+
+       WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+
+       if (may_be_null) {
+               saved_reg = *mem_reg;
+               mark_ptr_not_null_reg(mem_reg);
+       }
+
+       err = check_mem_size_reg(env, reg, regno, true, NULL);
+
+       if (may_be_null)
+               *mem_reg = saved_reg;
+       return err;
+}
+
 /* Implementation details:
  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
  * Two bpf_map_lookups (even with the same key) will have different reg->id.
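
The new check_kfunc_mem_size_reg() path lets kfuncs take a (pointer, size)
argument pair; per the test kfuncs later in this series, the size argument is
matched by the "__sz" suffix on its name. A minimal BPF-side sketch, assuming
the test kfunc below is built into the kernel and a libbpf toolchain; section
name and buffer size are illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* kfunc exported by net/bpf/test_run.c in this series */
    extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) __ksym;

    char LICENSE[] SEC("license") = "GPL";

    SEC("tc")
    int mem_len_ok(struct __sk_buff *skb)
    {
            __u8 buf[16] = {};

            /* verifier validates buf/sizeof(buf) as a mem+len pair */
            bpf_kfunc_call_test_mem_len_pass1(buf, sizeof(buf));
            return 0;
    }
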
@@ -5439,51 +5510,7 @@ skip_type_check:
        } else if (arg_type_is_mem_size(arg_type)) {
                bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
-               /* This is used to refine r0 return value bounds for helpers
-                * that enforce this value as an upper bound on return values.
-                * See do_refine_retval_range() for helpers that can refine
-                * the return value. C type of helper is u32 so we pull register
-                * bound from umax_value however, if negative verifier errors
-                * out. Only upper bounds can be learned because retval is an
-                * int type and negative retvals are allowed.
-                */
-               meta->msize_max_value = reg->umax_value;
-
-               /* The register is SCALAR_VALUE; the access check
-                * happens using its boundaries.
-                */
-               if (!tnum_is_const(reg->var_off))
-                       /* For unprivileged variable accesses, disable raw
-                        * mode so that the program is required to
-                        * initialize all the memory that the helper could
-                        * just partially fill up.
-                        */
-                       meta = NULL;
-
-               if (reg->smin_value < 0) {
-                       verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
-                               regno);
-                       return -EACCES;
-               }
-
-               if (reg->umin_value == 0) {
-                       err = check_helper_mem_access(env, regno - 1, 0,
-                                                     zero_size_allowed,
-                                                     meta);
-                       if (err)
-                               return err;
-               }
-
-               if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
-                       verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
-                               regno);
-                       return -EACCES;
-               }
-               err = check_helper_mem_access(env, regno - 1,
-                                             reg->umax_value,
-                                             zero_size_allowed, meta);
-               if (!err)
-                       err = mark_chain_precision(env, regno);
+               err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
        } else if (arg_type_is_alloc_size(arg_type)) {
                if (!tnum_is_const(reg->var_off)) {
                        verbose(env, "R%d is not a known constant'\n",
@@ -6842,22 +6869,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
        }
 }
 
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                           int *insn_idx_p)
 {
        const struct btf_type *t, *func, *func_proto, *ptr_type;
        struct bpf_reg_state *regs = cur_regs(env);
        const char *func_name, *ptr_type_name;
        u32 i, nargs, func_id, ptr_type_id;
-       struct module *btf_mod = NULL;
+       int err, insn_idx = *insn_idx_p;
        const struct btf_param *args;
        struct btf *desc_btf;
-       int err;
+       bool acq;
 
        /* skip for now, but return error when we find this in fixup_kfunc_call */
        if (!insn->imm)
                return 0;
 
-       desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+       desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
        if (IS_ERR(desc_btf))
                return PTR_ERR(desc_btf);
 
@@ -6866,23 +6894,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
        func_name = btf_name_by_offset(desc_btf, func->name_off);
        func_proto = btf_type_by_id(desc_btf, func->type);
 
-       if (!env->ops->check_kfunc_call ||
-           !env->ops->check_kfunc_call(func_id, btf_mod)) {
+       if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                     BTF_KFUNC_TYPE_CHECK, func_id)) {
                verbose(env, "calling kernel function %s is not allowed\n",
                        func_name);
                return -EACCES;
        }
 
+       acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                       BTF_KFUNC_TYPE_ACQUIRE, func_id);
+
        /* Check the arguments */
        err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
-       if (err)
+       if (err < 0)
                return err;
+       /* In case of a release function, we get the register number of the
+        * refcounted PTR_TO_BTF_ID back from btf_check_kfunc_arg_match();
+        * do the release now.
+        */
+       if (err) {
+               err = release_reference(env, regs[err].ref_obj_id);
+               if (err) {
+                       verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+                               func_name, func_id);
+                       return err;
+               }
+       }
 
        for (i = 0; i < CALLER_SAVED_REGS; i++)
                mark_reg_not_init(env, regs, caller_saved[i]);
 
        /* Check return type */
        t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+
+       if (acq && !btf_type_is_ptr(t)) {
+               verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+               return -EINVAL;
+       }
+
        if (btf_type_is_scalar(t)) {
                mark_reg_unknown(env, regs, BPF_REG_0);
                mark_btf_func_reg_size(env, BPF_REG_0, t->size);
@@ -6901,7 +6949,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
                regs[BPF_REG_0].btf = desc_btf;
                regs[BPF_REG_0].type = PTR_TO_BTF_ID;
                regs[BPF_REG_0].btf_id = ptr_type_id;
+               if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                             BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+                       regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
+                       /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
+                       regs[BPF_REG_0].id = ++env->id_gen;
+               }
                mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+               if (acq) {
+                       int id = acquire_reference_state(env, insn_idx);
+
+                       if (id < 0)
+                               return id;
+                       regs[BPF_REG_0].id = id;
+                       regs[BPF_REG_0].ref_obj_id = id;
+               }
        } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
 
        nargs = btf_type_vlen(func_proto);
@@ -11549,7 +11611,7 @@ static int do_check(struct bpf_verifier_env *env)
                                if (insn->src_reg == BPF_PSEUDO_CALL)
                                        err = check_func_call(env, insn, &env->insn_idx);
                                else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
-                                       err = check_kfunc_call(env, insn);
+                                       err = check_kfunc_call(env, insn, &env->insn_idx);
                                else
                                        err = check_helper_call(env, insn, &env->insn_idx);
                                if (err)
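
Taken together, the check_kfunc_call() changes mean an "acquire" kfunc returns
a reference-tracked, possibly-NULL pointer that the program must release on
every path. A minimal sketch against the test kfuncs from net/bpf/test_run.c,
assuming a libbpf toolchain; the program body is illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct prog_test_ref_kfunc;

    extern struct prog_test_ref_kfunc *
    bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
    extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;

    char LICENSE[] SEC("license") = "GPL";

    SEC("tc")
    int ref_roundtrip(struct __sk_buff *skb)
    {
            unsigned long sp = 0;
            struct prog_test_ref_kfunc *p;

            p = bpf_kfunc_call_test_acquire(&sp);
            if (!p)         /* BTF_KFUNC_TYPE_RET_NULL makes this check mandatory */
                    return 0;
            bpf_kfunc_call_test_release(p); /* omitting this fails verification */
            return 0;
    }
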
index 21aa306..06a9e22 100644 (file)
@@ -1562,6 +1562,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 
 extern const struct bpf_func_proto bpf_skb_output_proto;
 extern const struct bpf_func_proto bpf_xdp_output_proto;
+extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
           struct bpf_map *, map, u64, flags)
@@ -1661,6 +1662,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sock_from_file_proto;
        case BPF_FUNC_get_socket_cookie:
                return &bpf_get_socket_ptr_cookie_proto;
+       case BPF_FUNC_xdp_get_buff_len:
+               return &bpf_xdp_get_buff_len_trace_proto;
 #endif
        case BPF_FUNC_seq_printf:
                return prog->expected_attach_type == BPF_TRACE_ITER ?
index d0b2e09..be97dc6 100644 (file)
@@ -111,7 +111,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
                return -ENOMEM;
        }
 
-       refcount_set(&ax25_rt->refcount, 1);
        ax25_rt->callsign     = route->dest_addr;
        ax25_rt->dev          = ax25_dev->dev;
        ax25_rt->digipeat     = NULL;
@@ -160,12 +159,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
                    ax25cmp(&route->dest_addr, &s->callsign) == 0) {
                        if (ax25_route_list == s) {
                                ax25_route_list = s->next;
-                               ax25_put_route(s);
+                               __ax25_put_route(s);
                        } else {
                                for (t = ax25_route_list; t != NULL; t = t->next) {
                                        if (t->next == s) {
                                                t->next = s->next;
-                                               ax25_put_route(s);
+                                               __ax25_put_route(s);
                                                break;
                                        }
                                }
index 46dd957..65b52b4 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/slab.h>
+#include <linux/init.h>
 #include <linux/vmalloc.h>
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
@@ -130,7 +131,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
 static int bpf_test_finish(const union bpf_attr *kattr,
                           union bpf_attr __user *uattr, const void *data,
-                          u32 size, u32 retval, u32 duration)
+                          struct skb_shared_info *sinfo, u32 size,
+                          u32 retval, u32 duration)
 {
        void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
        int err = -EFAULT;
@@ -145,8 +147,36 @@ static int bpf_test_finish(const union bpf_attr *kattr,
                err = -ENOSPC;
        }
 
-       if (data_out && copy_to_user(data_out, data, copy_size))
-               goto out;
+       if (data_out) {
+               int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+
+               if (copy_to_user(data_out, data, len))
+                       goto out;
+
+               if (sinfo) {
+                       int i, offset = len, data_len;
+
+                       for (i = 0; i < sinfo->nr_frags; i++) {
+                               skb_frag_t *frag = &sinfo->frags[i];
+
+                               if (offset >= copy_size) {
+                                       err = -ENOSPC;
+                                       break;
+                               }
+
+                               data_len = min_t(int, copy_size - offset,
+                                                skb_frag_size(frag));
+
+                               if (copy_to_user(data_out + offset,
+                                                skb_frag_address(frag),
+                                                data_len))
+                                       goto out;
+
+                               offset += data_len;
+                       }
+               }
+       }
+
        if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
                goto out;
        if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
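
With bpf_test_finish() now walking the shared info, BPF_PROG_TEST_RUN copies
the linear area and every fragment back to user space. A hedged user-space
sketch, assuming a libbpf version that provides bpf_prog_test_run_opts(); the
9000-byte input is illustrative and forces the kernel to build frags:

    #include <bpf/bpf.h>

    static int run_mb_test(int prog_fd, void *in, void *out, __u32 out_sz)
    {
            DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
                    .data_in       = in,
                    .data_size_in  = 9000,  /* > one page: tail lands in frags */
                    .data_out      = out,
                    .data_size_out = out_sz,
                    .repeat        = 1,
            );

            return bpf_prog_test_run_opts(prog_fd, &opts);
    }
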
@@ -171,6 +201,8 @@ int noinline bpf_fentry_test1(int a)
 {
        return a + 1;
 }
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
 
 int noinline bpf_fentry_test2(int a, u64 b)
 {
@@ -232,28 +264,142 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
        return sk;
 }
 
+struct prog_test_ref_kfunc {
+       int a;
+       int b;
+       struct prog_test_ref_kfunc *next;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+       .a = 42,
+       .b = 108,
+       .next = &prog_test_struct,
+};
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+       /* randomly return NULL */
+       if (get_jiffies_64() % 2)
+               return NULL;
+       return &prog_test_struct;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+}
+
+struct prog_test_pass1 {
+       int x0;
+       struct {
+               int x1;
+               struct {
+                       int x2;
+                       struct {
+                               int x3;
+                       };
+               };
+       };
+};
+
+struct prog_test_pass2 {
+       int len;
+       short arr1[4];
+       struct {
+               char arr2[4];
+               unsigned long arr3[8];
+       } x;
+};
+
+struct prog_test_fail1 {
+       void *p;
+       int x;
+};
+
+struct prog_test_fail2 {
+       int x8;
+       struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+       int len;
+       char arr1[2];
+       char arr2[];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
 __diag_pop();
 
 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
 
-BTF_SET_START(test_sk_kfunc_ids)
+BTF_SET_START(test_sk_check_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test1)
 BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
-
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
-{
-       if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
-               return true;
-       return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
-}
-
-static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
-                          u32 headroom, u32 tailroom)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID(func, bpf_kfunc_call_test_pass1)
+BTF_ID(func, bpf_kfunc_call_test_pass2)
+BTF_ID(func, bpf_kfunc_call_test_fail1)
+BTF_ID(func, bpf_kfunc_call_test_fail2)
+BTF_ID(func, bpf_kfunc_call_test_fail3)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_SET_END(test_sk_check_kfunc_ids)
+
+BTF_SET_START(test_sk_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_acquire_kfunc_ids)
+
+BTF_SET_START(test_sk_release_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_SET_END(test_sk_release_kfunc_ids)
+
+BTF_SET_START(test_sk_ret_null_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_ret_null_kfunc_ids)
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+                          u32 size, u32 headroom, u32 tailroom)
 {
        void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
-       u32 user_size = kattr->test.data_size_in;
        void *data;
 
        if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
@@ -581,7 +727,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (kattr->test.flags || kattr->test.cpu)
                return -EINVAL;
 
-       data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+       data = bpf_test_init(kattr, kattr->test.data_size_in,
+                            size, NET_SKB_PAD + NET_IP_ALIGN,
                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
        if (IS_ERR(data))
                return PTR_ERR(data);
@@ -683,7 +830,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        /* bpf program can never convert linear skb to non-linear */
        if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
                size = skb_headlen(skb);
-       ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+       ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
+                             duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct __sk_buff));
@@ -758,16 +906,16 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                          union bpf_attr __user *uattr)
 {
        u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-       u32 headroom = XDP_PACKET_HEADROOM;
        u32 size = kattr->test.data_size_in;
+       u32 headroom = XDP_PACKET_HEADROOM;
+       u32 retval, duration, max_data_sz;
        u32 repeat = kattr->test.repeat;
        struct netdev_rx_queue *rxqueue;
+       struct skb_shared_info *sinfo;
        struct xdp_buff xdp = {};
-       u32 retval, duration;
+       int i, ret = -EINVAL;
        struct xdp_md *ctx;
-       u32 max_data_sz;
        void *data;
-       int ret = -EINVAL;
 
        if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
            prog->expected_attach_type == BPF_XDP_CPUMAP)
@@ -787,26 +935,60 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                headroom -= ctx->data;
        }
 
-       /* XDP have extra tailroom as (most) drivers use full page */
        max_data_sz = 4096 - headroom - tailroom;
+       size = min_t(u32, size, max_data_sz);
 
-       data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
+       data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
        if (IS_ERR(data)) {
                ret = PTR_ERR(data);
                goto free_ctx;
        }
 
        rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
-       xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
-                     &rxqueue->xdp_rxq);
+       rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+       xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
        xdp_prepare_buff(&xdp, data, headroom, size, true);
+       sinfo = xdp_get_shared_info_from_buff(&xdp);
 
        ret = xdp_convert_md_to_buff(ctx, &xdp);
        if (ret)
                goto free_data;
 
+       if (unlikely(kattr->test.data_size_in > size)) {
+               void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+
+               while (size < kattr->test.data_size_in) {
+                       struct page *page;
+                       skb_frag_t *frag;
+                       int data_len;
+
+                       page = alloc_page(GFP_KERNEL);
+                       if (!page) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+
+                       frag = &sinfo->frags[sinfo->nr_frags++];
+                       __skb_frag_set_page(frag, page);
+
+                       data_len = min_t(int, kattr->test.data_size_in - size,
+                                        PAGE_SIZE);
+                       skb_frag_size_set(frag, data_len);
+
+                       if (copy_from_user(page_address(page), data_in + size,
+                                          data_len)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
+                       sinfo->xdp_frags_size += data_len;
+                       size += data_len;
+               }
+               xdp_buff_set_frags_flag(&xdp);
+       }
+
        if (repeat > 1)
                bpf_prog_change_xdp(NULL, prog);
+
        ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
        /* We convert the xdp_buff back to an xdp_md before checking the return
         * code so the reference count of any held netdevice will be decremented
@@ -816,12 +998,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (ret)
                goto out;
 
-       if (xdp.data_meta != data + headroom ||
-           xdp.data_end != xdp.data_meta + size)
-               size = xdp.data_end - xdp.data_meta;
-
-       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
-                             duration);
+       size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
+       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+                             retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct xdp_md));
@@ -830,6 +1009,8 @@ out:
        if (repeat > 1)
                bpf_prog_change_xdp(prog, NULL);
 free_data:
+       for (i = 0; i < sinfo->nr_frags; i++)
+               __free_page(skb_frag_page(&sinfo->frags[i]));
        kfree(data);
 free_ctx:
        kfree(ctx);
@@ -876,7 +1057,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (size < ETH_HLEN)
                return -EINVAL;
 
-       data = bpf_test_init(kattr, size, 0, 0);
+       data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
        if (IS_ERR(data))
                return PTR_ERR(data);
 
@@ -911,8 +1092,8 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (ret < 0)
                goto out;
 
-       ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
-                             retval, duration);
+       ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
+                             sizeof(flow_keys), retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, user_ctx,
                                     sizeof(struct bpf_flow_keys));
@@ -1016,7 +1197,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
                user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
        }
 
-       ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+       ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
 
@@ -1067,3 +1248,17 @@ out:
        kfree(ctx);
        return err;
 }
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &test_sk_check_kfunc_ids,
+       .acquire_set  = &test_sk_acquire_kfunc_ids,
+       .release_set  = &test_sk_release_kfunc_ids,
+       .ret_null_set = &test_sk_ret_null_kfunc_ids,
+};
+
+static int __init bpf_prog_test_run_init(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+}
+late_initcall(bpf_prog_test_run_init);
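
The same registration interface replaces the per-verifier check_kfunc_call
hooks removed elsewhere in this merge. A hedged sketch of how a module would
expose its own kfunc set; "demo_kfunc" and the module itself are hypothetical:

    #include <linux/btf.h>
    #include <linux/btf_ids.h>
    #include <linux/module.h>

    /* hypothetical kfunc this module exports to BPF */
    noinline int demo_kfunc(int x)
    {
            return x + 1;
    }

    BTF_SET_START(demo_check_kfunc_ids)
    BTF_ID(func, demo_kfunc)
    BTF_SET_END(demo_check_kfunc_ids)

    static const struct btf_kfunc_id_set demo_kfunc_set = {
            .owner     = THIS_MODULE,
            .check_set = &demo_check_kfunc_ids,
    };

    static int __init demo_init(void)
    {
            return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
                                             &demo_kfunc_set);
    }
    module_init(demo_init);

    MODULE_LICENSE("GPL");
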
index 4603b7c..a06931c 100644 (file)
@@ -3783,6 +3783,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_ANYTHING,
 };
+
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff *, xdp)
+{
+       return xdp_get_buff_len(xdp);
+}
+
+static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+       .func           = bpf_xdp_get_buff_len,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+       .func           = bpf_xdp_get_buff_len,
+       .gpl_only       = false,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg1_btf_id    = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
        return xdp_data_meta_unsupported(xdp) ? 0 :
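
From the program side, ctx->data_end - ctx->data only covers the linear area
of a frags ("multi-buffer") packet, so the new helper is the way to learn the
full frame length. A minimal sketch, assuming UAPI headers that already carry
bpf_xdp_get_buff_len(); the 1500-byte threshold is illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    char LICENSE[] SEC("license") = "GPL";

    SEC("xdp")
    int drop_jumbo(struct xdp_md *ctx)
    {
            /* linear bytes plus all fragments */
            if (bpf_xdp_get_buff_len(ctx) > 1500)
                    return XDP_DROP;
            return XDP_PASS;
    }
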
@@ -3817,11 +3839,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+                            void *buf, unsigned long len, bool flush)
+{
+       unsigned long ptr_len, ptr_off = 0;
+       skb_frag_t *next_frag, *end_frag;
+       struct skb_shared_info *sinfo;
+       void *src, *dst;
+       u8 *ptr_buf;
+
+       if (likely(xdp->data_end - xdp->data >= off + len)) {
+               src = flush ? buf : xdp->data + off;
+               dst = flush ? xdp->data + off : buf;
+               memcpy(dst, src, len);
+               return;
+       }
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       end_frag = &sinfo->frags[sinfo->nr_frags];
+       next_frag = &sinfo->frags[0];
+
+       ptr_len = xdp->data_end - xdp->data;
+       ptr_buf = xdp->data;
+
+       while (true) {
+               if (off < ptr_off + ptr_len) {
+                       unsigned long copy_off = off - ptr_off;
+                       unsigned long copy_len = min(len, ptr_len - copy_off);
+
+                       src = flush ? buf : ptr_buf + copy_off;
+                       dst = flush ? ptr_buf + copy_off : buf;
+                       memcpy(dst, src, copy_len);
+
+                       off += copy_len;
+                       len -= copy_len;
+                       buf += copy_len;
+               }
+
+               if (!len || next_frag == end_frag)
+                       break;
+
+               ptr_off += ptr_len;
+               ptr_buf = skb_frag_address(next_frag);
+               ptr_len = skb_frag_size(next_frag);
+               next_frag++;
+       }
+}
+
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       u32 size = xdp->data_end - xdp->data;
+       void *addr = xdp->data;
+       int i;
+
+       if (unlikely(offset > 0xffff || len > 0xffff))
+               return ERR_PTR(-EFAULT);
+
+       if (offset + len > xdp_get_buff_len(xdp))
+               return ERR_PTR(-EINVAL);
+
+       if (offset < size) /* linear area */
+               goto out;
+
+       offset -= size;
+       for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+               u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+               if (offset < frag_size) {
+                       addr = skb_frag_address(&sinfo->frags[i]);
+                       size = frag_size;
+                       break;
+               }
+               offset -= frag_size;
+       }
+out:
+       return offset + len <= size ? addr + offset : NULL;
+}
+
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+          void *, buf, u32, len)
+{
+       void *ptr;
+
+       ptr = bpf_xdp_pointer(xdp, offset, len);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       if (!ptr)
+               bpf_xdp_copy_buf(xdp, offset, buf, len, false);
+       else
+               memcpy(buf, ptr, len);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+       .func           = bpf_xdp_load_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+          void *, buf, u32, len)
+{
+       void *ptr;
+
+       ptr = bpf_xdp_pointer(xdp, offset, len);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       if (!ptr)
+               bpf_xdp_copy_buf(xdp, offset, buf, len, true);
+       else
+               memcpy(ptr, buf, len);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+       .func           = bpf_xdp_store_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+};
+
+static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+       struct xdp_rxq_info *rxq = xdp->rxq;
+       unsigned int tailroom;
+
+       if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+               return -EOPNOTSUPP;
+
+       tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+       if (unlikely(offset > tailroom))
+               return -EINVAL;
+
+       memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+       skb_frag_size_add(frag, offset);
+       sinfo->xdp_frags_size += offset;
+
+       return 0;
+}
+
+static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       int i, n_frags_free = 0, len_free = 0;
+
+       if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
+               return -EINVAL;
+
+       for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+               skb_frag_t *frag = &sinfo->frags[i];
+               int shrink = min_t(int, offset, skb_frag_size(frag));
+
+               len_free += shrink;
+               offset -= shrink;
+
+               if (skb_frag_size(frag) == shrink) {
+                       struct page *page = skb_frag_page(frag);
+
+                       __xdp_return(page_address(page), &xdp->rxq->mem,
+                                    false, NULL);
+                       n_frags_free++;
+               } else {
+                       skb_frag_size_sub(frag, shrink);
+                       break;
+               }
+       }
+       sinfo->nr_frags -= n_frags_free;
+       sinfo->xdp_frags_size -= len_free;
+
+       if (unlikely(!sinfo->nr_frags)) {
+               xdp_buff_clear_frags_flag(xdp);
+               xdp->data_end -= offset;
+       }
+
+       return 0;
+}
+
 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
 {
        void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
        void *data_end = xdp->data_end + offset;
 
+       if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
+               if (offset < 0)
+                       return bpf_xdp_frags_shrink_tail(xdp, -offset);
+
+               return bpf_xdp_frags_increase_tail(xdp, offset);
+       }
+
        /* Notice that xdp_data_hard_end have reserved some tailroom */
        if (unlikely(data_end > data_hard_end))
                return -EINVAL;
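
bpf_xdp_pointer() gives the two new helpers a fast path when the requested
range sits in one buffer and falls back to bpf_xdp_copy_buf() when it spans
fragments, so programs can read and write at offsets beyond xdp->data_end.
A minimal sketch, assuming UAPI headers with the new helpers; the offset is
illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    char LICENSE[] SEC("license") = "GPL";

    SEC("xdp")
    int flip_byte(struct xdp_md *ctx)
    {
            __u8 buf[4];

            /* offset 1500 may land inside a fragment; that is fine */
            if (bpf_xdp_load_bytes(ctx, 1500, buf, sizeof(buf)))
                    return XDP_PASS;
            buf[0] ^= 0xff;
            bpf_xdp_store_bytes(ctx, 1500, buf, sizeof(buf));
            return XDP_PASS;
    }
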
@@ -4047,6 +4266,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
        enum bpf_map_type map_type = ri->map_type;
 
+       /* XDP_REDIRECT is not fully supported yet for xdp frags since
+        * not all XDP capable drivers can map non-linear xdp_frame in
+        * ndo_xdp_xmit.
+        */
+       if (unlikely(xdp_buff_has_frags(xdp) &&
+                    map_type != BPF_MAP_TYPE_CPUMAP))
+               return -EOPNOTSUPP;
+
        if (map_type == BPF_MAP_TYPE_XSKMAP)
                return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
 
@@ -4590,10 +4817,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
 };
 #endif
 
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
                                  unsigned long off, unsigned long len)
 {
-       memcpy(dst_buff, src_buff + off, len);
+       struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+
+       bpf_xdp_copy_buf(xdp, off, dst, len, false);
        return 0;
 }
 
@@ -4604,11 +4833,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
 
        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
                return -EINVAL;
-       if (unlikely(!xdp ||
-                    xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+       if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
                return -EFAULT;
 
-       return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+       return bpf_event_output(map, flags, meta, meta_size, xdp,
                                xdp_size, bpf_xdp_copy);
 }
 
@@ -7533,6 +7762,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_xdp_redirect_map_proto;
        case BPF_FUNC_xdp_adjust_tail:
                return &bpf_xdp_adjust_tail_proto;
+       case BPF_FUNC_xdp_get_buff_len:
+               return &bpf_xdp_get_buff_len_proto;
+       case BPF_FUNC_xdp_load_bytes:
+               return &bpf_xdp_load_bytes_proto;
+       case BPF_FUNC_xdp_store_bytes:
+               return &bpf_xdp_store_bytes_proto;
        case BPF_FUNC_fib_lookup:
                return &bpf_xdp_fib_lookup_proto;
        case BPF_FUNC_check_mtu:
@@ -10062,7 +10297,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
        .convert_ctx_access     = tc_cls_act_convert_ctx_access,
        .gen_prologue           = tc_cls_act_prologue,
        .gen_ld_abs             = bpf_gen_ld_abs,
-       .check_kfunc_call       = bpf_prog_test_check_kfunc_call,
 };
 
 const struct bpf_prog_ops tc_cls_act_prog_ops = {
index a5b5bb9..c53d9aa 100644 (file)
@@ -301,6 +301,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
 
        return peer;
 }
+EXPORT_SYMBOL_GPL(get_net_ns_by_id);
 
 /*
  * setup_net runs the initializers for the network namespace object.
index 4ff806d..cccf21f 100644 (file)
@@ -2266,6 +2266,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
                        /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
                        sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+                       sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
                        /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
                        max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
                }
index 1827669..2d213c4 100644 (file)
@@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
        return NULL;
 }
 
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-                               struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+                               u32 which)
 {
        struct sk_psock_progs *progs = sock_map_progs(map);
-       struct bpf_prog **pprog;
 
        if (!progs)
                return -EOPNOTSUPP;
 
        switch (which) {
        case BPF_SK_MSG_VERDICT:
-               pprog = &progs->msg_parser;
+               *pprog = &progs->msg_parser;
                break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
        case BPF_SK_SKB_STREAM_PARSER:
-               pprog = &progs->stream_parser;
+               *pprog = &progs->stream_parser;
                break;
 #endif
        case BPF_SK_SKB_STREAM_VERDICT:
                if (progs->skb_verdict)
                        return -EBUSY;
-               pprog = &progs->stream_verdict;
+               *pprog = &progs->stream_verdict;
                break;
        case BPF_SK_SKB_VERDICT:
                if (progs->stream_verdict)
                        return -EBUSY;
-               pprog = &progs->skb_verdict;
+               *pprog = &progs->skb_verdict;
                break;
        default:
                return -EOPNOTSUPP;
        }
 
+       return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+                               struct bpf_prog *old, u32 which)
+{
+       struct bpf_prog **pprog;
+       int ret;
+
+       ret = sock_map_prog_lookup(map, &pprog, which);
+       if (ret)
+               return ret;
+
        if (old)
                return psock_replace_prog(pprog, prog, old);
 
@@ -1455,6 +1467,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
        return 0;
 }
 
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr)
+{
+       __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+       u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+       struct bpf_prog **pprog;
+       struct bpf_prog *prog;
+       struct bpf_map *map;
+       struct fd f;
+       u32 id = 0;
+       int ret;
+
+       if (attr->query.query_flags)
+               return -EINVAL;
+
+       f = fdget(ufd);
+       map = __bpf_map_get(f);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       rcu_read_lock();
+
+       ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+       if (ret)
+               goto end;
+
+       prog = *pprog;
+       prog_cnt = !prog ? 0 : 1;
+
+       if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+               goto end;
+
+       /* We do not hold the refcnt; the bpf prog may be released
+        * asynchronously, in which case its id reads as 0.
+        */
+       id = data_race(prog->aux->id);
+       if (id == 0)
+               prog_cnt = 0;
+
+end:
+       rcu_read_unlock();
+
+       if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+           (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+           copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+               ret = -EFAULT;
+
+       fdput(f);
+       return ret;
+}
+
 static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
        switch (link->map->map_type) {
index 7aba355..361df31 100644 (file)
@@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
 }
 
 /* Returns 0 on success, negative on failure */
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
-                    struct net_device *dev, u32 queue_index, unsigned int napi_id)
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                      struct net_device *dev, u32 queue_index,
+                      unsigned int napi_id, u32 frag_size)
 {
        if (!dev) {
                WARN(1, "Missing net_device from driver");
@@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
        xdp_rxq->dev = dev;
        xdp_rxq->queue_index = queue_index;
        xdp_rxq->napi_id = napi_id;
+       xdp_rxq->frag_size = frag_size;
 
        xdp_rxq->reg_state = REG_STATE_REGISTERED;
        return 0;
 }
-EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
 
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
 {
@@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  * is used for those calls sites.  Thus, allowing for faster recycling
  * of xdp_frames/pages in those cases.
  */
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
-                        struct xdp_buff *xdp)
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                 struct xdp_buff *xdp)
 {
        struct xdp_mem_allocator *xa;
        struct page *page;
@@ -406,12 +408,38 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdpf->mem, false, NULL);
+       }
+out:
        __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdpf->mem, true, NULL);
+       }
+out:
        __xdp_return(xdpf->data, &xdpf->mem, true, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
@@ -447,7 +475,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
        struct xdp_mem_allocator *xa;
 
        if (mem->type != MEM_TYPE_PAGE_POOL) {
-               __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+               xdp_return_frame(xdpf);
                return;
        }
 
@@ -466,12 +494,38 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
                bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
        }
 
+       if (unlikely(xdp_frame_has_frags(xdpf))) {
+               struct skb_shared_info *sinfo;
+               int i;
+
+               sinfo = xdp_get_shared_info_from_frame(xdpf);
+               for (i = 0; i < sinfo->nr_frags; i++) {
+                       skb_frag_t *frag = &sinfo->frags[i];
+
+                       bq->q[bq->count++] = skb_frag_address(frag);
+                       if (bq->count == XDP_BULK_QUEUE_SIZE)
+                               xdp_flush_frame_bulk(bq);
+               }
+       }
        bq->q[bq->count++] = xdpf->data;
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
+       }
+out:
        __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 }
 
@@ -561,8 +615,14 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
                                           struct sk_buff *skb,
                                           struct net_device *dev)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
        unsigned int headroom, frame_size;
        void *hard_start;
+       u8 nr_frags;
+
+       /* xdp frags frame */
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               nr_frags = sinfo->nr_frags;
 
        /* Part of headroom was reserved to xdpf */
        headroom = sizeof(*xdpf) + xdpf->headroom;
@@ -582,6 +642,12 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
        if (xdpf->metasize)
                skb_metadata_set(skb, xdpf->metasize);
 
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               xdp_update_skb_shared_info(skb, nr_frags,
+                                          sinfo->xdp_frags_size,
+                                          nr_frags * xdpf->frame_sz,
+                                          xdp_frame_is_frag_pfmemalloc(xdpf));
+
        /* Essential SKB info: protocol and skb->dev */
        skb->protocol = eth_type_trans(skb, dev);
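
Drivers opt into frags-aware tail growth by registering a per-fragment buffer
size; existing callers of xdp_rxq_info_reg() keep the old behavior, since it
now wraps __xdp_rxq_info_reg() (in include/net/xdp.h, not shown in this
digest) with frag_size == 0, which makes bpf_xdp_frags_increase_tail() return
-EOPNOTSUPP. A hedged driver-side sketch; the ring structure and its fields
are illustrative:

    #include <net/xdp.h>

    struct demo_ring {                      /* illustrative driver state */
            struct xdp_rxq_info xdp_rxq;
            struct net_device *netdev;
            u32 q_index;
            unsigned int napi_id;
    };

    static int demo_ring_reg_xdp(struct demo_ring *ring)
    {
            /* frag_size = PAGE_SIZE: each fragment is a full page, so the
             * core knows how much tailroom bpf_xdp_adjust_tail() may use
             */
            return __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
                                      ring->q_index, ring->napi_id,
                                      PAGE_SIZE);
    }
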
 
index 5183e62..671c377 100644 (file)
@@ -136,11 +136,6 @@ static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
        return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
 }
 
-static inline u64 max48(const u64 seq1, const u64 seq2)
-{
-       return after48(seq1, seq2) ? seq1 : seq2;
-}
-
 /**
  * dccp_loss_count - Approximate the number of lost data packets in a burst loss
  * @s1:  last known sequence number before the loss ('hole')
index 0ea2927..ae66256 100644 (file)
@@ -1030,15 +1030,9 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v4_ctl_sk);
 }
 
-static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&dccp_hashinfo, AF_INET);
-}
-
 static struct pernet_operations dccp_v4_ops = {
        .init   = dccp_v4_init_net,
        .exit   = dccp_v4_exit_net,
-       .exit_batch = dccp_v4_exit_batch,
        .id     = &dccp_v4_pernet_id,
        .size   = sizeof(struct dccp_v4_pernet),
 };
index fa66351..eab3bd1 100644 (file)
@@ -1115,15 +1115,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v6_ctl_sk);
 }
 
-static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&dccp_hashinfo, AF_INET6);
-}
-
 static struct pernet_operations dccp_v6_ops = {
        .init   = dccp_v6_init_net,
        .exit   = dccp_v6_exit_net,
-       .exit_batch = dccp_v6_exit_batch,
        .id     = &dccp_v6_pernet_id,
        .size   = sizeof(struct dccp_v6_pernet),
 };
index 91e7a22..64d805b 100644 (file)
@@ -22,6 +22,7 @@
 #include "feat.h"
 
 struct inet_timewait_death_row dccp_death_row = {
+       .tw_refcount = REFCOUNT_INIT(1),
        .sysctl_max_tw_buckets = NR_FILE * 2,
        .hashinfo       = &dccp_hashinfo,
 };
index e3c7d26..517cc83 100644 (file)
@@ -113,26 +113,15 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
        return dsa_tag_8021q_bridge_join(ds, info);
 }
 
-static int dsa_switch_bridge_leave(struct dsa_switch *ds,
-                                  struct dsa_notifier_bridge_info *info)
+static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds,
+                                         struct dsa_notifier_bridge_info *info)
 {
-       struct dsa_switch_tree *dst = ds->dst;
        struct netlink_ext_ack extack = {0};
        bool change_vlan_filtering = false;
        bool vlan_filtering;
        struct dsa_port *dp;
        int err;
 
-       if (dst->index == info->tree_index && ds->index == info->sw_index &&
-           ds->ops->port_bridge_leave)
-               ds->ops->port_bridge_leave(ds, info->port, info->bridge);
-
-       if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
-           ds->ops->crosschip_bridge_leave)
-               ds->ops->crosschip_bridge_leave(ds, info->tree_index,
-                                               info->sw_index, info->port,
-                                               info->bridge);
-
        if (ds->needs_standalone_vlan_filtering &&
            !br_vlan_enabled(info->bridge.dev)) {
                change_vlan_filtering = true;
@@ -172,6 +161,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
                        return err;
        }
 
+       return 0;
+}
+
+static int dsa_switch_bridge_leave(struct dsa_switch *ds,
+                                  struct dsa_notifier_bridge_info *info)
+{
+       struct dsa_switch_tree *dst = ds->dst;
+       int err;
+
+       if (dst->index == info->tree_index && ds->index == info->sw_index &&
+           ds->ops->port_bridge_leave)
+               ds->ops->port_bridge_leave(ds, info->port, info->bridge);
+
+       if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
+           ds->ops->crosschip_bridge_leave)
+               ds->ops->crosschip_bridge_leave(ds, info->tree_index,
+                                               info->sw_index, info->port,
+                                               info->bridge);
+
+       if (ds->dst->index == info->tree_index && ds->index == info->sw_index) {
+               err = dsa_switch_sync_vlan_filtering(ds, info);
+               if (err)
+                       return err;
+       }
+
        return dsa_tag_8021q_bridge_leave(ds, info);
 }
 
index 043e4e9..ff9ec76 100644 (file)
@@ -259,11 +259,6 @@ static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct)
        return ntohs(rct->sequence_nr);
 }
 
-static inline u16 get_prp_lan_id(struct prp_rct *rct)
-{
-       return ntohs(rct->lan_id_and_LSDU_size) >> 12;
-}
-
 /* assume there is a valid rct */
 static inline bool prp_check_lsdu_size(struct sk_buff *skb,
                                       struct prp_rct *rct,
index de610cb..b60c9fd 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook  */
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bpf.h>
@@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
        }
 }
 
-BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
 BTF_ID(func, tcp_reno_ssthresh)
 BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
 
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
-{
-       if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
-               return true;
-       return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
-}
+static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &bpf_tcp_ca_check_kfunc_ids,
+};
 
 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
        .get_func_proto         = bpf_tcp_ca_get_func_proto,
        .is_valid_access        = bpf_tcp_ca_is_valid_access,
        .btf_struct_access      = bpf_tcp_ca_btf_struct_access,
-       .check_kfunc_call       = bpf_tcp_ca_check_kfunc_call,
 };
 
 static int bpf_tcp_ca_init_member(const struct btf_type *t,
@@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
        .init = bpf_tcp_ca_init,
        .name = "tcp_congestion_ops",
 };
+
+static int __init bpf_tcp_ca_kfunc_init(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+}
+late_initcall(bpf_tcp_ca_kfunc_init);
index b458986..4c53994 100644 (file)
@@ -1257,34 +1257,13 @@ fib_info_laddrhash_bucket(const struct net *net, __be32 val)
        return &fib_info_laddrhash[slot];
 }
 
-static struct hlist_head *fib_info_hash_alloc(int bytes)
-{
-       if (bytes <= PAGE_SIZE)
-               return kzalloc(bytes, GFP_KERNEL);
-       else
-               return (struct hlist_head *)
-                       __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                        get_order(bytes));
-}
-
-static void fib_info_hash_free(struct hlist_head *hash, int bytes)
-{
-       if (!hash)
-               return;
-
-       if (bytes <= PAGE_SIZE)
-               kfree(hash);
-       else
-               free_pages((unsigned long) hash, get_order(bytes));
-}
-
 static void fib_info_hash_move(struct hlist_head *new_info_hash,
                               struct hlist_head *new_laddrhash,
                               unsigned int new_size)
 {
        struct hlist_head *old_info_hash, *old_laddrhash;
        unsigned int old_size = fib_info_hash_size;
-       unsigned int i, bytes;
+       unsigned int i;
 
        spin_lock_bh(&fib_info_lock);
        old_info_hash = fib_info_hash;
@@ -1325,9 +1304,8 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 
        spin_unlock_bh(&fib_info_lock);
 
-       bytes = old_size * sizeof(struct hlist_head *);
-       fib_info_hash_free(old_info_hash, bytes);
-       fib_info_hash_free(old_laddrhash, bytes);
+       kvfree(old_info_hash);
+       kvfree(old_laddrhash);
 }
 
 __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
@@ -1444,19 +1422,19 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                unsigned int new_size = fib_info_hash_size << 1;
                struct hlist_head *new_info_hash;
                struct hlist_head *new_laddrhash;
-               unsigned int bytes;
+               size_t bytes;
 
                if (!new_size)
                        new_size = 16;
-               bytes = new_size * sizeof(struct hlist_head *);
-               new_info_hash = fib_info_hash_alloc(bytes);
-               new_laddrhash = fib_info_hash_alloc(bytes);
+               bytes = (size_t)new_size * sizeof(struct hlist_head *);
+               new_info_hash = kvzalloc(bytes, GFP_KERNEL);
+               new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
                if (!new_info_hash || !new_laddrhash) {
-                       fib_info_hash_free(new_info_hash, bytes);
-                       fib_info_hash_free(new_laddrhash, bytes);
-               } else
+                       kvfree(new_info_hash);
+                       kvfree(new_laddrhash);
+               } else {
                        fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
-
+               }
                if (!fib_info_hash_size)
                        goto failure;
        }
index b7e277d..72a375c 100644 (file)
@@ -192,24 +192,14 @@ struct icmp_control {
 
 static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
-/*
- *     The ICMP socket(s). This is the most convenient way to flow control
- *     our ICMP output as well as maintain a clean interface throughout
- *     all layers. All Socketless IP sends will soon be gone.
- *
- *     On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmp_sk(struct net *net)
-{
-       return this_cpu_read(*net->ipv4.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
 
 /* Called with BH disabled */
 static inline struct sock *icmp_xmit_lock(struct net *net)
 {
        struct sock *sk;
 
-       sk = icmp_sk(net);
+       sk = this_cpu_read(ipv4_icmp_sk);
 
        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
                /* This can happen if the output path signals a
@@ -217,11 +207,13 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
                 */
                return NULL;
        }
+       sock_net_set(sk, net);
        return sk;
 }
 
 static inline void icmp_xmit_unlock(struct sock *sk)
 {
+       sock_net_set(sk, &init_net);
        spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -363,14 +355,13 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
        return 0;
 }
 
-static void icmp_push_reply(struct icmp_bxm *icmp_param,
+static void icmp_push_reply(struct sock *sk,
+                           struct icmp_bxm *icmp_param,
                            struct flowi4 *fl4,
                            struct ipcm_cookie *ipc, struct rtable **rt)
 {
-       struct sock *sk;
        struct sk_buff *skb;
 
-       sk = icmp_sk(dev_net((*rt)->dst.dev));
        if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
                           icmp_param->data_len+icmp_param->head_len,
                           icmp_param->head_len,
@@ -452,7 +443,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
        if (IS_ERR(rt))
                goto out_unlock;
        if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
-               icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
+               icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
        ip_rt_put(rt);
 out_unlock:
        icmp_xmit_unlock(sk);
@@ -766,7 +757,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
        if (!fl4.saddr)
                fl4.saddr = htonl(INADDR_DUMMY);
 
-       icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+       icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
 ende:
        ip_rt_put(rt);
 out_unlock:
@@ -1434,46 +1425,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
        },
 };
 
-static void __net_exit icmp_sk_exit(struct net *net)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
-       free_percpu(net->ipv4.icmp_sk);
-       net->ipv4.icmp_sk = NULL;
-}
-
 static int __net_init icmp_sk_init(struct net *net)
 {
-       int i, err;
-
-       net->ipv4.icmp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv4.icmp_sk)
-               return -ENOMEM;
-
-       for_each_possible_cpu(i) {
-               struct sock *sk;
-
-               err = inet_ctl_sock_create(&sk, PF_INET,
-                                          SOCK_RAW, IPPROTO_ICMP, net);
-               if (err < 0)
-                       goto fail;
-
-               *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
-
-               /* Enough space for 2 64K ICMP packets, including
-                * sk_buff/skb_shared_info struct overhead.
-                */
-               sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
-
-               /*
-                * Speedup sock_wfree()
-                */
-               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
-       }
-
        /* Control parameters for ECHO replies. */
        net->ipv4.sysctl_icmp_echo_ignore_all = 0;
        net->ipv4.sysctl_icmp_echo_enable_probe = 0;
@@ -1499,18 +1452,36 @@ static int __net_init icmp_sk_init(struct net *net)
        net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
 
        return 0;
-
-fail:
-       icmp_sk_exit(net);
-       return err;
 }
 
 static struct pernet_operations __net_initdata icmp_sk_ops = {
        .init = icmp_sk_init,
-       .exit = icmp_sk_exit,
 };
 
 int __init icmp_init(void)
 {
+       int err, i;
+
+       for_each_possible_cpu(i) {
+               struct sock *sk;
+
+               err = inet_ctl_sock_create(&sk, PF_INET,
+                                          SOCK_RAW, IPPROTO_ICMP, &init_net);
+               if (err < 0)
+                       return err;
+
+               per_cpu(ipv4_icmp_sk, i) = sk;
+
+               /* Enough space for 2 64K ICMP packets, including
+                * sk_buff/skb_shared_info struct overhead.
+                */
+               sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+
+               /*
+                * Speedup sock_wfree()
+                */
+               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
+       }
        return register_pernet_subsys(&icmp_sk_ops);
 }
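
The conversion above replaces the per-netns array of ICMP control sockets with a single boot-time per-CPU socket that is temporarily re-tagged with the caller's netns while locked; the TCP control sockets later in this series adopt the same pattern. A compilable userspace sketch of that borrow-and-tag idea, with all names illustrative and a pthread spinlock standing in for the socket lock:

#include <pthread.h>
#include <stddef.h>

struct ctx;                            /* stand-in for struct net */

struct ctl_sock {
	pthread_spinlock_t lock;       /* stand-in for sk->sk_lock.slock */
	struct ctx *owner;             /* stand-in for sock_net(sk) */
};

/* One instance per CPU in the kernel; pthread_spin_init() at startup
 * is omitted for brevity. */
static struct ctl_sock shared_sk;

/* Borrow the shared socket and tag it with the caller's context,
 * like icmp_xmit_lock() followed by sock_net_set(sk, net). */
static struct ctl_sock *ctl_lock(struct ctx *net)
{
	if (pthread_spin_trylock(&shared_sk.lock))
		return NULL;           /* output path re-entered us */
	shared_sk.owner = net;
	return &shared_sk;
}

/* Return it to the neutral owner, like sock_net_set(sk, &init_net). */
static void ctl_unlock(struct ctl_sock *sk)
{
	sk->owner = NULL;
	pthread_spin_unlock(&sk->lock);
}
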
index 437afe3..9e0bbd0 100644 (file)
@@ -52,14 +52,15 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
        spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
-       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
-                       hashinfo->bhash_size)];
+       bhead = &hashinfo->bhash[tw->tw_bslot];
 
        spin_lock(&bhead->lock);
        inet_twsk_bind_unhash(tw, hashinfo);
        spin_unlock(&bhead->lock);
 
-       atomic_dec(&tw->tw_dr->tw_count);
+       if (refcount_dec_and_test(&tw->tw_dr->tw_refcount))
+               kfree(tw->tw_dr);
+
        inet_twsk_put(tw);
 }
 
@@ -110,8 +111,12 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
           Note, that any socket with inet->num != 0 MUST be bound in
           binding cache, even if it is closed.
         */
-       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
-                       hashinfo->bhash_size)];
+       /* Cache inet_bhashfn(), because 'struct net' might no longer be
+        * available later in inet_twsk_kill().
+        */
+       tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
+                                   hashinfo->bhash_size);
+       bhead = &hashinfo->bhash[tw->tw_bslot];
        spin_lock(&bhead->lock);
        tw->tw_tb = icsk->icsk_bind_hash;
        WARN_ON(!icsk->icsk_bind_hash);
@@ -145,10 +150,6 @@ static void tw_timer_handler(struct timer_list *t)
 {
        struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
 
-       if (tw->tw_kill)
-               __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
-       else
-               __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
        inet_twsk_kill(tw);
 }
 
@@ -158,7 +159,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 {
        struct inet_timewait_sock *tw;
 
-       if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+       if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets)
                return NULL;
 
        tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
@@ -244,59 +245,15 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
         * of PAWS.
         */
 
-       tw->tw_kill = timeo <= 4*HZ;
        if (!rearm) {
+               bool kill = timeo <= 4*HZ;
+
+               __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
+                                                    LINUX_MIB_TIMEWAITED);
                BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
-               atomic_inc(&tw->tw_dr->tw_count);
+               refcount_inc(&tw->tw_dr->tw_refcount);
        } else {
                mod_timer_pending(&tw->tw_timer, jiffies + timeo);
        }
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
-
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
-{
-       struct inet_timewait_sock *tw;
-       struct sock *sk;
-       struct hlist_nulls_node *node;
-       unsigned int slot;
-
-       for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
-               struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
-restart_rcu:
-               cond_resched();
-               rcu_read_lock();
-restart:
-               sk_nulls_for_each_rcu(sk, node, &head->chain) {
-                       if (sk->sk_state != TCP_TIME_WAIT)
-                               continue;
-                       tw = inet_twsk(sk);
-                       if ((tw->tw_family != family) ||
-                               refcount_read(&twsk_net(tw)->ns.count))
-                               continue;
-
-                       if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
-                               continue;
-
-                       if (unlikely((tw->tw_family != family) ||
-                                    refcount_read(&twsk_net(tw)->ns.count))) {
-                               inet_twsk_put(tw);
-                               goto restart;
-                       }
-
-                       rcu_read_unlock();
-                       local_bh_disable();
-                       inet_twsk_deschedule_put(tw);
-                       local_bh_enable();
-                       goto restart_rcu;
-               }
-               /* If the nulls value we got at the end of this lookup is
-                * not the expected one, we must restart lookup.
-                * We probably met an item that was moved to another chain.
-                */
-               if (get_nulls_value(node) != slot)
-                       goto restart;
-               rcu_read_unlock();
-       }
-}
-EXPORT_SYMBOL_GPL(inet_twsk_purge);
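
tw_count is replaced above by a biased refcount: the netns holds one reference for as long as it lives, so the number of in-flight timewait sockets is tw_refcount - 1, and the death row is freed by whichever side (netns exit or last timewait teardown) drops the final reference. A standalone sketch of the bias, with C11 atomics standing in for refcount_t and illustrative names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct death_row {
	atomic_int tw_refcount;        /* starts at 1: the netns reference */
	int sysctl_max_tw_buckets;
};

/* Live timewait sockets = refcount minus the netns bias of 1. */
static bool tw_alloc_allowed(struct death_row *dr)
{
	return atomic_load(&dr->tw_refcount) - 1 < dr->sysctl_max_tw_buckets;
}

/* Called by both timewait teardown and netns exit; last one frees. */
static void tw_refcount_put(struct death_row *dr)
{
	if (atomic_fetch_sub(&dr->tw_refcount, 1) == 1)
		free(dr);
}

This is also why inet_twsk_purge() can be deleted from the TCP exit paths later in the series: a timewait socket no longer pins or dereferences a dying netns.
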
index f30273a..2883607 100644 (file)
@@ -59,8 +59,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
        socket_seq_show(seq);
        seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
                   sock_prot_inuse_get(net, &tcp_prot), orphans,
-                  atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
-                  proto_memory_allocated(&tcp_prot));
+                  refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1,
+                  sockets, proto_memory_allocated(&tcp_prot));
        seq_printf(seq, "UDP: inuse %d mem %ld\n",
                   sock_prot_inuse_get(net, &udp_prot),
                   proto_memory_allocated(&udp_prot));
index ff6f91c..e42e283 100644 (file)
 
 #define DEFAULT_MIN_PMTU (512 + 20 + 20)
 #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
-
+#define DEFAULT_MIN_ADVMSS 256
 static int ip_rt_max_size;
 static int ip_rt_redirect_number __read_mostly = 9;
 static int ip_rt_redirect_load __read_mostly   = HZ / 50;
 static int ip_rt_redirect_silence __read_mostly        = ((HZ / 50) << (9 + 1));
 static int ip_rt_error_cost __read_mostly      = HZ;
 static int ip_rt_error_burst __read_mostly     = 5 * HZ;
-static int ip_rt_min_advmss __read_mostly      = 256;
 
 static int ip_rt_gc_timeout __read_mostly      = RT_GC_TIMEOUT;
 
@@ -1298,9 +1297,10 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 
 static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 {
+       struct net *net = dev_net(dst->dev);
        unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
        unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
-                                   ip_rt_min_advmss);
+                                   net->ipv4.ip_rt_min_advmss);
 
        return min(advmss, IPV4_MAX_PMTU - header_size);
 }
@@ -3535,13 +3535,6 @@ static struct ctl_table ipv4_route_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
-       {
-               .procname       = "min_adv_mss",
-               .data           = &ip_rt_min_advmss,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
        { }
 };
 
@@ -3569,6 +3562,13 @@ static struct ctl_table ipv4_route_netns_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
+       {
+               .procname       = "min_adv_mss",
+               .data           = &init_net.ipv4.ip_rt_min_advmss,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        { },
 };
 
@@ -3631,6 +3631,7 @@ static __net_init int netns_ip_rt_init(struct net *net)
        /* Set default value for namespaceified sysctls */
        net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
        net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
+       net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS;
        return 0;
 }
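
With min_adv_mss now per netns, the advertised MSS floor can differ between namespaces. A sketch of the resulting clamp in ipv4_default_advmss(), with the constants spelled out (IPV4_MAX_PMTU is 65535):

/* advmss = clamp(mtu - headers, netns min_advmss, IPV4_MAX_PMTU - headers) */
static unsigned int default_advmss(unsigned int mtu, unsigned int min_advmss)
{
	const unsigned int hdrs = 20 + 20;    /* sizeof(iphdr) + sizeof(tcphdr) */
	unsigned int advmss = mtu > hdrs ? mtu - hdrs : 0;

	if (advmss < min_advmss)
		advmss = min_advmss;
	if (advmss > 65535 - hdrs)            /* IPV4_MAX_PMTU cap */
		advmss = 65535 - hdrs;
	return advmss;
}
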
 
index 97eb547..1cae27b 100644 (file)
@@ -589,6 +589,14 @@ static struct ctl_table ipv4_table[] = {
 };
 
 static struct ctl_table ipv4_net_table[] = {
+       /* tcp_max_tw_buckets must be first in this table. */
+       {
+               .procname       = "tcp_max_tw_buckets",
+/*             .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
        {
                .procname       = "icmp_echo_ignore_all",
                .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
@@ -1001,13 +1009,6 @@ static struct ctl_table ipv4_net_table[] = {
                .extra2         = &two,
        },
        {
-               .procname       = "tcp_max_tw_buckets",
-               .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec
-       },
-       {
                .procname       = "tcp_max_syn_backlog",
                .data           = &init_net.ipv4.sysctl_max_syn_backlog,
                .maxlen         = sizeof(int),
@@ -1400,7 +1401,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
                if (!table)
                        goto err_alloc;
 
-               for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+               /* skip first entry (sysctl_max_tw_buckets) */
+               for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
                        if (table[i].data) {
                                /* Update the variables to point into
                                 * the current struct net
@@ -1415,6 +1417,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
                }
        }
 
+       table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets;
+
        net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
        if (!net->ipv4.ipv4_hdr)
                goto err_reg;
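
Because tcp_death_row is now a separately allocated object rather than an embedded struct member, its sysctl entry cannot be rebased by the generic pointer-offset loop; the patch pins it to slot 0, skips it in the loop, and patches table[0].data by hand. A sketch of that "first entry is special" pattern, with illustrative structure names:

#include <stddef.h>

struct ctl_entry {
	const char *procname;
	void *data;
};

/* Per-netns clone: rebase every .data pointer except slot 0, which
 * points into a separately allocated object and is patched directly. */
static void clone_table(struct ctl_entry *table, size_t n,
                        char *old_base, char *new_base, void *tw_buckets)
{
	size_t i;

	for (i = 1; i < n; i++)                /* skip the special slot 0 */
		if (table[i].data)
			table[i].data = new_base +
					((char *)table[i].data - old_base);

	table[0].data = tw_buckets;    /* &death_row->sysctl_max_tw_buckets */
}
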
index 78e8146..cf1ce81 100644 (file)
@@ -894,8 +894,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
                return mss_now;
 
        /* Note : tcp_tso_autosize() will eventually split this later */
-       new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
-       new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+       new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size);
 
        /* We try hard to avoid divides here */
        size_goal = tp->gso_segs * mss_now;
index ec55500..02e8626 100644 (file)
@@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
        .set_state      = bbr_set_state,
 };
 
-BTF_SET_START(tcp_bbr_kfunc_ids)
+BTF_SET_START(tcp_bbr_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, bbr_init)
@@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
 #endif
 #endif
-BTF_SET_END(tcp_bbr_kfunc_ids)
+BTF_SET_END(tcp_bbr_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_bbr_check_kfunc_ids,
+};
 
 static int __init bbr_register(void)
 {
        int ret;
 
        BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
-       ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
-       if (ret)
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&tcp_bbr_cong_ops);
 }
 
 static void __exit bbr_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
        tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
 }
 
index e07837e..24d562d 100644 (file)
@@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
        .name           = "cubic",
 };
 
-BTF_SET_START(tcp_cubic_kfunc_ids)
+BTF_SET_START(tcp_cubic_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, cubictcp_init)
@@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event)
 BTF_ID(func, cubictcp_acked)
 #endif
 #endif
-BTF_SET_END(tcp_cubic_kfunc_ids)
+BTF_SET_END(tcp_cubic_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_cubic_check_kfunc_ids,
+};
 
 static int __init cubictcp_register(void)
 {
@@ -534,16 +537,14 @@ static int __init cubictcp_register(void)
        /* divide by bic_scale and by constant Srtt (100ms) */
        do_div(cube_factor, bic_scale * 10);
 
-       ret = tcp_register_congestion_control(&cubictcp);
-       if (ret)
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&cubictcp);
 }
 
 static void __exit cubictcp_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
        tcp_unregister_congestion_control(&cubictcp);
 }
 
index 0d7ab3c..1943a66 100644 (file)
@@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
        .name           = "dctcp-reno",
 };
 
-BTF_SET_START(tcp_dctcp_kfunc_ids)
+BTF_SET_START(tcp_dctcp_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, dctcp_init)
@@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo)
 BTF_ID(func, dctcp_state)
 #endif
 #endif
-BTF_SET_END(tcp_dctcp_kfunc_ids)
+BTF_SET_END(tcp_dctcp_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_dctcp_check_kfunc_ids,
+};
 
 static int __init dctcp_register(void)
 {
        int ret;
 
        BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
-       ret = tcp_register_congestion_control(&dctcp);
-       if (ret)
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&dctcp);
 }
 
 static void __exit dctcp_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
        tcp_unregister_congestion_control(&dctcp);
 }
 
index fec656f..6873f46 100644 (file)
@@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
+static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+
 static u32 tcp_v4_init_seq(const struct sk_buff *skb)
 {
        return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -206,7 +208,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+       struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;
@@ -810,7 +812,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        arg.tos = ip_hdr(skb)->tos;
        arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
        if (sk) {
                ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                                   inet_twsk(sk)->tw_mark : sk->sk_mark;
@@ -825,6 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                              transmit_time);
 
        ctl_sk->sk_mark = 0;
+       sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
        local_bh_enable();
@@ -908,7 +912,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
        arg.tos = tos;
        arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
        ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                           inet_twsk(sk)->tw_mark : sk->sk_mark;
        ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -921,6 +926,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
                              transmit_time);
 
        ctl_sk->sk_mark = 0;
+       sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        local_bh_enable();
 }
@@ -3111,41 +3117,18 @@ EXPORT_SYMBOL(tcp_prot);
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-       int cpu;
+       struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row;
 
        if (net->ipv4.tcp_congestion_control)
                bpf_module_put(net->ipv4.tcp_congestion_control,
                               net->ipv4.tcp_congestion_control->owner);
-
-       for_each_possible_cpu(cpu)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
-       free_percpu(net->ipv4.tcp_sk);
+       if (refcount_dec_and_test(&tcp_death_row->tw_refcount))
+               kfree(tcp_death_row);
 }
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-       int res, cpu, cnt;
-
-       net->ipv4.tcp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv4.tcp_sk)
-               return -ENOMEM;
-
-       for_each_possible_cpu(cpu) {
-               struct sock *sk;
-
-               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
-                                          IPPROTO_TCP, net);
-               if (res)
-                       goto fail;
-               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-
-               /* Please enforce IP_DF and IPID==0 for RST and
-                * ACK sent in SYN-RECV and TIME-WAIT state.
-                */
-               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
-
-               *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
-       }
+       int cnt;
 
        net->ipv4.sysctl_tcp_ecn = 2;
        net->ipv4.sysctl_tcp_ecn_fallback = 1;
@@ -3172,9 +3155,13 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.sysctl_tcp_tw_reuse = 2;
        net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
 
+       net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL);
+       if (!net->ipv4.tcp_death_row)
+               return -ENOMEM;
+       refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1);
        cnt = tcp_hashinfo.ehash_mask + 1;
-       net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
-       net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
+       net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2;
+       net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo;
 
        net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
        net->ipv4.sysctl_tcp_sack = 1;
@@ -3229,18 +3216,12 @@ static int __net_init tcp_sk_init(struct net *net)
                net->ipv4.tcp_congestion_control = &tcp_reno;
 
        return 0;
-fail:
-       tcp_sk_exit(net);
-
-       return res;
 }
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
        struct net *net;
 
-       inet_twsk_purge(&tcp_hashinfo, AF_INET);
-
        list_for_each_entry(net, net_exit_list, exit_list)
                tcp_fastopen_ctx_destroy(net);
 }
@@ -3326,6 +3307,24 @@ static void __init bpf_iter_register(void)
 
 void __init tcp_v4_init(void)
 {
+       int cpu, res;
+
+       for_each_possible_cpu(cpu) {
+               struct sock *sk;
+
+               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+                                          IPPROTO_TCP, &init_net);
+               if (res)
+                       panic("Failed to create the TCP control socket.\n");
+               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+               /* Please enforce IP_DF and IPID==0 for RST and
+                * ACK sent in SYN-RECV and TIME-WAIT state.
+                */
+               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
+               per_cpu(ipv4_tcp_sk, cpu) = sk;
+       }
        if (register_pernet_subsys(&tcp_sk_ops))
                panic("Failed to create the TCP control socket.\n");
 
index 7c2d3ac..3977257 100644 (file)
@@ -248,7 +248,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
        const struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_sock *tp = tcp_sk(sk);
        struct inet_timewait_sock *tw;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+       struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
        tw = inet_twsk_alloc(sk, tcp_death_row, state);
 
index 5079832..11c06b9 100644 (file)
@@ -1960,7 +1960,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 
        bytes = min_t(unsigned long,
                      sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
-                     sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+                     sk->sk_gso_max_size);
 
        /* Goal is to send at least one packet per ms,
         * not one big TSO packet every 100 ms.
index 96c5cc0..e6b978e 100644 (file)
 
 #include <linux/uaccess.h>
 
-/*
- *     The ICMP socket(s). This is the most convenient way to flow control
- *     our ICMP output as well as maintain a clean interface throughout
- *     all layers. All Socketless IP sends will soon be gone.
- *
- *     On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
-       return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
 
 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                       u8 type, u8 code, int offset, __be32 info)
@@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = {
 };
 
 /* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
 {
        struct sock *sk;
 
-       sk = icmpv6_sk(net);
+       sk = this_cpu_read(ipv6_icmp_sk);
        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
                /* This can happen if the output path (f.e. SIT or
                 * ip6ip6 tunnel) signals dst_link_failure() for an
@@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
                 */
                return NULL;
        }
+       sock_net_set(sk, net);
        return sk;
 }
 
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
 {
+       sock_net_set(sk, &init_net);
        spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
        security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
 }
 
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
-       free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
 {
        struct sock *sk;
        int err, i;
 
-       net->ipv6.icmp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv6.icmp_sk)
-               return -ENOMEM;
-
        for_each_possible_cpu(i) {
                err = inet_ctl_sock_create(&sk, PF_INET6,
-                                          SOCK_RAW, IPPROTO_ICMPV6, net);
+                                          SOCK_RAW, IPPROTO_ICMPV6, &init_net);
                if (err < 0) {
                        pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
                               err);
-                       goto fail;
+                       return err;
                }
 
-               *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+               per_cpu(ipv6_icmp_sk, i) = sk;
 
                /* Enough space for 2 64K ICMP packets, including
                 * sk_buff struct overhead.
                 */
                sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
        }
-       return 0;
-
- fail:
-       icmpv6_sk_exit(net);
-       return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
-       .init = icmpv6_sk_init,
-       .exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
-       int err;
-
-       err = register_pernet_subsys(&icmpv6_sk_ops);
-       if (err < 0)
-               return err;
 
        err = -EAGAIN;
        if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
@@ -1101,14 +1061,12 @@ sender_reg_err:
        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 fail:
        pr_err("Failed to register ICMP6 protocol\n");
-       unregister_pernet_subsys(&icmpv6_sk_ops);
        return err;
 }
 
 void icmpv6_cleanup(void)
 {
        inet6_unregister_icmp_sender(icmp6_send);
-       unregister_pernet_subsys(&icmpv6_sk_ops);
        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
 
index b29e9ba..d37a79a 100644 (file)
@@ -249,7 +249,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
                 if ((first_word & htonl(0xF00FFFFF)) ||
                     !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
                     !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
-                    *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+                    iph->nexthdr != iph2->nexthdr) {
 not_same_flow:
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
@@ -260,7 +260,8 @@ not_same_flow:
                                goto not_same_flow;
                }
                /* flush if Traffic Class fields are different */
-               NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+               NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
+                       (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
                NAPI_GRO_CB(p)->flush |= flush;
 
                /* If the previous IP ID value was based on an atomic
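
The GRO flush condition now also fires when hop limits differ, folded into the existing branch by OR-ing the hop-limit XOR into the Traffic Class test (first_word is the XOR of the two packets' leading IPv6 words). Spelled out as a host-order sketch with illustrative parameter handling:

#include <stdbool.h>
#include <stdint.h>

/* Flush when the Traffic Class bits of the XOR-ed first word are set
 * or when the hop limits differ. */
static bool gro_flush_tc_or_hl(uint32_t first_word_xor,
                               uint8_t hop_limit_a, uint8_t hop_limit_b)
{
	return ((first_word_xor & 0x0FF00000u) |
		(uint32_t)(hop_limit_a ^ hop_limit_b)) != 0;
}
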
index 97ade83..b47ffc8 100644 (file)
@@ -1121,6 +1121,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 
                        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
                        neigh_release(neigh);
+               } else if (skb->protocol == htons(ETH_P_IP)) {
+                       struct rtable *rt = skb_rtable(skb);
+
+                       if (rt->rt_gw_family == AF_INET6)
+                               memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr));
                }
        } else if (t->parms.proto != 0 && !(t->parms.flags &
                                            (IP6_TNL_F_USE_ORIG_TCLASS |
index 075ee8a..0c648bf 100644 (file)
@@ -148,6 +148,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
@@ -156,7 +157,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        struct dst_entry *dst;
        int addr_type;
        int err;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;
@@ -308,6 +308,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        inet->inet_dport = usin->sin6_port;
 
        tcp_set_state(sk, TCP_SYN_SENT);
+       tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;
@@ -2237,15 +2238,9 @@ static void __net_exit tcpv6_net_exit(struct net *net)
        inet_ctl_sock_destroy(net->ipv6.tcp_sk);
 }
 
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&tcp_hashinfo, AF_INET6);
-}
-
 static struct pernet_operations tcpv6_net_ops = {
        .init       = tcpv6_net_init,
        .exit       = tcpv6_net_exit,
-       .exit_batch = tcpv6_net_exit_batch,
 };
 
 int __init tcpv6_init(void)
index a135b1a..238b6a6 100644 (file)
@@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+ifeq ($(CONFIG_NF_CONNTRACK),m)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
+else ifeq ($(CONFIG_NF_CONNTRACK),y)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
+endif
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
new file mode 100644 (file)
index 0000000..8ad3f52
--- /dev/null
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Conntrack Helpers for the XDP and TC-BPF hooks
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+#include <linux/btf_ids.h>
+#include <linux/net_namespace.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @netns_id   - Specify the network namespace for lookup
+ *              Values:
+ *                BPF_F_CURRENT_NETNS (-1)
+ *                  Use namespace associated with ctx (xdp_md, __sk_buff)
+ *                [0, S32_MAX]
+ *                  Network Namespace ID
+ * @error      - Out parameter, set for any errors encountered
+ *              Values:
+ *                -EINVAL - Passed NULL for bpf_tuple pointer
+ *                -EINVAL - opts->reserved is not 0
+ *                -EINVAL - netns_id is less than -1
+ *                -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ *                -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ *                -ENONET - No network namespace found for netns_id
+ *                -ENOENT - Conntrack lookup could not find entry for tuple
+ *                -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ *                                or sizeof(tuple->ipv6)
+ * @l4proto    - Layer 4 protocol
+ *              Values:
+ *                IPPROTO_TCP, IPPROTO_UDP
+ * @reserved   - Reserved member, will be reused for more options in the future
+ *              Values:
+ *                0
+ */
+struct bpf_ct_opts {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 reserved[3];
+};
+
+enum {
+       NF_BPF_CT_OPTS_SZ = 12,
+};
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+                                         struct bpf_sock_tuple *bpf_tuple,
+                                         u32 tuple_len, u8 protonum,
+                                         s32 netns_id)
+{
+       struct nf_conntrack_tuple_hash *hash;
+       struct nf_conntrack_tuple tuple;
+
+       if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+               return ERR_PTR(-EPROTO);
+       if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
+               return ERR_PTR(-EINVAL);
+
+       memset(&tuple, 0, sizeof(tuple));
+       switch (tuple_len) {
+       case sizeof(bpf_tuple->ipv4):
+               tuple.src.l3num = AF_INET;
+               tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
+               tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
+               tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
+               tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
+               break;
+       case sizeof(bpf_tuple->ipv6):
+               tuple.src.l3num = AF_INET6;
+               memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+               tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
+               memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+               tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
+               break;
+       default:
+               return ERR_PTR(-EAFNOSUPPORT);
+       }
+
+       tuple.dst.protonum = protonum;
+
+       if (netns_id >= 0) {
+               net = get_net_ns_by_id(net, netns_id);
+               if (unlikely(!net))
+                       return ERR_PTR(-ENONET);
+       }
+
+       hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+       if (netns_id >= 0)
+               put_net(net);
+       if (!hash)
+               return ERR_PTR(-ENOENT);
+       return nf_ct_tuplehash_to_ctrack(hash);
+}
+
+__diag_push();
+__diag_ignore(GCC, 8, "-Wmissing-prototypes",
+             "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ *                    reference to it
+ *
+ * Parameters:
+ * @xdp_ctx    - Pointer to ctx (xdp_md) in XDP program
+ *                 Cannot be NULL
+ * @bpf_tuple  - Pointer to memory representing the tuple to look up
+ *                 Cannot be NULL
+ * @tuple__sz  - Length of the tuple structure
+ *                 Must be one of sizeof(bpf_tuple->ipv4) or
+ *                 sizeof(bpf_tuple->ipv6)
+ * @opts       - Additional options for lookup (documented above)
+ *                 Cannot be NULL
+ * @opts__sz   - Length of the bpf_ct_opts structure
+ *                 Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+       struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+       struct net *caller_net;
+       struct nf_conn *nfct;
+
+       BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+       if (!opts)
+               return NULL;
+       if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+           opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+               opts->error = -EINVAL;
+               return NULL;
+       }
+       caller_net = dev_net(ctx->rxq->dev);
+       nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+                                 opts->netns_id);
+       if (IS_ERR(nfct)) {
+               opts->error = PTR_ERR(nfct);
+               return NULL;
+       }
+       return nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ *                    reference to it
+ *
+ * Parameters:
+ * @skb_ctx    - Pointer to ctx (__sk_buff) in TC program
+ *                 Cannot be NULL
+ * @bpf_tuple  - Pointer to memory representing the tuple to look up
+ *                 Cannot be NULL
+ * @tuple__sz  - Length of the tuple structure
+ *                 Must be one of sizeof(bpf_tuple->ipv4) or
+ *                 sizeof(bpf_tuple->ipv6)
+ * @opts       - Additional options for lookup (documented above)
+ *                 Cannot be NULL
+ * @opts__sz   - Length of the bpf_ct_opts structure
+ *                 Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+       struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+       struct net *caller_net;
+       struct nf_conn *nfct;
+
+       BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+       if (!opts)
+               return NULL;
+       if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+           opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+               opts->error = -EINVAL;
+               return NULL;
+       }
+       caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+       nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+                                 opts->netns_id);
+       if (IS_ERR(nfct)) {
+               opts->error = PTR_ERR(nfct);
+               return NULL;
+       }
+       return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @nfct        - Pointer to referenced nf_conn object, obtained using
+ *                bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+       if (!nfct)
+               return;
+       nf_ct_put(nfct);
+}
+
+__diag_pop()
+
+BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_tc_check_kfunc_ids)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_tc_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_acquire_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_SET_END(nf_ct_acquire_kfunc_ids)
+
+BTF_SET_START(nf_ct_release_kfunc_ids)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_release_kfunc_ids)
+
+/* Both sets are identical */
+#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
+
+static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &nf_ct_xdp_check_kfunc_ids,
+       .acquire_set  = &nf_ct_acquire_kfunc_ids,
+       .release_set  = &nf_ct_release_kfunc_ids,
+       .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &nf_ct_tc_check_kfunc_ids,
+       .acquire_set  = &nf_ct_acquire_kfunc_ids,
+       .release_set  = &nf_ct_release_kfunc_ids,
+       .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+int register_nf_conntrack_bpf(void)
+{
+       int ret;
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
+       return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
+}
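
A hedged caller-side sketch of the new kfuncs from an XDP program: the extern declarations mirror the kernel signatures above, the struct definitions are assumed to come from vmlinux.h BTF, and the zeroed tuple is a placeholder that a real program would fill from the parsed packet.

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

extern struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
		  __u32 tuple__sz, struct bpf_ct_opts *opts,
		  __u32 opts__sz) __ksym;
extern void bpf_ct_release(struct nf_conn *ct) __ksym;

SEC("xdp")
int ct_lookup_probe(struct xdp_md *ctx)
{
	struct bpf_ct_opts opts = {
		.netns_id = -1,                /* BPF_F_CURRENT_NETNS */
		.l4proto  = IPPROTO_TCP,
	};
	struct bpf_sock_tuple tup = {};        /* fill from the packet in real use */
	struct nf_conn *ct;

	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4),
			       &opts, sizeof(opts));
	if (ct)
		bpf_ct_release(ct);    /* verifier-enforced for acquired ptrs */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
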
index d6aa5b4..d38d689 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -2750,8 +2751,15 @@ int nf_conntrack_init_start(void)
        conntrack_gc_work_init(&conntrack_gc_work);
        queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
 
+       ret = register_nf_conntrack_bpf();
+       if (ret < 0)
+               goto err_kfunc;
+
        return 0;
 
+err_kfunc:
+       cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+       nf_conntrack_proto_fini();
 err_proto:
        nf_conntrack_seqadj_fini();
 err_seqadj:
index 64ae4c4..c5eec16 100644 (file)
@@ -226,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
        m->hdr[w] |= htonl(val);
 }
 
-static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
-{
-       u32 temp = msg->hdr[a];
-
-       msg->hdr[a] = msg->hdr[b];
-       msg->hdr[b] = temp;
-}
-
 /*
  * Word 0
  */
@@ -480,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m)
        msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
 }
 
-static inline void msg_reset_reroute_cnt(struct tipc_msg *m)
-{
-       msg_set_bits(m, 1, 21, 0xf, 0);
-}
-
 static inline u32 msg_lookup_scope(struct tipc_msg *m)
 {
        return msg_bits(m, 1, 19, 0x3);
@@ -800,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n)
        msg_set_word(m, 2, n);
 }
 
-static inline u32 msg_bcgap_after(struct tipc_msg *m)
-{
-       return msg_bits(m, 2, 16, 0xffff);
-}
-
 static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
 {
        msg_set_bits(m, 2, 16, 0xffff, n);
@@ -868,11 +850,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u16 n)
        msg_set_bits(m, 4, 0, 0xffff, n);
 }
 
-static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
-{
-       msg_set_bits(m, 4, 0, 0xffff, n);
-}
-
 static inline u32 msg_bc_netid(struct tipc_msg *m)
 {
        return msg_word(m, 4);
index c195698..3e0d628 100644 (file)
@@ -3240,49 +3240,58 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
        return sk;
 }
 
-static struct sock *unix_next_socket(struct seq_file *seq,
-                                    struct sock *sk,
-                                    loff_t *pos)
+static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
 {
        unsigned long bucket = get_bucket(*pos);
+       struct sock *sk;
 
-       while (sk > (struct sock *)SEQ_START_TOKEN) {
-               sk = sk_next(sk);
-               if (!sk)
-                       goto next_bucket;
-               if (sock_net(sk) == seq_file_net(seq))
-                       return sk;
-       }
-
-       do {
+       while (bucket < ARRAY_SIZE(unix_socket_table)) {
                spin_lock(&unix_table_locks[bucket]);
+
                sk = unix_from_bucket(seq, pos);
                if (sk)
                        return sk;
 
-next_bucket:
-               spin_unlock(&unix_table_locks[bucket++]);
-               *pos = set_bucket_offset(bucket, 1);
-       } while (bucket < ARRAY_SIZE(unix_socket_table));
+               spin_unlock(&unix_table_locks[bucket]);
+
+               *pos = set_bucket_offset(++bucket, 1);
+       }
 
        return NULL;
 }
 
+static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
+                                 loff_t *pos)
+{
+       unsigned long bucket = get_bucket(*pos);
+
+       for (sk = sk_next(sk); sk; sk = sk_next(sk))
+               if (sock_net(sk) == seq_file_net(seq))
+                       return sk;
+
+       spin_unlock(&unix_table_locks[bucket]);
+
+       *pos = set_bucket_offset(++bucket, 1);
+
+       return unix_get_first(seq, pos);
+}
+
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
 {
        if (!*pos)
                return SEQ_START_TOKEN;
 
-       if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
-               return NULL;
-
-       return unix_next_socket(seq, NULL, pos);
+       return unix_get_first(seq, pos);
 }
 
 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        ++*pos;
-       return unix_next_socket(seq, v, pos);
+
+       if (v == SEQ_START_TOKEN)
+               return unix_get_first(seq, pos);
+
+       return unix_get_next(seq, v, pos);
 }
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
@@ -3347,6 +3356,15 @@ static const struct seq_operations unix_seq_ops = {
 };
 
 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_unix_iter_state {
+       struct seq_net_private p;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       struct sock **batch;
+       bool st_bucket_done;
+};
+
 struct bpf_iter__unix {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct unix_sock *, unix_sk);
@@ -3365,24 +3383,156 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
        return bpf_iter_run_prog(prog, &ctx);
 }
 
+static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
+               if (sock_net(sk) != seq_file_net(seq))
+                       continue;
+
+               if (iter->end_sk < iter->max_sk) {
+                       sock_hold(sk);
+                       iter->batch[iter->end_sk++] = sk;
+               }
+
+               expected++;
+       }
+
+       spin_unlock(&unix_table_locks[start_sk->sk_hash]);
+
+       return expected;
+}
+
+static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
+{
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
+                                      unsigned int new_batch_sz)
+{
+       struct sock **new_batch;
+
+       new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+                            GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
+               return -ENOMEM;
+
+       bpf_iter_unix_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+
+       return 0;
+}
+
+static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
+                                       loff_t *pos)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       unsigned int expected;
+       bool resized = false;
+       struct sock *sk;
+
+       if (iter->st_bucket_done)
+               *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
+
+again:
+       /* Get a new batch */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+
+       sk = unix_get_first(seq, pos);
+       if (!sk)
+               return NULL; /* Done */
+
+       expected = bpf_iter_unix_hold_batch(seq, sk);
+
+       if (iter->end_sk == expected) {
+               iter->st_bucket_done = true;
+               return sk;
+       }
+
+       if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
+               resized = true;
+               goto again;
+       }
+
+       return sk;
+}
+
+static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       if (!*pos)
+               return SEQ_START_TOKEN;
+
+       /* bpf iter does not support lseek, so it always
+        * continues from where it was stop()-ped.
+        */
+       return bpf_iter_unix_batch(seq, pos);
+}
+
+static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       struct sock *sk;
+
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so advance to the next sk in
+        * the batch.
+        */
+       if (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+
+       ++*pos;
+
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               sk = bpf_iter_unix_batch(seq, pos);
+
+       return sk;
+}
+
 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
 {
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
        uid_t uid;
+       bool slow;
+       int ret;
 
        if (v == SEQ_START_TOKEN)
                return 0;
 
+       slow = lock_sock_fast(sk);
+
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
+
        uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return unix_prog_seq_show(prog, &meta, v, uid);
+       ret = unix_prog_seq_show(prog, &meta, v, uid);
+unlock:
+       unlock_sock_fast(sk, slow);
+       return ret;
 }
 
 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
 {
+       struct bpf_unix_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
 
@@ -3393,12 +3543,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
                        (void)unix_prog_seq_show(prog, &meta, v, 0);
        }
 
-       unix_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk)
+               bpf_iter_unix_put_batch(iter);
 }
 
 static const struct seq_operations bpf_iter_unix_seq_ops = {
-       .start  = unix_seq_start,
-       .next   = unix_seq_next,
+       .start  = bpf_iter_unix_seq_start,
+       .next   = bpf_iter_unix_seq_next,
        .stop   = bpf_iter_unix_seq_stop,
        .show   = bpf_iter_unix_seq_show,
 };
@@ -3447,13 +3598,55 @@ static struct pernet_operations unix_net_ops = {
 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
                     struct unix_sock *unix_sk, uid_t uid)
 
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+       struct bpf_unix_iter_state *iter = priv_data;
+       int err;
+
+       err = bpf_iter_init_seq_net(priv_data, aux);
+       if (err)
+               return err;
+
+       err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
+       if (err) {
+               bpf_iter_fini_seq_net(priv_data);
+               return err;
+       }
+
+       return 0;
+}
+
+static void bpf_iter_fini_unix(void *priv_data)
+{
+       struct bpf_unix_iter_state *iter = priv_data;
+
+       bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
+}
+
 static const struct bpf_iter_seq_info unix_seq_info = {
        .seq_ops                = &bpf_iter_unix_seq_ops,
-       .init_seq_private       = bpf_iter_init_seq_net,
-       .fini_seq_private       = bpf_iter_fini_seq_net,
-       .seq_priv_size          = sizeof(struct seq_net_private),
+       .init_seq_private       = bpf_iter_init_unix,
+       .fini_seq_private       = bpf_iter_fini_unix,
+       .seq_priv_size          = sizeof(struct bpf_unix_iter_state),
 };
 
+static const struct bpf_func_proto *
+bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
+                            const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_setsockopt:
+               return &bpf_sk_setsockopt_proto;
+       case BPF_FUNC_getsockopt:
+               return &bpf_sk_getsockopt_proto;
+       default:
+               return NULL;
+       }
+}
+
 static struct bpf_iter_reg unix_reg_info = {
        .target                 = "unix",
        .ctx_arg_info_size      = 1,
@@ -3461,6 +3654,7 @@ static struct bpf_iter_reg unix_reg_info = {
                { offsetof(struct bpf_iter__unix, unix_sk),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .get_func_proto         = bpf_iter_unix_get_func_proto,
        .seq_info               = &unix_seq_info,
 };
 
index 8675fa5..3ec8ad9 100644 (file)
@@ -26,12 +26,12 @@ static void int_exit(int sig)
 {
        __u32 curr_prog_id = 0;
 
-       if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
+       if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+               printf("bpf_xdp_query_id failed\n");
                exit(1);
        }
        if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+               bpf_xdp_detach(ifindex, xdp_flags, NULL);
        else if (!curr_prog_id)
                printf("couldn't find a prog id on a given interface\n");
        else
@@ -143,7 +143,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
index a70b094..6c61d5f 100644 (file)
@@ -34,12 +34,12 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(1);
                }
                if (prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                else if (!curr_prog_id)
                        printf("couldn't find a prog id on a given iface\n");
                else
@@ -173,7 +173,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
index 4ad8967..79ccd98 100644 (file)
@@ -33,7 +33,7 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
 {
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
+       err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
        if (err < 0) {
                printf("ERROR: failed to attach program to %s\n", name);
                return err;
@@ -51,7 +51,7 @@ static int do_detach(int idx, const char *name)
 {
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+       err = bpf_xdp_detach(idx, xdp_flags, NULL);
        if (err < 0)
                printf("ERROR: failed to detach program from %s\n", name);
 
index cfaf7e5..2d565ba 100644 (file)
@@ -43,13 +43,13 @@ static void int_exit(int sig)
        int i = 0;
 
        for (i = 0; i < total_ifindex; i++) {
-               if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
-                       printf("bpf_get_link_xdp_id on iface %d failed\n",
+               if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) {
+                       printf("bpf_xdp_query_id on iface %d failed\n",
                               ifindex_list[i]);
                        exit(1);
                }
                if (prog_id_list[i] == prog_id)
-                       bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+                       bpf_xdp_detach(ifindex_list[i], flags, NULL);
                else if (!prog_id)
                        printf("couldn't find a prog id on iface %d\n",
                               ifindex_list[i]);
@@ -716,12 +716,12 @@ int main(int ac, char **argv)
        }
        prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
        for (i = 0; i < total_ifindex; i++) {
-               if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
+               if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) {
                        printf("link set xdp fd failed\n");
                        int recovery_index = i;
 
                        for (i = 0; i < recovery_index; i++)
-                               bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+                               bpf_xdp_detach(ifindex_list[i], flags, NULL);
 
                        return 1;
                }
index 74a2926..fb2532d 100644 (file)
@@ -62,15 +62,15 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(EXIT_FAIL);
                }
                if (prog_id == curr_prog_id) {
                        fprintf(stderr,
                                "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
                                ifindex, ifname);
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                } else if (!curr_prog_id) {
                        printf("couldn't find a prog id on a given iface\n");
                } else {
@@ -209,7 +209,7 @@ static struct datarec *alloc_record_per_cpu(void)
 
 static struct record *alloc_record_per_rxq(void)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        struct record *array;
 
        array = calloc(nr_rxqs, sizeof(struct record));
@@ -222,7 +222,7 @@ static struct record *alloc_record_per_rxq(void)
 
 static struct stats_record *alloc_stats_record(void)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        struct stats_record *rec;
        int i;
 
@@ -241,7 +241,7 @@ static struct stats_record *alloc_stats_record(void)
 
 static void free_stats_record(struct stats_record *r)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        int i;
 
        for (i = 0; i < nr_rxqs; i++)
@@ -289,7 +289,7 @@ static void stats_collect(struct stats_record *rec)
        map_collect_percpu(fd, 0, &rec->stats);
 
        fd = bpf_map__fd(rx_queue_index_map);
-       max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       max_rxqs = bpf_map__max_entries(rx_queue_index_map);
        for (i = 0; i < max_rxqs; i++)
                map_collect_percpu(fd, i, &rec->rxq[i]);
 }
@@ -335,7 +335,7 @@ static void stats_print(struct stats_record *stats_rec,
                        struct stats_record *stats_prev,
                        int action, __u32 cfg_opt)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        unsigned int nr_cpus = bpf_num_possible_cpus();
        double pps = 0, err = 0;
        struct record *rec, *prev;
@@ -582,7 +582,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                fprintf(stderr, "link set xdp fd failed\n");
                return EXIT_FAIL_XDP;
        }
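
bpf_map__def() is deprecated later in this merge, so field pokes into struct bpf_map_def give way to per-field getters such as bpf_map__max_entries(). The sizing pattern used above, in isolation (struct record is illustrative):

    #include <stdlib.h>
    #include <bpf/libbpf.h>

    struct record { unsigned long long processed; };    /* illustrative payload */

    static struct record *alloc_record_per_rxq(struct bpf_map *rx_queue_index_map)
    {
            /* Getter replaces the deprecated bpf_map__def(map)->max_entries. */
            unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);

            return calloc(nr_rxqs, sizeof(struct record));
    }
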
index 587eacb..0a2b3e9 100644 (file)
@@ -30,7 +30,7 @@ static int do_attach(int idx, int fd, const char *name)
        __u32 info_len = sizeof(info);
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
+       err = bpf_xdp_attach(idx, fd, xdp_flags, NULL);
        if (err < 0) {
                printf("ERROR: failed to attach program to %s\n", name);
                return err;
@@ -51,13 +51,13 @@ static int do_detach(int idx, const char *name)
        __u32 curr_prog_id = 0;
        int err = 0;
 
-       err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
+       err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id);
        if (err) {
-               printf("bpf_get_link_xdp_id failed\n");
+               printf("bpf_xdp_query_id failed\n");
                return err;
        }
        if (prog_id == curr_prog_id) {
-               err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+               err = bpf_xdp_detach(idx, xdp_flags, NULL);
                if (err < 0)
                        printf("ERROR: failed to detach prog from %s\n", name);
        } else if (!curr_prog_id) {
index 8740838..ae70a79 100644 (file)
@@ -1265,7 +1265,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
        int ret;
 
        if (prog_id) {
-               ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags);
+               ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id);
                if (ret < 0)
                        return -errno;
 
@@ -1278,7 +1278,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
                }
        }
 
-       return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+       return bpf_xdp_detach(ifindex, xdp_flags, NULL);
 }
 
 int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
@@ -1295,8 +1295,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
 
        xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
        xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
-       ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
-                                 xdp_flags);
+       ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL);
        if (ret < 0) {
                ret = -errno;
                fprintf(stderr,
@@ -1308,7 +1307,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
                return ret;
        }
 
-       ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+       ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
        if (ret < 0) {
                ret = -errno;
                fprintf(stderr,
index 1d4f305..7370c03 100644 (file)
@@ -32,12 +32,12 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(1);
                }
                if (prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                else if (!curr_prog_id)
                        printf("couldn't find a prog id on a given iface\n");
                else
@@ -288,7 +288,7 @@ int main(int argc, char **argv)
                }
        }
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
@@ -302,7 +302,7 @@ int main(int argc, char **argv)
 
        poll_stats(kill_after_s);
 
-       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+       bpf_xdp_detach(ifindex, xdp_flags, NULL);
 
        return 0;
 }
index cc44087..28b5f2a 100644 (file)
@@ -173,7 +173,7 @@ main(int argc, char **argv)
        unlink(SOCKET_NAME);
 
        /* Unset fd for given ifindex */
-       err = bpf_set_link_xdp_fd(ifindex, -1, 0);
+       err = bpf_xdp_detach(ifindex, 0, NULL);
        if (err) {
                fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
                return err;
index aa50864..19288a2 100644 (file)
@@ -571,13 +571,13 @@ static void remove_xdp_program(void)
 {
        u32 curr_prog_id = 0;
 
-       if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
+       if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) {
+               printf("bpf_xdp_query_id failed\n");
                exit(EXIT_FAILURE);
        }
 
        if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
+               bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL);
        else if (!curr_prog_id)
                printf("couldn't find a prog id on a given interface\n");
        else
@@ -1027,7 +1027,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
        if (ret)
                exit_with_error(-ret);
 
-       ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
+       ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id);
        if (ret)
                exit_with_error(-ret);
 
@@ -1760,7 +1760,7 @@ static void load_xdp_program(char **argv, struct bpf_object **obj)
                exit(EXIT_FAILURE);
        }
 
-       if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
+       if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) {
                fprintf(stderr, "ERROR: link set xdp fd failed\n");
                exit(EXIT_FAILURE);
        }
index 52e7c4f..2220509 100644 (file)
@@ -974,8 +974,8 @@ static void remove_xdp_program(void)
        int i;
 
        for (i = 0 ; i < n_ports; i++)
-               bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1,
-                                   port_params[i].xsk_cfg.xdp_flags);
+               bpf_xdp_detach(if_nametoindex(port_params[i].iface),
+                              port_params[i].xsk_cfg.xdp_flags, NULL);
 }
 
 int main(int argc, char **argv)
index a6403dd..0966252 100755 (executable)
@@ -87,21 +87,25 @@ class HeaderParser(object):
         self.line = ''
         self.helpers = []
         self.commands = []
+        self.desc_unique_helpers = set()
+        self.define_unique_helpers = []
+        self.desc_syscalls = []
+        self.enum_syscalls = []
 
     def parse_element(self):
         proto    = self.parse_symbol()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
+        desc     = self.parse_desc(proto)
+        ret      = self.parse_ret(proto)
         return APIElement(proto=proto, desc=desc, ret=ret)
 
     def parse_helper(self):
         proto    = self.parse_proto()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
+        desc     = self.parse_desc(proto)
+        ret      = self.parse_ret(proto)
         return Helper(proto=proto, desc=desc, ret=ret)
 
     def parse_symbol(self):
-        p = re.compile(' \* ?(.+)$')
+        p = re.compile(' \* ?(BPF\w+)$')
         capture = p.match(self.line)
         if not capture:
             raise NoSyscallCommandFound
@@ -127,16 +131,15 @@ class HeaderParser(object):
         self.line = self.reader.readline()
         return capture.group(1)
 
-    def parse_desc(self):
+    def parse_desc(self, proto):
         p = re.compile(' \* ?(?:\t| {5,8})Description$')
         capture = p.match(self.line)
         if not capture:
-            # Helper can have empty description and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
+            raise Exception("No description section found for " + proto)
         # Description can be several lines, some of them possibly empty, and it
         # stops when another subsection title is met.
         desc = ''
+        desc_present = False
         while True:
             self.line = self.reader.readline()
             if self.line == ' *\n':
@@ -145,21 +148,24 @@ class HeaderParser(object):
                 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
+                    desc_present = True
                     desc += capture.group(1) + '\n'
                 else:
                     break
+
+        if not desc_present:
+            raise Exception("No description found for " + proto)
         return desc
 
-    def parse_ret(self):
+    def parse_ret(self, proto):
         p = re.compile(' \* ?(?:\t| {5,8})Return$')
         capture = p.match(self.line)
         if not capture:
-            # Helper can have empty retval and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
+            raise Exception("No return section found for " + proto)
         # Return value description can be several lines, some of them possibly
         # empty, and it stops when another subsection title is met.
         ret = ''
+        ret_present = False
         while True:
             self.line = self.reader.readline()
             if self.line == ' *\n':
@@ -168,44 +174,101 @@ class HeaderParser(object):
                 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
+                    ret_present = True
                     ret += capture.group(1) + '\n'
                 else:
                     break
+
+        if not ret_present:
+            raise Exception("No return found for " + proto)
         return ret
 
-    def seek_to(self, target, help_message):
+    def seek_to(self, target, help_message, discard_lines = 1):
         self.reader.seek(0)
         offset = self.reader.read().find(target)
         if offset == -1:
             raise Exception(help_message)
         self.reader.seek(offset)
         self.reader.readline()
-        self.reader.readline()
+        for _ in range(discard_lines):
+            self.reader.readline()
         self.line = self.reader.readline()
 
-    def parse_syscall(self):
+    def parse_desc_syscall(self):
         self.seek_to('* DOC: eBPF Syscall Commands',
                      'Could not find start of eBPF syscall descriptions list')
         while True:
             try:
                 command = self.parse_element()
                 self.commands.append(command)
+                self.desc_syscalls.append(command.proto)
+
             except NoSyscallCommandFound:
                 break
 
-    def parse_helpers(self):
+    def parse_enum_syscall(self):
+        self.seek_to('enum bpf_cmd {',
+                     'Could not find start of bpf_cmd enum', 0)
+        # Searches for one or more BPF\w+ enum entries
+        bpf_p = re.compile('\s*(BPF\w+)+')
+        # Searches for an enum entry assigned to another entry,
+        # e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN. Such aliases are not
+        # documented, so they are skipped when checking that the right
+        # number of syscalls is documented
+        assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+        bpf_cmd_str = ''
+        while True:
+            capture = assign_p.match(self.line)
+            if capture:
+                # Skip line if an enum entry is assigned to another entry
+                self.line = self.reader.readline()
+                continue
+            capture = bpf_p.match(self.line)
+            if capture:
+                bpf_cmd_str += self.line
+            else:
+                break
+            self.line = self.reader.readline()
+        # Find the number of occurrences of BPF\w+
+        self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+
+    def parse_desc_helpers(self):
         self.seek_to('* Start of BPF helper function descriptions:',
                      'Could not find start of eBPF helper descriptions list')
         while True:
             try:
                 helper = self.parse_helper()
                 self.helpers.append(helper)
+                proto = helper.proto_break_down()
+                self.desc_unique_helpers.add(proto['name'])
             except NoHelperFound:
                 break
 
+    def parse_define_helpers(self):
+        # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare
+        # later with the number of unique function names present in description.
+        # Note: seek_to(..) discards the first line below the target search text,
+        # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers.
+        self.seek_to('#define __BPF_FUNC_MAPPER(FN)',
+                     'Could not find start of eBPF helper definition list')
+        # Searches for either one or more FN(\w+) defines or a backslash for newline
+        p = re.compile('\s*(FN\(\w+\))+|\\\\')
+        fn_defines_str = ''
+        while True:
+            capture = p.match(self.line)
+            if capture:
+                fn_defines_str += self.line
+            else:
+                break
+            self.line = self.reader.readline()
+        # Find the number of occurrences of FN(\w+)
+        self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str)
+
     def run(self):
-        self.parse_syscall()
-        self.parse_helpers()
+        self.parse_desc_syscall()
+        self.parse_enum_syscall()
+        self.parse_desc_helpers()
+        self.parse_define_helpers()
         self.reader.close()
 
 ###############################################################################
@@ -235,6 +298,25 @@ class Printer(object):
             self.print_one(elem)
         self.print_footer()
 
+    def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance):
+        """
+        Checks the number of helpers/syscalls documented within the header file
+        description with those defined as part of enum/macro and raise an
+        Exception if they don't match.
+        """
+        nr_desc_unique_elem = len(desc_unique_elem)
+        nr_define_unique_elem = len(define_unique_elem)
+        if nr_desc_unique_elem != nr_define_unique_elem:
+            exception_msg = '''
+The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d)
+''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem)
+            if nr_desc_unique_elem < nr_define_unique_elem:
+                # Function description is parsed until no helper is found (which can be due to
+                # misformatting). Hence, only print the first missing/misformatted helper/enum.
+                exception_msg += '''
+The description for %s is not present or formatted correctly.
+''' % (define_unique_elem[nr_desc_unique_elem])
+            raise Exception(exception_msg)
 
 class PrinterRST(Printer):
     """
@@ -295,7 +377,6 @@ class PrinterRST(Printer):
 
         print('')
 
-
 class PrinterHelpersRST(PrinterRST):
     """
     A printer for dumping collected information about helpers as a ReStructured
@@ -305,6 +386,7 @@ class PrinterHelpersRST(PrinterRST):
     """
     def __init__(self, parser):
         self.elements = parser.helpers
+        self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
 
     def print_header(self):
         header = '''\
@@ -478,6 +560,7 @@ class PrinterSyscallRST(PrinterRST):
     """
     def __init__(self, parser):
         self.elements = parser.commands
+        self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd')
 
     def print_header(self):
         header = '''\
@@ -509,6 +592,7 @@ class PrinterHelpers(Printer):
     """
     def __init__(self, parser):
         self.elements = parser.helpers
+        self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
 
     type_fwds = [
             'struct bpf_fib_lookup',
index 842889f..a9f8c63 100644 (file)
@@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access)
        int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
 
        if (rc)
-               return -EPERM;
+               return rc;
 
        #ifdef CONFIG_CGROUP_DEVICE
        return devcgroup_legacy_check_permission(type, major, minor, access);
index 5983312..a2c665b 100644 (file)
@@ -902,7 +902,7 @@ static int do_show(int argc, char **argv)
                                      equal_fn_for_key_as_id, NULL);
        btf_map_table = hashmap__new(hash_fn_for_key_as_id,
                                     equal_fn_for_key_as_id, NULL);
-       if (!btf_prog_table || !btf_map_table) {
+       if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) {
                hashmap__free(btf_prog_table);
                hashmap__free(btf_map_table);
                if (fd >= 0)
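
The IS_ERR() conversions in these bpftool hunks reflect that libbpf's hashmap__new() reports failure as an ERR_PTR-encoded errno rather than NULL, so the old NULL tests could never fire. The corrected pattern, assuming bpftool's hash_fn_for_key_as_id/equal_fn_for_key_as_id callbacks are in scope:

    #include <errno.h>
    #include <linux/err.h>
    #include <bpf/hashmap.h>

    static int build_table(void)
    {
            struct hashmap *table = hashmap__new(hash_fn_for_key_as_id,
                                                 equal_fn_for_key_as_id, NULL);

            if (IS_ERR(table))      /* a NULL check would never trigger */
                    return -ENOMEM;
            /* ... populate and query the table ... */
            hashmap__free(table);   /* now tolerates ERR_PTR as well as NULL */
            return 0;
    }
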
index 3571a28..effe136 100644 (file)
@@ -50,6 +50,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                         const char *attach_flags_str,
                         int level)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
        int prog_fd;
@@ -63,6 +64,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                return -1;
        }
 
+       get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
        if (json_output) {
                jsonw_start_object(json_wtr);
                jsonw_uint_field(json_wtr, "id", info.id);
@@ -73,7 +75,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        jsonw_uint_field(json_wtr, "attach_type", attach_type);
                jsonw_string_field(json_wtr, "attach_flags",
                                   attach_flags_str);
-               jsonw_string_field(json_wtr, "name", info.name);
+               jsonw_string_field(json_wtr, "name", prog_name);
                jsonw_end_object(json_wtr);
        } else {
                printf("%s%-8u ", level ? "    " : "", info.id);
@@ -81,7 +83,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        printf("%-15s", attach_type_name[attach_type]);
                else
                        printf("type %-10u", attach_type);
-               printf(" %-15s %-15s\n", attach_flags_str, info.name);
+               printf(" %-15s %-15s\n", attach_flags_str, prog_name);
        }
 
        close(prog_fd);
index fa8eb81..111dff8 100644 (file)
@@ -24,6 +24,7 @@
 #include <bpf/bpf.h>
 #include <bpf/hashmap.h>
 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/btf.h>
 
 #include "main.h"
 
@@ -304,6 +305,49 @@ const char *get_fd_type_name(enum bpf_obj_type type)
        return names[type];
 }
 
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+                       char *name_buff, size_t buff_len)
+{
+       const char *prog_name = prog_info->name;
+       const struct btf_type *func_type;
+       const struct bpf_func_info finfo = {};
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       struct btf *prog_btf = NULL;
+
+       if (buff_len <= BPF_OBJ_NAME_LEN ||
+           strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1)
+               goto copy_name;
+
+       if (!prog_info->btf_id || prog_info->nr_func_info == 0)
+               goto copy_name;
+
+       info.nr_func_info = 1;
+       info.func_info_rec_size = prog_info->func_info_rec_size;
+       if (info.func_info_rec_size > sizeof(finfo))
+               info.func_info_rec_size = sizeof(finfo);
+       info.func_info = ptr_to_u64(&finfo);
+
+       if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
+               goto copy_name;
+
+       prog_btf = btf__load_from_kernel_by_id(info.btf_id);
+       if (!prog_btf)
+               goto copy_name;
+
+       func_type = btf__type_by_id(prog_btf, finfo.type_id);
+       if (!func_type || !btf_is_func(func_type))
+               goto copy_name;
+
+       prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+copy_name:
+       snprintf(name_buff, buff_len, "%s", prog_name);
+
+       if (prog_btf)
+               btf__free(prog_btf);
+}
+
 int get_fd_type(int fd)
 {
        char path[PATH_MAX];
index b4695df..43e3f87 100644 (file)
@@ -227,7 +227,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
                /* only generate definitions for memory-mapped internal maps */
                if (!bpf_map__is_internal(map))
                        continue;
-               if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+               if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
                if (!get_map_ident(map, map_ident, sizeof(map_ident)))
@@ -468,7 +468,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
                if (!get_map_ident(map, ident, sizeof(ident)))
                        continue;
                if (bpf_map__is_internal(map) &&
-                   (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   (bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        printf("\tmunmap(skel->%1$s, %2$zd);\n",
                               ident, bpf_map_mmap_sz(map));
                codegen("\
@@ -536,7 +536,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
                        continue;
 
                if (!bpf_map__is_internal(map) ||
-                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
                codegen("\
@@ -600,10 +600,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
                        continue;
 
                if (!bpf_map__is_internal(map) ||
-                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
-               if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+               if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
                        mmap_flags = "PROT_READ";
                else
                        mmap_flags = "PROT_READ | PROT_WRITE";
@@ -927,7 +927,6 @@ static int do_skeleton(int argc, char **argv)
                        s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
                        if (!s)                                             \n\
                                goto err;                                   \n\
-                       obj->skeleton = s;                                  \n\
                                                                            \n\
                        s->sz = sizeof(*s);                                 \n\
                        s->name = \"%1$s\";                                 \n\
@@ -962,7 +961,7 @@ static int do_skeleton(int argc, char **argv)
                                i, bpf_map__name(map), i, ident);
                        /* memory-mapped internal maps */
                        if (bpf_map__is_internal(map) &&
-                           (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) {
+                           (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
                                printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
                                       i, ident);
                        }
@@ -1000,6 +999,7 @@ static int do_skeleton(int argc, char **argv)
                                                                            \n\
                        s->data = (void *)%2$s__elf_bytes(&s->data_sz);     \n\
                                                                            \n\
+                       obj->skeleton = s;                                  \n\
                        return 0;                                           \n\
                err:                                                        \n\
                        bpf_object__destroy_skeleton(s);                    \n\
index 2c258db..97dec81 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2020 Facebook */
 
 #include <errno.h>
+#include <linux/err.h>
 #include <net/if.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -306,7 +307,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                link_table = hashmap__new(hash_fn_for_key_as_id,
                                          equal_fn_for_key_as_id, NULL);
-               if (!link_table) {
+               if (IS_ERR(link_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
index 020e91a..9d01fa9 100644 (file)
@@ -478,7 +478,14 @@ int main(int argc, char **argv)
        }
 
        if (!legacy_libbpf) {
-               ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+               enum libbpf_strict_mode mode;
+
+               /* Allow legacy map definitions for skeleton generation.
+                * They will still be rejected if the generated skeleton is
+                * loaded with LIBBPF_STRICT_ALL mode.
+                */
+               mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS;
+               ret = libbpf_set_strict_mode(mode);
                if (ret)
                        p_err("failed to enable libbpf strict mode: %d", ret);
        }
index 8d76d93..0c38405 100644 (file)
@@ -140,6 +140,10 @@ struct cmd {
 int cmd_select(const struct cmd *cmds, int argc, char **argv,
               int (*help)(int argc, char **argv));
 
+#define MAX_PROG_FULL_NAME 128
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+                       char *name_buff, size_t buff_len);
+
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
index cc530a2..c66a3c9 100644 (file)
@@ -699,7 +699,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                map_table = hashmap__new(hash_fn_for_key_as_id,
                                         equal_fn_for_key_as_id, NULL);
-               if (!map_table) {
+               if (IS_ERR(map_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
index 6490537..526a332 100644 (file)
@@ -551,7 +551,7 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type,
        if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD)
                flags |= XDP_FLAGS_HW_MODE;
 
-       return bpf_set_link_xdp_fd(ifindex, progfd, flags);
+       return bpf_xdp_attach(ifindex, progfd, flags, NULL);
 }
 
 static int do_attach(int argc, char **argv)
index 56b598e..7c384d1 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (C) 2020 Facebook */
 #include <errno.h>
+#include <linux/err.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
        libbpf_print_fn_t default_print;
 
        *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL);
-       if (!*map) {
+       if (IS_ERR(*map)) {
                p_err("failed to create hashmap for PID references");
                return -1;
        }
index 2a21d50..cf935c6 100644 (file)
@@ -424,8 +424,10 @@ out_free:
        free(value);
 }
 
-static void print_prog_header_json(struct bpf_prog_info *info)
+static void print_prog_header_json(struct bpf_prog_info *info, int fd)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
+
        jsonw_uint_field(json_wtr, "id", info->id);
        if (info->type < ARRAY_SIZE(prog_type_name))
                jsonw_string_field(json_wtr, "type",
@@ -433,8 +435,10 @@ static void print_prog_header_json(struct bpf_prog_info *info)
        else
                jsonw_uint_field(json_wtr, "type", info->type);
 
-       if (*info->name)
-               jsonw_string_field(json_wtr, "name", info->name);
+       if (*info->name) {
+               get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+               jsonw_string_field(json_wtr, "name", prog_name);
+       }
 
        jsonw_name(json_wtr, "tag");
        jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"",
@@ -455,7 +459,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
        char *memlock;
 
        jsonw_start_object(json_wtr);
-       print_prog_header_json(info);
+       print_prog_header_json(info, fd);
        print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
 
        if (info->load_time) {
@@ -507,16 +511,20 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
        jsonw_end_object(json_wtr);
 }
 
-static void print_prog_header_plain(struct bpf_prog_info *info)
+static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
+
        printf("%u: ", info->id);
        if (info->type < ARRAY_SIZE(prog_type_name))
                printf("%s  ", prog_type_name[info->type]);
        else
                printf("type %u  ", info->type);
 
-       if (*info->name)
-               printf("name %s  ", info->name);
+       if (*info->name) {
+               get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+               printf("name %s  ", prog_name);
+       }
 
        printf("tag ");
        fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
@@ -534,7 +542,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 {
        char *memlock;
 
-       print_prog_header_plain(info);
+       print_prog_header_plain(info, fd);
 
        if (info->load_time) {
                char buf[32];
@@ -641,7 +649,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                prog_table = hashmap__new(hash_fn_for_key_as_id,
                                          equal_fn_for_key_as_id, NULL);
-               if (!prog_table) {
+               if (IS_ERR(prog_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
@@ -972,10 +980,10 @@ static int do_dump(int argc, char **argv)
 
                if (json_output && nb_fds > 1) {
                        jsonw_start_object(json_wtr);   /* prog object */
-                       print_prog_header_json(&info);
+                       print_prog_header_json(&info, fds[i]);
                        jsonw_name(json_wtr, "insns");
                } else if (nb_fds > 1) {
-                       print_prog_header_plain(&info);
+                       print_prog_header_plain(&info, fds[i]);
                }
 
                err = prog_dump(&info, mode, filepath, opcodes, visual, linum);
index 2f693b0..e08a6ff 100644 (file)
@@ -480,7 +480,6 @@ static int do_unregister(int argc, char **argv)
 static int do_register(int argc, char **argv)
 {
        LIBBPF_OPTS(bpf_object_open_opts, open_opts);
-       const struct bpf_map_def *def;
        struct bpf_map_info info = {};
        __u32 info_len = sizeof(info);
        int nr_errs = 0, nr_maps = 0;
@@ -510,8 +509,7 @@ static int do_register(int argc, char **argv)
        }
 
        bpf_object__for_each_map(map, obj) {
-               def = bpf_map__def(map);
-               if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+               if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
                        continue;
 
                link = bpf_map__attach_struct_ops(map);
index 9ddeca9..a7f87cd 100644 (file)
@@ -20,6 +20,8 @@ LD       = $(HOSTLD)
 ARCH     = $(HOSTARCH)
 RM      ?= rm
 CROSS_COMPILE =
+CFLAGS  := $(KBUILD_HOSTCFLAGS)
+LDFLAGS := $(KBUILD_HOSTLDFLAGS)
 
 OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
 
@@ -47,10 +49,10 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT)
        $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT)    \
-                   DESTDIR=$(LIBBPF_DESTDIR) prefix=                          \
+                   DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \
                    $(abspath $@) install_headers
 
-CFLAGS := -g \
+CFLAGS += -g \
           -I$(srctree)/tools/include \
           -I$(srctree)/tools/include/uapi \
           -I$(LIBBPF_INCLUDE) \
index b0383d3..16a7574 100644 (file)
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *                     *ctx_out*, *data_in* and *data_out* must be NULL.
  *                     *repeat* must be zero.
  *
+ *             BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
  *             is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE                (1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in the BPF_PROG_LOAD command, the loaded
+ * program fully supports xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS    (1U << 5)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1775,6 +1782,8 @@ union bpf_attr {
  *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ *     Description
+ *             Get the current pid and tgid.
  *     Return
  *             A 64-bit integer containing the current tgid and pid, and
  *             created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
  *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ *     Description
+ *             Get the current uid and gid.
  *     Return
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
  *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ *     Description
+ *             Get the current task.
  *     Return
  *             A pointer to the current task struct.
  *
@@ -2369,6 +2382,8 @@ union bpf_attr {
  *             indicate that the hash is outdated and to trigger a
  *             recalculation the next time the kernel tries to access this
  *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *     Return
+ *             void.
  *
  * long bpf_get_numa_node_id(void)
  *     Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
  *             A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Description
+ *             Get the owner UID of the socket associated with *skb*.
  *     Return
  *             The owner UID of the socket associated to *skb*. If the socket
  *             is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
  *             The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ *     Description
+ *             Get the current cgroup id based on the cgroup within which
+ *             the current task is running.
  *     Return
  *             A 64-bit integer containing the current cgroup id based
  *             on the cgroup within which the current task is running.
@@ -5018,6 +5038,44 @@ union bpf_attr {
  *
  *     Return
  *             The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *     Description
+ *             Get the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *     Description
+ *             Set the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *     Description
+ *             Get the total size of a given xdp buff (linear and paged area).
+ *     Return
+ *             The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             This helper is provided as an easy way to load data from an
+ *             xdp buffer. It can be used to load *len* bytes from *offset* of
+ *             the frame associated to *xdp_md* into the buffer pointed to by
+ *             *buf*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             Store *len* bytes from buffer *buf* into the frame
+ *             associated to *xdp_md*, at *offset*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5206,6 +5264,11 @@ union bpf_attr {
        FN(get_func_arg),               \
        FN(get_func_ret),               \
        FN(get_func_arg_cnt),           \
+       FN(get_retval),                 \
+       FN(set_retval),                 \
+       FN(xdp_get_buff_len),           \
+       FN(xdp_load_bytes),             \
+       FN(xdp_store_bytes),            \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
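
These two helpers pair with the devcgroup change earlier in this merge (returning rc instead of a flat -EPERM), letting a cgroup program choose the errno that userspace sees. A hypothetical device-cgroup sketch, assuming a libbpf recent enough to declare bpf_set_retval():

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    SEC("cgroup/dev")
    int deny_with_enxio(struct bpf_cgroup_dev_ctx *ctx)
    {
            /* Surface -ENXIO to the caller instead of the default -EPERM. */
            bpf_set_retval(-6 /* -ENXIO */);
            return 0;       /* 0 == deny the device access */
    }
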
index 550b4cb..418b259 100644 (file)
@@ -754,10 +754,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
                .flags = flags,
        );
 
-       return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+       return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
 }
 
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+int bpf_prog_attach_opts(int prog_fd, int target_fd,
                          enum bpf_attach_type type,
                          const struct bpf_prog_attach_opts *opts)
 {
@@ -778,6 +778,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
        return libbpf_err_errno(ret);
 }
 
+__attribute__((alias("bpf_prog_attach_opts")))
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+                         enum bpf_attach_type type,
+                         const struct bpf_prog_attach_opts *opts);
+
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
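
bpf_prog_attach_xattr() survives only as an ABI alias; new code calls the _opts variant, for instance to atomically replace one attached cgroup program with another (a sketch; all fds assumed valid):

    #include <bpf/bpf.h>

    static int swap_cgroup_prog(int cgroup_fd, int old_fd, int new_fd)
    {
            LIBBPF_OPTS(bpf_prog_attach_opts, opts,
                    .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE,
                    .replace_prog_fd = old_fd,
            );

            return bpf_prog_attach_opts(new_fd, cgroup_fd,
                                        BPF_CGROUP_INET_EGRESS, &opts);
    }
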
index 14e0d97..c2e8327 100644 (file)
@@ -391,6 +391,10 @@ struct bpf_prog_attach_opts {
 
 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
                               enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
+                                    enum bpf_attach_type type,
+                                    const struct bpf_prog_attach_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
 LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
                                     enum bpf_attach_type type,
                                     const struct bpf_prog_attach_opts *opts);
index 963b106..44df982 100644 (file)
@@ -133,7 +133,7 @@ struct bpf_map_def {
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));
 
 enum libbpf_pin_type {
        LIBBPF_PIN_NONE,
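
For anyone migrating off the now-deprecated struct bpf_map_def, the BTF-defined form in SEC(".maps") encodes the same four fields through type information; a sketch of the replacement for a legacy array definition:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    /* BTF-defined equivalent of a legacy 64-entry __u32 -> __u64 array map. */
    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 64);
            __type(key, __u32);
            __type(value, __u64);
    } counters SEC(".maps");
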
index 9aa19c8..1383e26 100644 (file)
@@ -1620,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 struct btf_pipe {
        const struct btf *src;
        struct btf *dst;
+       struct hashmap *str_off_map; /* map string offsets from src to dst */
 };
 
 static int btf_rewrite_str(__u32 *str_off, void *ctx)
 {
        struct btf_pipe *p = ctx;
-       int off;
+       void *mapped_off;
+       int off, err;
 
        if (!*str_off) /* nothing to do for empty strings */
                return 0;
 
+       if (p->str_off_map &&
+           hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
+               *str_off = (__u32)(long)mapped_off;
+               return 0;
+       }
+
        off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
        if (off < 0)
                return off;
 
+       /* Remember string mapping from src to dst.  It avoids
+        * performing expensive string comparisons.
+        */
+       if (p->str_off_map) {
+               err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+               if (err)
+                       return err;
+       }
+
        *str_off = off;
        return 0;
 }
@@ -1680,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
        return 0;
 }
 
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+
 int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 {
        struct btf_pipe p = { .src = src_btf, .dst = btf };
@@ -1713,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
        if (!off)
                return libbpf_err(-ENOMEM);
 
+       /* Map the string offsets from src_btf to the offsets from btf to improve performance */
+       p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+       if (IS_ERR(p.str_off_map))
+               return libbpf_err(-ENOMEM);
+
        /* bulk copy types data for all types from src_btf */
        memcpy(t, src_btf->types_data, data_sz);
 
@@ -1754,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
        btf->hdr->str_off += data_sz;
        btf->nr_types += cnt;
 
+       hashmap__free(p.str_off_map);
+
        /* return type ID of the first added BTF type */
        return btf->start_id + btf->nr_types - cnt;
 err_out:
@@ -1767,6 +1794,8 @@ err_out:
         * wasn't modified, so doesn't need restoring, see big comment above */
        btf->hdr->str_len = old_strs_len;
 
+       hashmap__free(p.str_off_map);
+
        return libbpf_err(err);
 }
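
The cache stores integer string offsets as pointer-sized keys and values, hence the (void *)(long) casts. The find-or-append pattern in isolation (a sketch against libbpf's internal hashmap; an identity hash suffices because offsets are already unique):

    #include <errno.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <linux/err.h>
    #include "hashmap.h"    /* libbpf-internal header */

    static size_t ident_hash(const void *key, void *ctx) { return (size_t)key; }
    static bool ident_eq(const void *k1, const void *k2, void *ctx) { return k1 == k2; }

    static int offset_cache_demo(void)
    {
            struct hashmap *m = hashmap__new(ident_hash, ident_eq, NULL);
            void *dst;

            if (IS_ERR(m))
                    return -ENOMEM;
            /* src offset 42 was copied to dst offset 7: remember it. */
            hashmap__append(m, (void *)(long)42, (void *)(long)7);
            if (hashmap__find(m, (void *)(long)42, &dst)) {
                    /* cache hit: (long)dst == 7, no btf__add_str() needed */
            }
            hashmap__free(m);
            return 0;
    }
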
 
index 061839f..51862fd 100644 (file)
@@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
                         const struct btf_dump_type_data_opts *opts);
 
 /*
- * A set of helpers for easier BTF types handling
+ * A set of helpers for easier BTF types handling.
+ *
+ * The inline functions below rely on constants from the kernel headers which
+ * may not be available for applications including this header file. To avoid
+ * compilation errors, we define all the constants here that were added after
+ * the initial introduction of the BTF_KIND* constants.
  */
+#ifndef BTF_KIND_FUNC
+#define BTF_KIND_FUNC          12      /* Function     */
+#define BTF_KIND_FUNC_PROTO    13      /* Function Proto       */
+#endif
+#ifndef BTF_KIND_VAR
+#define BTF_KIND_VAR           14      /* Variable     */
+#define BTF_KIND_DATASEC       15      /* Section      */
+#endif
+#ifndef BTF_KIND_FLOAT
+#define BTF_KIND_FLOAT         16      /* Floating point       */
+#endif
+/* The kernel header switched to enums, so these two were never #defined */
+#define BTF_KIND_DECL_TAG      17      /* Decl Tag */
+#define BTF_KIND_TYPE_TAG      18      /* Type Tag */
+
 static inline __u16 btf_kind(const struct btf_type *t)
 {
        return BTF_INFO_KIND(t->info);
index 3c20b12..aeb09c2 100644 (file)
@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)
 
 void hashmap__free(struct hashmap *map)
 {
-       if (!map)
+       if (IS_ERR_OR_NULL(map))
                return;
 
        hashmap__clear(map);
@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,
 
        return true;
 }
-
index 7f10dd5..a8c7503 100644 (file)
@@ -235,6 +235,8 @@ enum sec_def_flags {
        SEC_SLEEPABLE = 8,
        /* allow non-strict prefix matching */
        SEC_SLOPPY_PFX = 16,
+       /* BPF program supports non-linear XDP buffers */
+       SEC_XDP_FRAGS = 32,
 };
 
 struct bpf_sec_def {
@@ -1937,6 +1939,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
        if (obj->efile.maps_shndx < 0)
                return 0;
 
+       if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
+               pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
+               return -EOPNOTSUPP;
+       }
+
        if (!symbols)
                return -EINVAL;
 
@@ -1999,6 +2006,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
+               pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
+
                if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
                        pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
                        return -ENOTSUP;
@@ -4190,6 +4199,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
                return 0;
 
        if (!bpf_map__is_internal(map)) {
+               pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
                ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
                                           def->value_size, &key_type_id,
                                           &value_type_id);
@@ -6562,6 +6572,9 @@ static int libbpf_preload_prog(struct bpf_program *prog,
        if (def & SEC_SLEEPABLE)
                opts->prog_flags |= BPF_F_SLEEPABLE;
 
+       if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
+               opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+
        if ((prog->type == BPF_PROG_TYPE_TRACING ||
             prog->type == BPF_PROG_TYPE_LSM ||
             prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -8600,8 +8613,11 @@ static const struct bpf_sec_def section_defs[] = {
        SEC_DEF("lsm.s/",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
        SEC_DEF("iter/",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
        SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
+       SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp_devmap/",          XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
+       SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp_cpumap/",          XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
+       SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
        SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
        SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
        SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
@@ -11795,6 +11811,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
 
 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
 {
+       if (!s)
+               return;
+
        if (s->progs)
                bpf_object__detach_skeleton(s);
        if (s->obj)
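
Tying the new section names to the frags-aware helpers documented earlier in this merge, a hypothetical multi-buffer XDP program could look like:

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    SEC("xdp.frags")        /* loaded with BPF_F_XDP_HAS_FRAGS set */
    int xdp_frags_pass(struct xdp_md *ctx)
    {
            __u8 eth[14];

            /* Safe across frag boundaries, unlike direct packet access. */
            if (bpf_xdp_load_bytes(ctx, 0, eth, sizeof(eth)))
                    return XDP_DROP;
            return XDP_PASS;
    }
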
index 8b9bc5e..9467006 100644 (file)
@@ -706,7 +706,8 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
 LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 /* get map definition */
-LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
 /* get map name */
 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
 /* get/set map type */
@@ -832,13 +833,42 @@ struct bpf_xdp_set_link_opts {
 };
 #define bpf_xdp_set_link_opts__last_field old_fd
 
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
 LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                                        const struct bpf_xdp_set_link_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
 LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
                                     size_t info_size, __u32 flags);
 
+struct bpf_xdp_attach_opts {
+       size_t sz;
+       int old_prog_fd;
+       size_t :0;
+};
+#define bpf_xdp_attach_opts__last_field old_prog_fd
+
+struct bpf_xdp_query_opts {
+       size_t sz;
+       __u32 prog_id;          /* output */
+       __u32 drv_prog_id;      /* output */
+       __u32 hw_prog_id;       /* output */
+       __u32 skb_prog_id;      /* output */
+       __u8 attach_mode;       /* output */
+       size_t :0;
+};
+#define bpf_xdp_query_opts__last_field attach_mode
+
+LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
+                             const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags,
+                             const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts);
+LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id);
+
 /* TC related API */
 enum bpf_tc_attach_point {
        BPF_TC_INGRESS = 1 << 0,
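
bpf_xdp_query() reports every attached program id plus the attach mode through one opts struct; a usage sketch:

    #include <stdio.h>
    #include <bpf/libbpf.h>

    static void dump_xdp_state(int ifindex)
    {
            LIBBPF_OPTS(bpf_xdp_query_opts, opts);

            if (bpf_xdp_query(ifindex, 0, &opts))
                    return;
            printf("prog:%u drv:%u skb:%u hw:%u mode:%u\n",
                   opts.prog_id, opts.drv_prog_id, opts.skb_prog_id,
                   opts.hw_prog_id, (unsigned int)opts.attach_mode);
    }
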
index 5297839..e10f082 100644 (file)
@@ -247,6 +247,7 @@ LIBBPF_0.0.8 {
                bpf_link_create;
                bpf_link_update;
                bpf_map__set_initial_value;
+               bpf_prog_attach_opts;
                bpf_program__attach_cgroup;
                bpf_program__attach_lsm;
                bpf_program__is_lsm;
@@ -427,6 +428,10 @@ LIBBPF_0.7.0 {
                bpf_program__log_level;
                bpf_program__set_log_buf;
                bpf_program__set_log_level;
+               bpf_xdp_attach;
+               bpf_xdp_detach;
+               bpf_xdp_query;
+               bpf_xdp_query_id;
                libbpf_probe_bpf_helper;
                libbpf_probe_bpf_map_type;
                libbpf_probe_bpf_prog_type;
index 79131f7..3c2b281 100644 (file)
@@ -73,6 +73,11 @@ enum libbpf_strict_mode {
         * operation.
         */
        LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+       /*
+        * Error out on any SEC("maps") map definitions, which are deprecated
+        * in favor of BTF-defined map definitions in SEC(".maps").
+        */
+       LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,
 
        __LIBBPF_STRICT_LAST,
 };
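
For context on the new strict-mode bit: with LIBBPF_STRICT_MAP_DEFINITIONS set, only BTF-defined maps load. A minimal sketch of the accepted form (map name and sizes are illustrative):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* BTF-defined map in SEC(".maps"); a legacy struct bpf_map_def in
     * SEC("maps") would be rejected under this strict-mode flag.
     */
    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, __u32);
            __type(value, __u64);
    } counters SEC(".maps");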
index 39f25e0..c39c37f 100644 (file)
@@ -217,6 +217,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
        return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
 }
 
+int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+       int old_prog_fd, err;
+
+       if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
+               return libbpf_err(-EINVAL);
+
+       old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+       if (old_prog_fd)
+               flags |= XDP_FLAGS_REPLACE;
+       else
+               old_prog_fd = -1;
+
+       err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
+       return libbpf_err(err);
+}
+
+int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+       return bpf_xdp_attach(ifindex, -1, flags, opts);
+}
+
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                             const struct bpf_xdp_set_link_opts *opts)
 {
@@ -303,69 +325,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
        return 0;
 }
 
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-                         size_t info_size, __u32 flags)
+int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 {
-       struct xdp_id_md xdp_id = {};
-       __u32 mask;
-       int ret;
        struct libbpf_nla_req req = {
                .nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
                .nh.nlmsg_type     = RTM_GETLINK,
                .nh.nlmsg_flags    = NLM_F_DUMP | NLM_F_REQUEST,
                .ifinfo.ifi_family = AF_PACKET,
        };
+       struct xdp_id_md xdp_id = {};
+       int err;
 
-       if (flags & ~XDP_FLAGS_MASK || !info_size)
+       if (!OPTS_VALID(opts, bpf_xdp_query_opts))
+               return libbpf_err(-EINVAL);
+
+       if (xdp_flags & ~XDP_FLAGS_MASK)
                return libbpf_err(-EINVAL);
 
        /* Check whether the single {HW,DRV,SKB} mode is set */
-       flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
-       mask = flags - 1;
-       if (flags && flags & mask)
+       xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
+       if (xdp_flags & (xdp_flags - 1))
                return libbpf_err(-EINVAL);
 
        xdp_id.ifindex = ifindex;
-       xdp_id.flags = flags;
+       xdp_id.flags = xdp_flags;
 
-       ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+       err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
                                       get_xdp_info, &xdp_id);
-       if (!ret) {
-               size_t sz = min(info_size, sizeof(xdp_id.info));
+       if (err)
+               return libbpf_err(err);
 
-               memcpy(info, &xdp_id.info, sz);
-               memset((void *) info + sz, 0, info_size - sz);
-       }
+       OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
+       OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
+       OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
+       OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
+       OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
 
-       return libbpf_err(ret);
+       return 0;
 }
 
-static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+                         size_t info_size, __u32 flags)
 {
-       flags &= XDP_FLAGS_MODES;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+       size_t sz;
+       int err;
+
+       if (!info_size)
+               return libbpf_err(-EINVAL);
 
-       if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
-               return info->prog_id;
-       if (flags & XDP_FLAGS_DRV_MODE)
-               return info->drv_prog_id;
-       if (flags & XDP_FLAGS_HW_MODE)
-               return info->hw_prog_id;
-       if (flags & XDP_FLAGS_SKB_MODE)
-               return info->skb_prog_id;
+       err = bpf_xdp_query(ifindex, flags, &opts);
+       if (err)
+               return libbpf_err(err);
+
+       /* The field layout of struct xdp_link_info matches the layout of
+        * struct bpf_xdp_query_opts after the sz field.
+        */
+       sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
+       memcpy(info, &opts.prog_id, sz);
+       memset((void *)info + sz, 0, info_size - sz);
 
        return 0;
 }
 
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 {
-       struct xdp_link_info info;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
        int ret;
 
-       ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
-       if (!ret)
-               *prog_id = get_xdp_id(&info, flags);
+       ret = bpf_xdp_query(ifindex, flags, &opts);
+       if (ret)
+               return libbpf_err(ret);
+
+       flags &= XDP_FLAGS_MODES;
 
-       return libbpf_err(ret);
+       if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
+               *prog_id = opts.prog_id;
+       else if (flags & XDP_FLAGS_DRV_MODE)
+               *prog_id = opts.drv_prog_id;
+       else if (flags & XDP_FLAGS_HW_MODE)
+               *prog_id = opts.hw_prog_id;
+       else if (flags & XDP_FLAGS_SKB_MODE)
+               *prog_id = opts.skb_prog_id;
+       else
+               *prog_id = 0;
+
+       return 0;
+}
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+       return bpf_xdp_query_id(ifindex, flags, prog_id);
 }
 
 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
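
Since the legacy getters are now thin wrappers over bpf_xdp_query(), new code can call it directly. A minimal sketch (ifindex assumed valid; no mode flag requested, so all per-mode IDs are reported):

    #include <stdio.h>
    #include <bpf/libbpf.h>

    static void print_xdp_state(int ifindex)
    {
            LIBBPF_OPTS(bpf_xdp_query_opts, opts);

            if (bpf_xdp_query(ifindex, 0, &opts))
                    return;

            /* All fields are outputs; 0 means nothing attached in that mode. */
            printf("mode=%u prog=%u drv=%u hw=%u skb=%u\n",
                   opts.attach_mode, opts.prog_id, opts.drv_prog_id,
                   opts.hw_prog_id, opts.skb_prog_id);
    }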
index 7ecfaac..ef2832b 100644 (file)
@@ -1005,24 +1005,22 @@ __bpf_map__config_value(struct bpf_map *map,
 {
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
-       const struct bpf_map_def *def = bpf_map__def(map);
 
-       if (IS_ERR(def)) {
-               pr_debug("Unable to get map definition from '%s'\n",
-                        map_name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       if (def->type != BPF_MAP_TYPE_ARRAY) {
+       if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
        }
-       if (def->key_size < sizeof(unsigned int)) {
+       if (bpf_map__key_size(map) < sizeof(unsigned int)) {
                pr_debug("Map %s has incorrect key size\n", map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
        }
-       switch (def->value_size) {
+       switch (bpf_map__value_size(map)) {
        case 1:
        case 2:
        case 4:
@@ -1064,7 +1062,6 @@ __bpf_map__config_event(struct bpf_map *map,
                        struct parse_events_term *term,
                        struct evlist *evlist)
 {
-       const struct bpf_map_def *def;
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
        struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
@@ -1075,18 +1072,16 @@ __bpf_map__config_event(struct bpf_map *map,
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("Unable to get map definition from '%s'\n",
-                        map_name);
-               return PTR_ERR(def);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
+               return PTR_ERR(map);
        }
 
        /*
         * No need to check key_size and value_size:
         * kernel has already checked them.
         */
-       if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+       if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1135,7 +1130,6 @@ config_map_indices_range_check(struct parse_events_term *term,
                               const char *map_name)
 {
        struct parse_events_array *array = &term->array;
-       const struct bpf_map_def *def;
        unsigned int i;
 
        if (!array->nr_ranges)
@@ -1146,10 +1140,8 @@ config_map_indices_range_check(struct parse_events_term *term,
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("ERROR: Unable to get map definition from '%s'\n",
-                        map_name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
@@ -1158,7 +1150,7 @@ config_map_indices_range_check(struct parse_events_term *term,
                size_t length = array->ranges[i].length;
                unsigned int idx = start + length - 1;
 
-               if (idx >= def->max_entries) {
+               if (idx >= bpf_map__max_entries(map)) {
                        pr_debug("ERROR: index %d too large\n", idx);
                        return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
                }
@@ -1251,21 +1243,21 @@ out:
 }
 
 typedef int (*map_config_func_t)(const char *name, int map_fd,
-                                const struct bpf_map_def *pdef,
+                                const struct bpf_map *map,
                                 struct bpf_map_op *op,
                                 void *pkey, void *arg);
 
 static int
 foreach_key_array_all(map_config_func_t func,
                      void *arg, const char *name,
-                     int map_fd, const struct bpf_map_def *pdef,
+                     int map_fd, const struct bpf_map *map,
                      struct bpf_map_op *op)
 {
        unsigned int i;
        int err;
 
-       for (i = 0; i < pdef->max_entries; i++) {
-               err = func(name, map_fd, pdef, op, &i, arg);
+       for (i = 0; i < bpf_map__max_entries(map); i++) {
+               err = func(name, map_fd, map, op, &i, arg);
                if (err) {
                        pr_debug("ERROR: failed to insert value to %s[%u]\n",
                                 name, i);
@@ -1278,7 +1270,7 @@ foreach_key_array_all(map_config_func_t func,
 static int
 foreach_key_array_ranges(map_config_func_t func, void *arg,
                         const char *name, int map_fd,
-                        const struct bpf_map_def *pdef,
+                        const struct bpf_map *map,
                         struct bpf_map_op *op)
 {
        unsigned int i, j;
@@ -1291,7 +1283,7 @@ foreach_key_array_ranges(map_config_func_t func, void *arg,
                for (j = 0; j < length; j++) {
                        unsigned int idx = start + j;
 
-                       err = func(name, map_fd, pdef, op, &idx, arg);
+                       err = func(name, map_fd, map, op, &idx, arg);
                        if (err) {
                                pr_debug("ERROR: failed to insert value to %s[%u]\n",
                                         name, idx);
@@ -1307,9 +1299,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                           map_config_func_t func,
                           void *arg)
 {
-       int err, map_fd;
+       int err, map_fd, type;
        struct bpf_map_op *op;
-       const struct bpf_map_def *def;
        const char *name = bpf_map__name(map);
        struct bpf_map_priv *priv = bpf_map__priv(map);
 
@@ -1322,9 +1313,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return 0;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("ERROR: failed to get definition from map %s\n", name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
        map_fd = bpf_map__fd(map);
@@ -1333,19 +1323,19 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return map_fd;
        }
 
+       type = bpf_map__type(map);
        list_for_each_entry(op, &priv->ops_list, list) {
-               switch (def->type) {
+               switch (type) {
                case BPF_MAP_TYPE_ARRAY:
                case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
                        switch (op->key_type) {
                        case BPF_MAP_KEY_ALL:
                                err = foreach_key_array_all(func, arg, name,
-                                                           map_fd, def, op);
+                                                           map_fd, map, op);
                                break;
                        case BPF_MAP_KEY_RANGES:
                                err = foreach_key_array_ranges(func, arg, name,
-                                                              map_fd, def,
-                                                              op);
+                                                              map_fd, map, op);
                                break;
                        default:
                                pr_debug("ERROR: keytype for map '%s' invalid\n",
@@ -1454,7 +1444,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
 
 static int
 apply_obj_config_map_for_key(const char *name, int map_fd,
-                            const struct bpf_map_def *pdef,
+                            const struct bpf_map *map,
                             struct bpf_map_op *op,
                             void *pkey, void *arg __maybe_unused)
 {
@@ -1463,7 +1453,7 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
        switch (op->op_type) {
        case BPF_MAP_OP_SET_VALUE:
                err = apply_config_value_for_key(map_fd, pkey,
-                                                pdef->value_size,
+                                                bpf_map__value_size(map),
                                                 op->v.value);
                break;
        case BPF_MAP_OP_SET_EVSEL:
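
The conversion in this file follows a single pattern: drop the deprecated bpf_map__def() snapshot and read each field through its getter. A condensed sketch of the equivalent checks (check_array_map is a hypothetical helper):

    #include <errno.h>
    #include <linux/bpf.h>
    #include <bpf/libbpf.h>

    static int check_array_map(const struct bpf_map *map)
    {
            unsigned int i;

            if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY)
                    return -EINVAL;
            if (bpf_map__key_size(map) < sizeof(unsigned int))
                    return -EINVAL;
            /* Iterate keys without ever touching struct bpf_map_def. */
            for (i = 0; i < bpf_map__max_entries(map); i++)
                    ; /* per-key work would go here */
            return 0;
    }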
index eb853ca..c863ae0 100644 (file)
@@ -9,25 +9,25 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
 {
-       return def->type == BPF_MAP_TYPE_PERCPU_HASH ||
-              def->type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-              def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-              def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+       return type == BPF_MAP_TYPE_PERCPU_HASH ||
+              type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+              type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+              type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
 }
 
-static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
+static void *bpf_map__alloc_value(const struct bpf_map *map)
 {
-       if (bpf_map_def__is_per_cpu(def))
-               return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF));
+       if (bpf_map__is_per_cpu(bpf_map__type(map)))
+               return malloc(round_up(bpf_map__value_size(map), 8) *
+                             sysconf(_SC_NPROCESSORS_CONF));
 
-       return malloc(def->value_size);
+       return malloc(bpf_map__value_size(map));
 }
 
 int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
 {
-       const struct bpf_map_def *def = bpf_map__def(map);
        void *prev_key = NULL, *key, *value;
        int fd = bpf_map__fd(map), err;
        int printed = 0;
@@ -35,15 +35,15 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
        if (fd < 0)
                return fd;
 
-       if (IS_ERR(def))
-               return PTR_ERR(def);
+       if (!map)
+               return PTR_ERR(map);
 
        err = -ENOMEM;
-       key = malloc(def->key_size);
+       key = malloc(bpf_map__key_size(map));
        if (key == NULL)
                goto out;
 
-       value = bpf_map_def__alloc_value(def);
+       value = bpf_map__alloc_value(map);
        if (value == NULL)
                goto out_free_key;
 
index 42ffc24..945f92d 100644 (file)
@@ -21,7 +21,7 @@ endif
 
 BPF_GCC                ?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS     ?=
-CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)             \
+CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS)     \
          -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)          \
          -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
 LDFLAGS += $(SAN_CFLAGS)
@@ -292,7 +292,7 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
 MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)                  \
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)          \
             -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)                   \
             -I$(abspath $(OUTPUT)/../usr/include)
 
index df3b292..bdbacf5 100644 (file)
@@ -109,26 +109,31 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
        .write = bpf_testmod_test_write,
 };
 
-BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_SET_START(bpf_testmod_check_kfunc_ids)
 BTF_ID(func, bpf_testmod_test_mod_kfunc)
-BTF_SET_END(bpf_testmod_kfunc_ids)
+BTF_SET_END(bpf_testmod_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &bpf_testmod_check_kfunc_ids,
+};
+
+extern int bpf_fentry_test1(int a);
 
 static int bpf_testmod_init(void)
 {
        int ret;
 
-       ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
-       if (ret)
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
-       return 0;
+       if (bpf_fentry_test1(0) < 0)
+               return -EINVAL;
+       return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
 static void bpf_testmod_exit(void)
 {
-       unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
        return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
index f628713..763db63 100644 (file)
@@ -48,3 +48,8 @@ CONFIG_IMA_READ_POLICY=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_FUNCTION_TRACER=y
 CONFIG_DYNAMIC_FTRACE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_USERFAULTFD=y
index d0f06e4..eac71fb 100644 (file)
@@ -1,13 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/capability.h>
 
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
 static int duration;
 
+static int create_netns(void)
+{
+       if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+               return -1;
+
+       return 0;
+}
+
 void try_bind(int family, int port, int expected_errno)
 {
        struct sockaddr_storage addr = {};
@@ -75,6 +86,9 @@ void test_bind_perm(void)
        struct bind_perm *skel;
        int cgroup_fd;
 
+       if (create_netns())
+               return;
+
        cgroup_fd = test__join_cgroup("/bind_perm");
        if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
                return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
new file mode 100644 (file)
index 0000000..ee725d4
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+       struct sockaddr_un addr = {
+               .sun_family = AF_UNIX,
+               .sun_path = "",
+       };
+       socklen_t len;
+       int fd, err;
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (!ASSERT_NEQ(fd, -1, "socket"))
+               return -1;
+
+       len = offsetof(struct sockaddr_un, sun_path);
+       err = bind(fd, (struct sockaddr *)&addr, len);
+       if (!ASSERT_OK(err, "bind"))
+               return -1;
+
+       len = sizeof(addr);
+       err = getsockname(fd, (struct sockaddr *)&addr, &len);
+       if (!ASSERT_OK(err, "getsockname"))
+               return -1;
+
+       memcpy(&skel->bss->sun_path, &addr.sun_path,
+              len - offsetof(struct sockaddr_un, sun_path));
+
+       return fd;
+}
+
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+       socklen_t optlen;
+       int i, err;
+
+       for (i = 0; i < NR_CASES; i++) {
+               if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1,
+                               "bpf_(get|set)sockopt"))
+                       return;
+
+               err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+                                &(skel->data->sndbuf_setsockopt[i]),
+                                sizeof(skel->data->sndbuf_setsockopt[i]));
+               if (!ASSERT_OK(err, "setsockopt"))
+                       return;
+
+               optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]);
+               err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+                                &(skel->bss->sndbuf_getsockopt_expected[i]),
+                                &optlen);
+               if (!ASSERT_OK(err, "getsockopt"))
+                       return;
+
+               if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i],
+                              skel->bss->sndbuf_getsockopt_expected[i],
+                              "bpf_(get|set)sockopt"))
+                       return;
+       }
+}
+
+void test_bpf_iter_setsockopt_unix(void)
+{
+       struct bpf_iter_setsockopt_unix *skel;
+       int err, unix_fd, iter_fd;
+       char buf;
+
+       skel = bpf_iter_setsockopt_unix__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               return;
+
+       unix_fd = create_unix_socket(skel);
+       if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+               goto destroy;
+
+       skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+       if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+               goto destroy;
+
+       iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+       if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+               goto destroy;
+
+       while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+              errno == EAGAIN)
+               ;
+       if (!ASSERT_OK(err, "read iter error"))
+               goto destroy;
+
+       test_sndbuf(skel, unix_fd);
+destroy:
+       bpf_iter_setsockopt_unix__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
new file mode 100644 (file)
index 0000000..d43f548
--- /dev/null
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program and make it
+ * sleep; the BPF program is then loaded and the verifier's return value is
+ * inspected. After this, the userfaultfd is closed so that the module
+ * loading thread makes forward progress, and fmod_ret injects an error so
+ * that the module load fails and the module is freed.
+ *
+ * If the verifier succeeds in loading the supplied program, it ends up
+ * taking a reference to the freed module and triggers a crash when the
+ * program fd is closed later. This is true for both kfuncs and ksyms. In
+ * both cases, the crash is triggered inside bpf_prog_free_deferred, when
+ * the module reference is finally released.
+ */
+
+struct test_config {
+       const char *str_open;
+       void *(*bpf_open_and_load)();
+       void (*bpf_destroy)(void *);
+};
+
+enum test_state {
+       _TS_INVALID,
+       TS_MODULE_LOAD,
+       TS_MODULE_LOAD_FAIL,
+};
+
+static _Atomic enum test_state state = _TS_INVALID;
+
+static int sys_finit_module(int fd, const char *param_values, int flags)
+{
+       return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int sys_delete_module(const char *name, unsigned int flags)
+{
+       return syscall(__NR_delete_module, name, flags);
+}
+
+static int load_module(const char *mod)
+{
+       int ret, fd;
+
+       fd = open("bpf_testmod.ko", O_RDONLY);
+       if (fd < 0)
+               return fd;
+
+       ret = sys_finit_module(fd, "", 0);
+       close(fd);
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
+static void *load_module_thread(void *p)
+{
+       if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+               atomic_store(&state, TS_MODULE_LOAD);
+       else
+               atomic_store(&state, TS_MODULE_LOAD_FAIL);
+       return p;
+}
+
+static int sys_userfaultfd(int flags)
+{
+       return syscall(__NR_userfaultfd, flags);
+}
+
+static int test_setup_uffd(void *fault_addr)
+{
+       struct uffdio_register uffd_register = {};
+       struct uffdio_api uffd_api = {};
+       int uffd;
+
+       uffd = sys_userfaultfd(O_CLOEXEC);
+       if (uffd < 0)
+               return -errno;
+
+       uffd_api.api = UFFD_API;
+       uffd_api.features = 0;
+       if (ioctl(uffd, UFFDIO_API, &uffd_api)) {
+               close(uffd);
+               return -1;
+       }
+
+       uffd_register.range.start = (unsigned long)fault_addr;
+       uffd_register.range.len = 4096;
+       uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+       if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
+               close(uffd);
+               return -1;
+       }
+       return uffd;
+}
+
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+       void *fault_addr, *skel_fail;
+       struct bpf_mod_race *skel;
+       struct uffd_msg uffd_msg;
+       pthread_t load_mod_thrd;
+       _Atomic int *blockingp;
+       int uffd, ret;
+
+       fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+               return;
+
+       if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+               goto end_mmap;
+
+       skel = bpf_mod_race__open();
+       if (!ASSERT_OK_PTR(skel, "bpf_mod_race__open"))
+               goto end_module;
+
+       skel->rodata->bpf_mod_race_config.tgid = getpid();
+       skel->rodata->bpf_mod_race_config.inject_error = -4242;
+       skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+       if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod_race__load"))
+               goto end_destroy;
+       blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+       if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+               goto end_destroy;
+
+       uffd = test_setup_uffd(fault_addr);
+       if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+               goto end_destroy;
+
+       if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+                      "load module thread"))
+               goto end_uffd;
+
+       /* Now we either fail loading the module or block in the bpf prog; spin to find out */
+       while (!atomic_load(&state) && !atomic_load(blockingp))
+               ;
+       if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+               goto end_join;
+       if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+               pthread_kill(load_mod_thrd, SIGKILL);
+               goto end_uffd;
+       }
+
+       /* We might have set bpf_blocking to 1, but may not have blocked in
+        * bpf_copy_from_user. Read the userfaultfd descriptor to verify that.
+        */
+       if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+                      "read uffd block event"))
+               goto end_join;
+       if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+               goto end_join;
+
+       /* We know that load_mod_thrd is blocked in the fmod_ret program, so
+        * the module state is still MODULE_STATE_COMING because mod->init
+        * hasn't returned. This is the time we try to load a program calling
+        * a kfunc and check if we get ENXIO from the verifier.
+        */
+       skel_fail = config->bpf_open_and_load();
+       ret = errno;
+       if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+               /* Close uffd to unblock load_mod_thrd */
+               close(uffd);
+               uffd = -1;
+               while (atomic_load(blockingp) != 2)
+                       ;
+               ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+               config->bpf_destroy(skel_fail);
+               goto end_join;
+       }
+       ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+       ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+
+       close(uffd);
+       uffd = -1;
+end_join:
+       pthread_join(load_mod_thrd, NULL);
+       if (uffd < 0)
+               ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+       if (uffd >= 0)
+               close(uffd);
+end_destroy:
+       bpf_mod_race__destroy(skel);
+       ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+       sys_delete_module("bpf_testmod", 0);
+       ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+end_mmap:
+       munmap(fault_addr, 4096);
+       atomic_store(&state, _TS_INVALID);
+}
+
+static const struct test_config ksym_config = {
+       .str_open = "ksym_race__open_and_load",
+       .bpf_open_and_load = (void *)ksym_race__open_and_load,
+       .bpf_destroy = (void *)ksym_race__destroy,
+};
+
+static const struct test_config kfunc_config = {
+       .str_open = "kfunc_call_race__open_and_load",
+       .bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+       .bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+
+void serial_test_bpf_mod_race(void)
+{
+       if (test__start_subtest("ksym (used_btfs UAF)"))
+               test_bpf_mod_race_config(&ksym_config);
+       if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+               test_bpf_mod_race_config(&kfunc_config);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
new file mode 100644 (file)
index 0000000..e3166a8
--- /dev/null
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_bpf_nf.skel.h"
+
+enum {
+       TEST_XDP,
+       TEST_TC_BPF,
+};
+
+void test_bpf_nf_ct(int mode)
+{
+       struct test_bpf_nf *skel;
+       int prog_fd, err, retval;
+
+       skel = test_bpf_nf__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+               return;
+
+       if (mode == TEST_XDP)
+               prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+       else
+               prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
+
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+                               (__u32 *)&retval, NULL);
+       if (!ASSERT_OK(err, "bpf_prog_test_run"))
+               goto end;
+
+       ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+       ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+       ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+       ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+       ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+       ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+       ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+       ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+end:
+       test_bpf_nf__destroy(skel);
+}
+
+void test_bpf_nf(void)
+{
+       if (test__start_subtest("xdp-ct"))
+               test_bpf_nf_ct(TEST_XDP);
+       if (test__start_subtest("tc-bpf-ct"))
+               test_bpf_nf_ct(TEST_TC_BPF);
+}
index 8ba53ac..14f9b61 100644 (file)
@@ -4560,6 +4560,8 @@ static void do_test_file(unsigned int test_num)
        has_btf_ext = btf_ext != NULL;
        btf_ext__free(btf_ext);
 
+       /* temporarily disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+       libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS);
        obj = bpf_object__open(test->file);
        err = libbpf_get_error(obj);
        if (CHECK(err, "obj: %d", err))
@@ -4684,6 +4686,8 @@ skip:
        fprintf(stderr, "OK");
 
 done:
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        btf__free(btf);
        free(func_info);
        bpf_object__close(obj);
index d3e8f72..38b3c47 100644 (file)
@@ -194,14 +194,14 @@ void serial_test_cgroup_attach_multi(void)
 
        attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
        attach_opts.replace_prog_fd = allow_prog[0];
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_override", "unexpected success\n"))
                goto err;
        CHECK_FAIL(errno != EINVAL);
 
        attach_opts.flags = BPF_F_REPLACE;
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_no_multi", "unexpected success\n"))
                goto err;
@@ -209,7 +209,7 @@ void serial_test_cgroup_attach_multi(void)
 
        attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
        attach_opts.replace_prog_fd = -1;
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_bad_fd", "unexpected success\n"))
                goto err;
@@ -217,7 +217,7 @@ void serial_test_cgroup_attach_multi(void)
 
        /* replacing a program that is not attached to cgroup should fail  */
        attach_opts.replace_prog_fd = allow_prog[3];
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_no_ent", "unexpected success\n"))
                goto err;
@@ -225,14 +225,14 @@ void serial_test_cgroup_attach_multi(void)
 
        /* replace 1st from the top program */
        attach_opts.replace_prog_fd = allow_prog[0];
-       if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
                                        BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "prog_replace", "errno=%d\n", errno))
                goto err;
 
        /* replace program with itself */
        attach_opts.replace_prog_fd = allow_prog[6];
-       if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
                                        BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "prog_replace", "errno=%d\n", errno))
                goto err;
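
For reference, the renamed bpf_prog_attach_opts() keeps the old bpf_prog_attach_xattr() semantics. A minimal replace-style attach sketch (cg_fd, old_fd and new_fd are assumed to be valid descriptors):

    #include <linux/bpf.h>
    #include <bpf/bpf.h>

    static int replace_egress_prog(int cg_fd, int old_fd, int new_fd)
    {
            LIBBPF_OPTS(bpf_prog_attach_opts, opts,
                    .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE,
                    .replace_prog_fd = old_fd,
            );

            return bpf_prog_attach_opts(new_fd, cg_fd,
                                        BPF_CGROUP_INET_EGRESS, &opts);
    }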
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
new file mode 100644 (file)
index 0000000..0b47c3c
--- /dev/null
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+
+#define SOL_CUSTOM     0xdeadbeef
+
+static int zero;
+
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a setsockopt prog that sets EUNATCH; assert that
+        * we actually get that error when we run setsockopt().
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, and one that gets the
+        * previously set errno. Assert that we get the same errno back.
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that gets the previously set errno.
+        * Assert that, without anything setting one, we get 0.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                 &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that gets the previously set errno, and then
+        * one that sets the errno to EUNATCH. Assert that the get does not
+        * see EUNATCH set later, and does not prevent EUNATCH from being set.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+       bpf_link__destroy(link_set_eunatch);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
+        * and then one that gets the exported errno. Assert that both the
+        * syscall and the helper see the last set errno.
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_set_eisconn);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a setsockopt prog that returns a reject without setting
+        * errno (legacy reject), and one that gets the errno. Assert that,
+        * for backward compatibility, the syscall results in EPERM and that
+        * this is also visible to the helper.
+        */
+       link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_legacy_eperm);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a setsockopt prog that sets EUNATCH, then one that returns
+        * a reject without setting errno, and then one that gets the exported
+        * errno. Assert that both the syscall and the helper's errno are
+        * unaffected by the second prog (i.e. a legacy reject does not
+        * override the errno to EPERM).
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_legacy_eperm);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_get_retval = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt prog that gets the previously set errno.
+        * Assert that the error from the kernel lands in both
+        * ctx_retval_value and retval_value.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                  &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_set_eisconn = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt prog that sets retval to -EISCONN. Assert that
+        * this overrides the value from the kernel.
+        */
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                  &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eisconn);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL;
+       struct bpf_link *link_get_retval = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt prog that sets retval to -EISCONN, and one that
+        * clears the ctx retval. Assert that clearing the ctx retval is synced
+        * to the helper and clears any errors from both the kernel and BPF.
+        */
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+       link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                 &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eisconn);
+       bpf_link__destroy(link_clear_retval);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+void test_cgroup_getset_retval(void)
+{
+       int cgroup_fd = -1;
+       int sock_fd = -1;
+
+       cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+       if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+               goto close_fd;
+
+       sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+       if (!ASSERT_GE(sock_fd, 0, "start-server"))
+               goto close_fd;
+
+       if (test__start_subtest("setsockopt-set"))
+               test_setsockopt_set(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-set_and_get"))
+               test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-default_zero"))
+               test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-default_zero_and_set"))
+               test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-override"))
+               test_setsockopt_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-legacy_eperm"))
+               test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-legacy_no_override"))
+               test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-get"))
+               test_getsockopt_get(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-override"))
+               test_getsockopt_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-retval_sync"))
+               test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+close_fd:
+       close(cgroup_fd);
+}
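These subtests exercise the new retval plumbing: a cgroup/getsockopt program can
fail the syscall even while returning 1 by setting a negative retval (via
bpf_set_retval() or ctx->retval), and writing ctx->retval = 0 clears a pending
error from either the kernel or BPF. A hedged sketch of what userspace observes
when only the set_eisconn program above is attached, for a SOL_CUSTOM option the
kernel itself does not handle:

	int buf;
	socklen_t optlen = sizeof(buf);

	/* bpf_set_retval(-EISCONN) in set_eisconn reaches userspace intact */
	if (getsockopt(sock_fd, SOL_CUSTOM, 0, &buf, &optlen) < 0 &&
	    errno == EISCONN)
		puts("BPF-injected error observed");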
index ac54e3f..dfafd62 100644 (file)
@@ -457,7 +457,7 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
        if (map_fd < 0)
                return -1;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
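This hunk (and the tailcall ones further below) migrates off bpf_map__def(),
which is deprecated ahead of libbpf 1.0 in favor of per-field getters. The
substitution is mechanical; a small before/after sketch for reference:

	/* deprecated: reaches into struct bpf_map_def directly */
	max = bpf_map__def(prog_array)->max_entries;
	vsz = bpf_map__def(map)->value_size;

	/* preferred: getters that work however the map was defined */
	max = bpf_map__max_entries(prog_array);
	vsz = bpf_map__value_size(map);

bpf_map__type() and bpf_map__key_size() round out the same getter family; the
sockmap hunk below uses bpf_map__type() the same way.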
index 9da131b..917165e 100644 (file)
@@ -121,7 +121,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       buff = malloc(bpf_map__def(map)->value_size);
+       buff = malloc(bpf_map__value_size(map));
        if (buff)
                err = bpf_map_update_elem(map_fd, &zero, buff, 0);
        free(buff);
index 1db86ea..57331c6 100644 (file)
@@ -20,7 +20,7 @@ void test_global_data_init(void)
        if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
                goto out;
 
-       sz = bpf_map__def(map)->value_size;
+       sz = bpf_map__value_size(map);
        newval = malloc(sz);
        if (CHECK_FAIL(!newval))
                goto out;
index 7d7445c..b39a4f0 100644 (file)
@@ -27,6 +27,12 @@ static void test_main(void)
        ASSERT_OK(err, "bpf_prog_test_run(test2)");
        ASSERT_EQ(retval, 3, "test2-retval");
 
+       prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+                               NULL, NULL, (__u32 *)&retval, NULL);
+       ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
+       ASSERT_EQ(retval, 0, "test_ref_btf_id-retval");
+
        kfunc_call_test_lskel__destroy(skel);
 }
 
index 85db0f4..b97a8f2 100644 (file)
@@ -8,6 +8,7 @@
 #include "test_sockmap_update.skel.h"
 #include "test_sockmap_invalid_update.skel.h"
 #include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
 #define TCP_REPAIR             19      /* TCP sock is under repair right now */
@@ -315,6 +316,63 @@ out:
        test_sockmap_skb_verdict_attach__destroy(skel);
 }
 
+static __u32 query_prog_id(int prog_fd)
+{
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       int err;
+
+       err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
+           !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+               return 0;
+
+       return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+       struct test_sockmap_progs_query *skel;
+       int err, map_fd, verdict_fd;
+       __u32 attach_flags = 0;
+       __u32 prog_ids[3] = {};
+       __u32 prog_cnt = 3;
+
+       skel = test_sockmap_progs_query__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+               return;
+
+       map_fd = bpf_map__fd(skel->maps.sock_map);
+
+       if (attach_type == BPF_SK_MSG_VERDICT)
+               verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+       else
+               verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+       err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       ASSERT_OK(err, "bpf_prog_query failed");
+       ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+       ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+       err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+       if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+               goto out;
+
+       prog_cnt = 1;
+       err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       ASSERT_OK(err, "bpf_prog_query failed");
+       ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+       ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+       ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+                 "wrong prog_ids on query");
+
+       bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+       test_sockmap_progs_query__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
        if (test__start_subtest("sockmap create_update_free"))
@@ -341,4 +399,12 @@ void test_sockmap_basic(void)
                test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
                                                BPF_SK_SKB_VERDICT);
        }
+       if (test__start_subtest("sockmap msg_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+       if (test__start_subtest("sockmap stream_parser progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+       if (test__start_subtest("sockmap stream_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+       if (test__start_subtest("sockmap skb_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
 }
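bpf_prog_query() above writes up to *prog_cnt program ids and updates the count
in place. When the number of attached programs is not known in advance, a hedged
two-step sketch (passing NULL ids first to learn the count):

	__u32 cnt = 0;
	__u32 *ids;

	/* with prog_ids == NULL, only the attached-program count is filled in */
	if (!bpf_prog_query(map_fd, BPF_SK_SKB_VERDICT, 0, NULL, NULL, &cnt)) {
		ids = calloc(cnt, sizeof(*ids));
		bpf_prog_query(map_fd, BPF_SK_SKB_VERDICT, 0, NULL, ids, &cnt);
	}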
index 7e21bfa..2cf0c7a 100644 (file)
@@ -1413,14 +1413,12 @@ close_srv1:
 
 static void test_ops_cleanup(const struct bpf_map *map)
 {
-       const struct bpf_map_def *def;
        int err, mapfd;
        u32 key;
 
-       def = bpf_map__def(map);
        mapfd = bpf_map__fd(map);
 
-       for (key = 0; key < def->max_entries; key++) {
+       for (key = 0; key < bpf_map__max_entries(map); key++) {
                err = bpf_map_delete_elem(mapfd, &key);
                if (err && errno != EINVAL && errno != ENOENT)
                        FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
@@ -1443,13 +1441,13 @@ static const char *family_str(sa_family_t family)
 
 static const char *map_type_str(const struct bpf_map *map)
 {
-       const struct bpf_map_def *def;
+       int type;
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def))
+       if (!map)
                return "invalid";
+       type = bpf_map__type(map);
 
-       switch (def->type) {
+       switch (type) {
        case BPF_MAP_TYPE_SOCKMAP:
                return "sockmap";
        case BPF_MAP_TYPE_SOCKHASH:
index 4b937e5..30a99d2 100644 (file)
@@ -173,11 +173,11 @@ static int getsetsockopt(void)
        }
 
        memset(&buf, 0, sizeof(buf));
-       buf.zc.address = 12345; /* rejected by BPF */
+       buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
        optlen = sizeof(buf.zc);
        errno = 0;
        err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
-       if (errno != EPERM) {
+       if (errno != EINVAL) {
                log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
                        err, errno);
                goto err;
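Context for the EPERM to EINVAL flip: with the retval rework these patches land,
a getsockopt program returning 0 no longer forces -EPERM over the kernel's own
error, so the unaligned address is now reported as tcp_zerocopy_receive()'s
-EINVAL. For contrast, a hedged sketch of a well-formed zerocopy-receive call
(fd is a connected TCP socket; page size assumed to be 4096):

	struct tcp_zerocopy_receive zc = {};
	socklen_t zc_len = sizeof(zc);
	size_t chunk = 4096;
	void *addr;

	/* map receive pages from the socket itself; address is page aligned */
	addr = mmap(NULL, chunk, PROT_READ, MAP_SHARED, fd, 0);
	zc.address = (__u64)(unsigned long)addr;
	zc.length = chunk;
	getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);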
index 5dc0f42..796f231 100644 (file)
@@ -37,7 +37,7 @@ static void test_tailcall_1(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -53,7 +53,7 @@ static void test_tailcall_1(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
                                        &duration, &retval, NULL);
                CHECK(err || retval != i, "tailcall",
@@ -69,7 +69,7 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -90,8 +90,8 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-               j = bpf_map__def(prog_array)->max_entries - 1 - i;
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+               j = bpf_map__max_entries(prog_array) - 1 - i;
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -107,8 +107,8 @@ static void test_tailcall_1(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-               j = bpf_map__def(prog_array)->max_entries - 1 - i;
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+               j = bpf_map__max_entries(prog_array) - 1 - i;
 
                err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
                                        &duration, &retval, NULL);
@@ -125,7 +125,7 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_delete_elem(map_fd, &i);
                if (CHECK_FAIL(err >= 0 || errno != ENOENT))
                        goto out;
@@ -175,7 +175,7 @@ static void test_tailcall_2(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -353,7 +353,7 @@ static void test_tailcall_4(void)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -369,7 +369,7 @@ static void test_tailcall_4(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -380,7 +380,7 @@ static void test_tailcall_4(void)
                      "err %d errno %d retval %d\n", err, errno, retval);
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -441,7 +441,7 @@ static void test_tailcall_5(void)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -457,7 +457,7 @@ static void test_tailcall_5(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -468,7 +468,7 @@ static void test_tailcall_5(void)
                      "err %d errno %d retval %d\n", err, errno, retval);
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -520,7 +520,7 @@ static void test_tailcall_bpf2bpf_1(void)
                goto out;
 
        /* nop -> jmp */
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -681,7 +681,7 @@ static void test_tailcall_bpf2bpf_3(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -778,7 +778,7 @@ static void test_tailcall_bpf2bpf_4(bool noise)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
new file mode 100644 (file)
index 0000000..31c1886
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_xdp_update_frags(void)
+{
+       const char *file = "./test_xdp_update_frags.o";
+       __u32 duration, retval, size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, prog_fd;
+       __u32 *offset;
+       __u8 *buf;
+
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               goto out;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(128);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+               goto out;
+
+       memset(buf, 0, 128);
+       offset = (__u32 *)buf;
+       *offset = 16;
+       buf[*offset] = 0xaa;            /* marker at offset 16 (head) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 31 (head) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 128,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+       ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+       free(buf);
+
+       buf = malloc(9000);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+               goto out;
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 5000;
+       buf[*offset] = 0xaa;            /* marker at offset 5000 (frag0) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 5015 (frag0) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+       ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 3510;
+       buf[*offset] = 0xaa;            /* marker at offset 3510 (head) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 3525 (frag0) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+       ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 7606;
+       buf[*offset] = 0xaa;            /* marker at offset 7606 (frag0) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 7621 (frag1) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+       ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+       free(buf);
+out:
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags(void)
+{
+       if (test__start_subtest("xdp_adjust_frags"))
+               test_xdp_update_frags();
+}
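The userspace half above encodes the patch offset in the packet's first four
bytes and expects the 16-byte marker window rewritten from 0xaa to 0xbb. A
plausible sketch of the BPF half, assuming the bpf_xdp_load_bytes() and
bpf_xdp_store_bytes() helpers and the xdp.frags section suffix introduced in
this series (the shipped test_xdp_update_frags.c may differ in detail):

	// SPDX-License-Identifier: GPL-2.0
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp.frags")
	int xdp_update_frags(struct xdp_md *xdp)
	{
		__u32 offset = 0;
		__u8 val[16];

		/* first 4 bytes of the packet carry the offset to patch */
		if (bpf_xdp_load_bytes(xdp, 0, &offset, sizeof(offset)))
			return XDP_DROP;

		/* the helpers work across frag boundaries, unlike direct
		 * data/data_end access
		 */
		if (bpf_xdp_load_bytes(xdp, offset, val, sizeof(val)))
			return XDP_DROP;

		if (val[0] == 0xaa && val[15] == 0xaa) {
			val[0] = 0xbb;
			val[15] = 0xbb;
			if (bpf_xdp_store_bytes(xdp, offset, val, sizeof(val)))
				return XDP_DROP;
		}

		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";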
index 3f5a17c..ccc9e63 100644 (file)
@@ -11,22 +11,21 @@ static void test_xdp_adjust_tail_shrink(void)
        char buf[128];
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
                return;
 
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
-
-       CHECK(err || retval != XDP_DROP,
-             "ipv4", "err %d errno %d retval %d size %d\n",
-             err, errno, retval, size);
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
        expect_sz = sizeof(pkt_v6) - 20;  /* Test shrink with 20 bytes */
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_TX || size != expect_sz,
-             "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-             err, errno, retval, size, expect_sz);
+       ASSERT_OK(err, "ipv6");
+       ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+       ASSERT_EQ(size, expect_sz, "ipv6 size");
+
        bpf_object__close(obj);
 }
 
@@ -39,21 +38,20 @@ static void test_xdp_adjust_tail_grow(void)
        int err, prog_fd;
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
                return;
 
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_DROP,
-             "ipv4", "err %d errno %d retval %d size %d\n",
-             err, errno, retval, size);
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
        expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_TX || size != expect_sz,
-             "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-             err, errno, retval, size, expect_sz);
+       ASSERT_OK(err, "ipv6");
+       ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+       ASSERT_EQ(size, expect_sz, "ipv6 size");
 
        bpf_object__close(obj);
 }
@@ -76,7 +74,7 @@ static void test_xdp_adjust_tail_grow2(void)
        };
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
-       if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow2"))
                return;
 
        /* Test case-64 */
@@ -86,21 +84,17 @@ static void test_xdp_adjust_tail_grow2(void)
        /* Kernel side alloc packet memory area that is zero init */
        err = bpf_prog_test_run_xattr(&tattr);
 
-       CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
-                  || tattr.retval != XDP_TX
-                  || tattr.data_size_out != 192, /* Expected grow size */
-                  "case-64",
-                  "err %d errno %d retval %d size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out);
+       ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due to the copy_size limit in bpf_test_finish() */
+       ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+       ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
 
        /* Extra checks for data contents */
-       CHECK_ATTR(tattr.data_size_out != 192
-                  || buf[0]   != 1 ||  buf[63]  != 1  /*  0-63  memset to 1 */
-                  || buf[64]  != 0 ||  buf[127] != 0  /* 64-127 memset to 0 */
-                  || buf[128] != 1 ||  buf[191] != 1, /*128-191 memset to 1 */
-                  "case-64-data",
-                  "err %d errno %d retval %d size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out);
+       ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /*  0-63  memset to 1 */
+       ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+       ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+       ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+       ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+       ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
 
        /* Test case-128 */
        memset(buf, 2, sizeof(buf));
@@ -109,24 +103,139 @@ static void test_xdp_adjust_tail_grow2(void)
        err = bpf_prog_test_run_xattr(&tattr);
 
        max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
-       CHECK_ATTR(err
-                  || tattr.retval != XDP_TX
-                  || tattr.data_size_out != max_grow,/* Expect max grow size */
-                  "case-128",
-                  "err %d errno %d retval %d size %d expect-size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out, max_grow);
+       ASSERT_OK(err, "case-128");
+       ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+       ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
 
        /* Extra checks for data content: Count grow size, will contain zeros */
        for (i = 0, cnt = 0; i < sizeof(buf); i++) {
                if (buf[i] == 0)
                        cnt++;
        }
-       CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
-                  || tattr.data_size_out != max_grow, /* Total grow size */
-                  "case-128-data",
-                  "err %d errno %d retval %d size %d grow-size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out, cnt);
+       ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+       ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
+
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_shrink(void)
+{
+       const char *file = "./test_xdp_adjust_tail_shrink.o";
+       __u32 duration, retval, size, exp_size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, prog_fd;
+       __u8 *buf;
+
+       /* For the individual test cases, the first byte in the packet
+        * indicates which test will be run.
+        */
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               goto out;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(9000);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+               goto out;
+
+       memset(buf, 0, 9000);
+
+       /* Test case removing 10 bytes from last frag, NOT freeing it */
+       exp_size = 8990; /* 9000 - 10 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-10b");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval");
+       ASSERT_EQ(size, exp_size, "9Kb-10b size");
+
+       /* Test case removing one of two pages, assuming 4K pages */
+       buf[0] = 1;
+       exp_size = 4900; /* 9000 - 4100 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-4Kb");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval");
+       ASSERT_EQ(size, exp_size, "9Kb-4Kb size");
+
+       /* Test case removing two pages resulting in a linear xdp_buff */
+       buf[0] = 2;
+       exp_size = 800; /* 9000 - 8200 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-9Kb");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval");
+       ASSERT_EQ(size, exp_size, "9Kb-9Kb size");
+
+       free(buf);
+out:
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_grow(void)
+{
+       const char *file = "./test_xdp_adjust_tail_grow.o";
+       __u32 duration, retval, size, exp_size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, i, prog_fd;
+       __u8 *buf;
+
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               goto out;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(16384);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+               goto out;
+
+       /* Test case add 10 bytes to last frag */
+       memset(buf, 1, 16384);
+       size = 9000;
+       exp_size = size + 10;
+       err = bpf_prog_test_run(prog_fd, 1, buf, size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb+10b");
+       ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval");
+       ASSERT_EQ(size, exp_size, "9Kb+10b size");
+
+       for (i = 0; i < 9000; i++)
+               ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+
+       for (i = 9000; i < 9010; i++)
+               ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+
+       for (i = 9010; i < 16384; i++)
+               ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+
+       /* Test a too large grow */
+       memset(buf, 1, 16384);
+       size = 9001;
+       exp_size = size;
+       err = bpf_prog_test_run(prog_fd, 1, buf, size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "too-large-grow");
+       ASSERT_EQ(retval, XDP_DROP, "too-large-grow retval");
+       ASSERT_EQ(size, exp_size, "too-large-grow size");
 
+       free(buf);
+out:
        bpf_object__close(obj);
 }
 
@@ -138,4 +247,8 @@ void test_xdp_adjust_tail(void)
                test_xdp_adjust_tail_grow();
        if (test__start_subtest("xdp_adjust_tail_grow2"))
                test_xdp_adjust_tail_grow2();
+       if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+               test_xdp_adjust_frags_tail_shrink();
+       if (test__start_subtest("xdp_adjust_frags_tail_grow"))
+               test_xdp_adjust_frags_tail_grow();
 }
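The frags shrink subtest drives three cases keyed on the packet's first byte,
matching the expected sizes asserted above (shrinks of 10, 4100, and 8200
bytes). A hedged sketch of that dispatch, assuming the xdp.frags section suffix
from this series; the shipped test_xdp_adjust_tail_shrink.o may differ:

	SEC("xdp.frags")
	int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
	{
		void *data_end = (void *)(long)xdp->data_end;
		void *data = (void *)(long)xdp->data;
		int offset;

		if (data + 1 > data_end)
			return XDP_DROP;

		switch (*(__u8 *)data) {
		case 1:
			offset = -4100;	/* free one 4K page */
			break;
		case 2:
			offset = -8200;	/* collapse back to a linear buffer */
			break;
		default:
			offset = -10;	/* trim the last frag, keeping it */
			break;
		}

		if (bpf_xdp_adjust_tail(xdp, offset))
			return XDP_DROP;

		return XDP_TX;
	}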
index c98a897..9c395ea 100644 (file)
@@ -10,40 +10,97 @@ struct meta {
        int pkt_len;
 };
 
+struct test_ctx_s {
+       bool passed;
+       int pkt_size;
+};
+
+struct test_ctx_s test_ctx;
+
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
-       int duration = 0;
        struct meta *meta = (struct meta *)data;
        struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+       unsigned char *raw_pkt = data + sizeof(*meta);
+       struct test_ctx_s *tst_ctx = ctx;
+
+       ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+       ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+       ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
+       ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+                 "check_packet_content");
+
+       if (meta->pkt_len > sizeof(pkt_v4)) {
+               for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+                       ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+                                 "check_packet_content");
+       }
+
+       tst_ctx->passed = true;
+}
 
-       if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
-                 "check_size", "size %u < %zu\n",
-                 size, sizeof(pkt_v4) + sizeof(*meta)))
-               return;
+#define BUF_SZ 9000
 
-       if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
-                 "meta->ifindex = %d\n", meta->ifindex))
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+                                    struct test_xdp_bpf2bpf *ftrace_skel,
+                                    int pkt_size)
+{
+       __u32 duration = 0, retval, size;
+       __u8 *buf, *buf_in;
+       int err;
+
+       if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+           !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
                return;
 
-       if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
-                 "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+       buf_in = malloc(BUF_SZ);
+       if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
                return;
 
-       if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
-                 "check_packet_content", "content not the same\n"))
+       buf = malloc(BUF_SZ);
+       if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+               free(buf_in);
                return;
+       }
+
+       test_ctx.passed = false;
+       test_ctx.pkt_size = pkt_size;
+
+       memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+       if (pkt_size > sizeof(pkt_v4)) {
+               for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+                       buf_in[i + sizeof(pkt_v4)] = i;
+       }
+
+       /* Run test program */
+       err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_PASS, "ipv4 retval");
+       ASSERT_EQ(size, pkt_size, "ipv4 size");
+
+       /* Make sure bpf_xdp_output() was triggered and it sent the expected
+        * data to the perf ring buffer.
+        */
+       err = perf_buffer__poll(pb, 100);
 
-       *(bool *)ctx = true;
+       ASSERT_GE(err, 0, "perf_buffer__poll");
+       ASSERT_TRUE(test_ctx.passed, "test passed");
+       /* Verify test results */
+       ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+                 "fentry result");
+       ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
+
+       free(buf);
+       free(buf_in);
 }
 
 void test_xdp_bpf2bpf(void)
 {
-       __u32 duration = 0, retval, size;
-       char buf[128];
        int err, pkt_fd, map_fd;
-       bool passed = false;
-       struct iphdr iph;
-       struct iptnl_info value4 = {.family = AF_INET};
+       int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+       struct iptnl_info value4 = {.family = AF_INET6};
        struct test_xdp *pkt_skel = NULL;
        struct test_xdp_bpf2bpf *ftrace_skel = NULL;
        struct vip key4 = {.protocol = 6, .family = AF_INET};
@@ -52,7 +109,7 @@ void test_xdp_bpf2bpf(void)
 
        /* Load XDP program to introspect */
        pkt_skel = test_xdp__open_and_load();
-       if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+       if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
                return;
 
        pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
@@ -62,7 +119,7 @@ void test_xdp_bpf2bpf(void)
 
        /* Load trace program */
        ftrace_skel = test_xdp_bpf2bpf__open();
-       if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+       if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
                goto out;
 
        /* Demonstrate the bpf_program__set_attach_target() API rather than
@@ -77,50 +134,24 @@ void test_xdp_bpf2bpf(void)
        bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
 
        err = test_xdp_bpf2bpf__load(ftrace_skel);
-       if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+       if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
                goto out;
 
        err = test_xdp_bpf2bpf__attach(ftrace_skel);
-       if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+       if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
                goto out;
 
        /* Set up perf buffer */
-       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1,
-                             on_sample, NULL, &passed, NULL);
+       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+                             on_sample, NULL, &test_ctx, NULL);
        if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto out;
 
-       /* Run test program */
-       err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
-                               buf, &size, &retval, &duration);
-       memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
-       if (CHECK(err || retval != XDP_TX || size != 74 ||
-                 iph.protocol != IPPROTO_IPIP, "ipv4",
-                 "err %d errno %d retval %d size %d\n",
-                 err, errno, retval, size))
-               goto out;
-
-       /* Make sure bpf_xdp_output() was triggered and it sent the expected
-        * data to the perf ring buffer.
-        */
-       err = perf_buffer__poll(pb, 100);
-       if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
-               goto out;
-
-       CHECK_FAIL(!passed);
-
-       /* Verify test results */
-       if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
-                 "result", "fentry failed err %llu\n",
-                 ftrace_skel->bss->test_result_fentry))
-               goto out;
-
-       CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
-             "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+       for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+               run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+                                        pkt_sizes[i]);
 out:
-       if (pb)
-               perf_buffer__free(pb);
+       perf_buffer__free(pb);
        test_xdp__destroy(pkt_skel);
        test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
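One easy-to-miss change above: the perf buffer grows from 1 to 8 pages per CPU,
since an 8200-byte test packet plus metadata can no longer fit a single sample
into a one-page ring. For reference, the libbpf signature in use here (page_cnt
is the per-CPU ring size in pages and must be a power of two):

	struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
					     perf_buffer_sample_fn sample_cb,
					     perf_buffer_lost_fn lost_cb,
					     void *ctx,
					     const struct perf_buffer_opts *opts);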
index fd812bd..13aabb3 100644 (file)
@@ -3,11 +3,12 @@
 #include <linux/if_link.h>
 #include <test_progs.h>
 
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
 #include "test_xdp_with_cpumap_helpers.skel.h"
 
 #define IFINDEX_LO     1
 
-void serial_test_xdp_cpumap_attach(void)
+void test_xdp_with_cpumap_helpers(void)
 {
        struct test_xdp_with_cpumap_helpers *skel;
        struct bpf_prog_info info = {};
@@ -54,6 +55,67 @@ void serial_test_xdp_cpumap_attach(void)
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
        ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
 
+       /* Try to attach BPF_XDP program with frags to cpumap when we have
+        * already loaded a BPF_XDP program on the map
+        */
+       idx = 1;
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
+
 out_close:
        test_xdp_with_cpumap_helpers__destroy(skel);
 }
+
+void test_xdp_with_cpumap_frags_helpers(void)
+{
+       struct test_xdp_with_cpumap_frags_helpers *skel;
+       struct bpf_prog_info info = {};
+       __u32 len = sizeof(info);
+       struct bpf_cpumap_val val = {
+               .qsize = 192,
+       };
+       int err, frags_prog_fd, map_fd;
+       __u32 idx = 0;
+
+       skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_frags_helpers__open_and_load"))
+               return;
+
+       frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+       map_fd = bpf_map__fd(skel->maps.cpu_map);
+       err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+               goto out_close;
+
+       val.bpf_prog.fd = frags_prog_fd;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_OK(err, "Add program to cpumap entry");
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       ASSERT_OK(err, "Read cpumap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id,
+                 "Match program id to cpumap entry prog_id");
+
+       /* Try to attach BPF_XDP program to cpumap when we have
+        * already loaded a BPF_XDP program with frags on the map
+        */
+       idx = 1;
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+       test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
+void serial_test_xdp_cpumap_attach(void)
+{
+       if (test__start_subtest("CPUMAP with programs in entries"))
+               test_xdp_with_cpumap_helpers();
+
+       if (test__start_subtest("CPUMAP with frags programs in entries"))
+               test_xdp_with_cpumap_frags_helpers();
+}
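Both cpumap subtests (and the devmap analogues below) exercise the generalised
map-compatibility check from this series: the first program stored in a
cpumap/devmap fixes the map's frags capability (bpf_prog_map_compatible()), so
later entries whose xdp_has_frags flag mismatches are rejected. A minimal sketch
of the two dummy programs, assuming the xdp.frags section suffix; the shipped
skeleton sources may differ:

	SEC("xdp/cpumap")
	int xdp_dummy_cm(struct xdp_md *ctx)
	{
		return XDP_PASS;
	}

	SEC("xdp.frags/cpumap")
	int xdp_dummy_cm_frags(struct xdp_md *ctx)
	{
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";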
index 3079d55..2a784cc 100644 (file)
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 
 #include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
 #include "test_xdp_with_devmap_helpers.skel.h"
 
 #define IFINDEX_LO 1
@@ -56,6 +57,15 @@ static void test_xdp_with_devmap_helpers(void)
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
        ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
 
+       /* Try to attach BPF_XDP program with frags to devmap when we have
+        * already loaded a BPF_XDP program on the map
+        */
+       idx = 1;
+       val.ifindex = 1;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
+
 out_close:
        test_xdp_with_devmap_helpers__destroy(skel);
 }
@@ -71,12 +81,57 @@ static void test_neg_xdp_devmap_helpers(void)
        }
 }
 
+void test_xdp_with_devmap_frags_helpers(void)
+{
+       struct test_xdp_with_devmap_frags_helpers *skel;
+       struct bpf_prog_info info = {};
+       struct bpf_devmap_val val = {
+               .ifindex = IFINDEX_LO,
+       };
+       __u32 len = sizeof(info);
+       int err, dm_fd_frags, map_fd;
+       __u32 idx = 0;
+
+       skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_frags_helpers__open_and_load"))
+               return;
+
+       dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+       map_fd = bpf_map__fd(skel->maps.dm_ports);
+       err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+               goto out_close;
+
+       val.bpf_prog.fd = dm_fd_frags;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_OK(err, "Add frags program to devmap entry");
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       ASSERT_OK(err, "Read devmap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id,
+                 "Match program id to devmap entry prog_id");
+
+       /* Try to attach BPF_XDP program to devmap when we have
+        * already loaded a BPF_XDP program with frags on the map
+        */
+       idx = 1;
+       val.ifindex = 1;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+       test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
 
 void serial_test_xdp_devmap_attach(void)
 {
        if (test__start_subtest("DEVMAP with programs in entries"))
                test_xdp_with_devmap_helpers();
 
+       if (test__start_subtest("DEVMAP with frags programs in entries"))
+               test_xdp_with_devmap_frags_helpers();
+
        if (test__start_subtest("Verifier check of DEVMAP programs"))
                test_neg_xdp_devmap_helpers();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
new file mode 100644 (file)
index 0000000..eafc877
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+#define AUTOBIND_LEN 6
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+int sndbuf_getsockopt_expected[NR_CASES];
+
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+       int i;
+
+       for (i = 0; i < AUTOBIND_LEN; i++) {
+               if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+                       return -1;
+       }
+
+       return 0;
+}
+
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+       struct unix_sock *unix_sk = ctx->unix_sk;
+       int i, err;
+
+       if (!unix_sk || !unix_sk->addr)
+               return 0;
+
+       if (unix_sk->addr->name->sun_path[0])
+               return 0;
+
+       if (cmpname(unix_sk))
+               return 0;
+
+       for (i = 0; i < NR_CASES; i++) {
+               err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+                                    &sndbuf_setsockopt[i],
+                                    sizeof(sndbuf_setsockopt[i]));
+               if (err)
+                       break;
+
+               err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+                                    &sndbuf_getsockopt[i],
+                                    sizeof(sndbuf_getsockopt[i]));
+               if (err)
+                       break;
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
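A userspace driver for this iterator (hedged sketch; skeleton and field names
assumed) attaches the program, creates an iterator fd, and reads it to walk
every unix socket, after which sndbuf_getsockopt[] can be compared against the
expected values. Note the kernel stores roughly double the requested SO_SNDBUF
(sk_sndbuf = max(val * 2, SOCK_MIN_SNDBUF)), which is why the expected values
are computed rather than echoed back:

	struct bpf_link *link;
	char buf[16];
	int iter_fd;

	link = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
	iter_fd = bpf_iter_create(bpf_link__fd(link));

	/* each read() batch runs change_sndbuf over more unix sockets */
	while (read(iter_fd, buf, sizeof(buf)) > 0)
		;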
index c21e3f5..e6aefae 100644 (file)
@@ -63,7 +63,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
                        BPF_SEQ_PRINTF(seq, " @");
 
                        for (i = 1; i < len; i++) {
-                               /* unix_mkname() tests this upper bound. */
+                               /* unix_validate_addr() tests this upper bound. */
                                if (i >= sizeof(struct sockaddr_un))
                                        break;
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
new file mode 100644 (file)
index 0000000..82a5c6c
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile struct {
+       /* thread to activate trace programs for */
+       pid_t tgid;
+       /* return error from __init function */
+       int inject_error;
+       /* uffd monitored range start address */
+       void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+int bpf_blocking = 0;
+int res_try_get_module = -1;
+
+static __always_inline bool check_thread_id(void)
+{
+       struct task_struct *task = bpf_get_current_task_btf();
+
+       return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ *   load_module()
+ *     prepare_coming_module()
+ *       notifier_call(MODULE_STATE_COMING)
+ *         btf_parse_module()
+ *         btf_alloc_id()              // Visible to userspace at this point
+ *         list_add(btf_mod->list, &btf_modules)
+ *     do_init_module()
+ *       freeinit = kmalloc()
+ *       ret = mod->init()
+ *         bpf_prog_widen_race()
+ *           bpf_copy_from_user()
+ *             ...<sleep>...
+ *       if (ret < 0)
+ *         ...
+ *         free_module()
+ * return ret
+ *
+ * At this point, the module loading thread is blocked; we now load the program:
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module_live == false
+ *     return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module == true
+ *     <store module reference in btf_kfunc_tab or used_btf array>
+ *   ...
+ * return fd
+ *
+ * Now, if we inject an error in the blocked program, our module will be freed
+ * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when the BPF program is freed, it will try to module_put() the
+ * already-freed module. This is why try_get_module_live returns false if
+ * mod->state is not MODULE_STATE_LIVE.
+ */
+
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+       char dst;
+
+       if (!check_thread_id())
+               return 0;
+       /* Indicate that we will attempt to block */
+       bpf_blocking = 1;
+       bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+       return bpf_mod_race_config.inject_error;
+}
+
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+       if (!check_thread_id())
+               return 0;
+       /* Indicate that we finished blocking */
+       bpf_blocking = 2;
+       return 0;
+}
+
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+       res_try_get_module = !!mod;
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index e0f4260..1c1289b 100644 (file)
@@ -5,6 +5,8 @@
 #define AF_INET                        2
 #define AF_INET6               10
 
+#define SOL_SOCKET             1
+#define SO_SNDBUF              7
 #define __SO_ACCEPTCON         (1 << 16)
 
 #define SOL_TCP                        6
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
new file mode 100644 (file)
index 0000000..b2a409e
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+
+SEC("cgroup/getsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+       retval_value = bpf_get_retval();
+       ctx_retval_value = ctx->retval;
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EISCONN))
+               assertion_error = 1;
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int clear_retval(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       ctx->retval = 0;
+
+       return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
new file mode 100644 (file)
index 0000000..d6e5903
--- /dev/null
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+
+SEC("cgroup/setsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+       retval_value = bpf_get_retval();
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup/setsockopt")
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EUNATCH))
+               assertion_error = 1;
+
+       return 0;
+}
+
+SEC("cgroup/setsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EISCONN))
+               assertion_error = 1;
+
+       return 0;
+}
+
+SEC("cgroup/setsockopt")
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 0;
+}
index 68a5a9d..7e94412 100644 (file)
@@ -7,12 +7,12 @@
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") sock_map = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __type(key, int);
+       __type(value, int);
+       __uint(max_entries, 2);
+} sock_map SEC(".maps");
 
 SEC("freplace/cls_redirect")
 int freplace_cls_redirect_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
new file mode 100644 (file)
index 0000000..4e8fed7
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+SEC("tc")
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+       bpf_testmod_test_mod_kfunc(0);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 8a8cf59..5aecbb9 100644 (file)
@@ -1,13 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
 
 extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
 extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
                                  __u32 c, __u64 d) __ksym;
 
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
 SEC("tc")
 int kfunc_call_test2(struct __sk_buff *skb)
 {
@@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb)
        return ret;
 }
 
+SEC("tc")
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+       struct prog_test_ref_kfunc *pt;
+       unsigned long s = 0;
+       int ret = 0;
+
+       pt = bpf_kfunc_call_test_acquire(&s);
+       if (pt) {
+               if (pt->a != 42 || pt->b != 108)
+                       ret = -1;
+               bpf_kfunc_call_test_release(pt);
+       }
+       return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+       struct prog_test_pass1 p1 = {};
+       struct prog_test_pass2 p2 = {};
+       short a = 0;
+       __u64 b = 0;
+       long c = 0;
+       char d = 0;
+       int e = 0;
+
+       bpf_kfunc_call_test_pass_ctx(skb);
+       bpf_kfunc_call_test_pass1(&p1);
+       bpf_kfunc_call_test_pass2(&p2);
+
+       bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a));
+       bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+       bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+       bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+       bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+       bpf_kfunc_call_test_mem_len_fail2(&b, -1);
+
+       return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c
new file mode 100644 (file)
index 0000000..def97f2
--- /dev/null
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc")
+int ksym_fail(struct __sk_buff *ctx)
+{
+       return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+}
+
+char _license[] SEC("license") = "GPL";
index 1612a32..495990d 100644 (file)
@@ -2,19 +2,19 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") htab = {
-       .type = BPF_MAP_TYPE_HASH,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(long),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, __u32);
+       __type(value, long);
+       __uint(max_entries, 2);
+} htab SEC(".maps");
 
-struct bpf_map_def SEC("maps") array = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(long),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, long);
+       __uint(max_entries, 2);
+} array SEC(".maps");
 
 /* Sample program which should always load for testing control paths. */
 SEC(".text") int func()
index 95d5b94..c9abfe3 100644 (file)
@@ -7,8 +7,6 @@ int bpf_prog1(struct __sk_buff *skb)
 {
        void *data_end = (void *)(long) skb->data_end;
        void *data = (void *)(long) skb->data;
-       __u32 lport = skb->local_port;
-       __u32 rport = skb->remote_port;
        __u8 *d = data;
        int err;
 
index 79c8139..d0298dc 100644 (file)
@@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 */
 
                if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                if (((struct tcp_zerocopy_receive *)optval)->address != 0)
-                       return 0; /* EPERM, unexpected data */
+                       return 0; /* unexpected data */
 
                return 1;
        }
 
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                if (optval + 1 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                ctx->retval = 0; /* Reset system call return value to zero */
 
@@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 * bytes of data.
                 */
                if (optval_end - optval != page_size)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                return 1;
        }
 
        if (ctx->level != SOL_CUSTOM)
-               return 0; /* EPERM, deny everything except custom level */
+               return 0; /* deny everything except custom level */
 
        if (optval + 1 > optval_end)
-               return 0; /* EPERM, bounds check */
+               return 0; /* bounds check */
 
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
-               return 0; /* EPERM, couldn't get sk storage */
+               return 0; /* couldn't get sk storage */
 
        if (!ctx->retval)
-               return 0; /* EPERM, kernel should not have handled
+               return 0; /* kernel should not have handled
                           * SOL_CUSTOM, something is wrong!
                           */
        ctx->retval = 0; /* Reset system call return value to zero */
@@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
                /* Overwrite SO_SNDBUF value */
 
                if (optval + sizeof(__u32) > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                *(__u32 *)optval = 0x55AA;
                ctx->optlen = 4;
@@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
                /* Always use cubic */
 
                if (optval + 5 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                memcpy(optval, "cubic", 5);
                ctx->optlen = 5;
@@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                /* Original optlen is larger than PAGE_SIZE. */
                if (ctx->optlen != page_size * 2)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                if (optval + 1 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                /* Make sure we can trim the buffer. */
                optval[0] = 0;
@@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx)
                 * bytes of data.
                 */
                if (optval_end - optval != page_size)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                return 1;
        }
 
        if (ctx->level != SOL_CUSTOM)
-               return 0; /* EPERM, deny everything except custom level */
+               return 0; /* deny everything except custom level */
 
        if (optval + 1 > optval_end)
-               return 0; /* EPERM, bounds check */
+               return 0; /* bounds check */
 
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
-               return 0; /* EPERM, couldn't get sk storage */
+               return 0; /* couldn't get sk storage */
 
        storage->val = optval[0];
        ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
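
The comments above drop the "EPERM" prefix because all a sockopt program promises is the contract itself: returning 0 rejects the syscall and returning 1 lets it proceed, while the exact errno reported on rejection is decided by the kernel. A minimal allow-all sketch of that contract (illustrative only):

	SEC("cgroup/getsockopt")
	int allow_all(struct bpf_sockopt *ctx)
	{
		return 1;	/* 1 = allow, 0 = reject the getsockopt() call */
	}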
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
new file mode 100644 (file)
index 0000000..f00a973
--- /dev/null
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+                                 struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+                                 struct bpf_ct_opts___local *, u32) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+
+static __always_inline void
+nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
+                                  struct bpf_ct_opts___local *, u32),
+          void *ctx)
+{
+       struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+       struct bpf_sock_tuple bpf_tuple;
+       struct nf_conn *ct;
+
+       __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+       ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_bpf_tuple = opts_def.error;
+
+       opts_def.reserved[0] = 1;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.reserved[0] = 0;
+       opts_def.l4proto = IPPROTO_TCP;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_reserved = opts_def.error;
+
+       opts_def.netns_id = -2;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.netns_id = -1;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_netns_id = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_len_opts = opts_def.error;
+
+       opts_def.l4proto = IPPROTO_ICMP;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.l4proto = IPPROTO_TCP;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_eproto_l4proto = opts_def.error;
+
+       opts_def.netns_id = 0xf00f;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.netns_id = -1;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_enonet_netns_id = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_enoent_lookup = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_eafnosupport = opts_def.error;
+}
+
+SEC("xdp")
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+       nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
+       return 0;
+}
+
+SEC("tc")
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+       nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
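
Two conventions in this new test are worth spelling out: the ___local suffix marks a locally re-declared type that libbpf's CO-RE relocation matches against the kernel's struct bpf_ct_opts with the suffix stripped, and the trailing sizeof(opts) argument lets the kfunc reject callers built against a different struct layout (the EINVAL cases above). A hedged sketch of the success path the error tests never reach (tuple values invented for illustration):

	struct bpf_ct_opts___local opts = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
	struct bpf_sock_tuple tup = {};
	struct nf_conn *ct;

	tup.ipv4.saddr = 0x0100007f;	/* 127.0.0.1 in wire order, example only */
	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
	if (ct)
		bpf_ct_release(ct);	/* acquired references must be released */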
index 160ead6..07c94df 100644 (file)
@@ -9,12 +9,15 @@ struct ipv_counts {
        unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 struct bpf_map_def SEC("maps") btf_map = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(int),
        .value_size = sizeof(struct ipv_counts),
        .max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
 
index 1884a5b..762671a 100644 (file)
@@ -9,6 +9,8 @@ struct ipv_counts {
        unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 /* just to validate we can handle maps in multiple sections */
 struct bpf_map_def SEC("maps") btf_map_legacy = {
        .type = BPF_MAP_TYPE_ARRAY,
@@ -16,6 +18,7 @@ struct bpf_map_def SEC("maps") btf_map_legacy = {
        .value_size = sizeof(long long),
        .max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
 
index 15e0f99..1dabb88 100644 (file)
@@ -8,12 +8,12 @@ struct ipv_counts {
        unsigned int v6;
 };
 
-struct bpf_map_def SEC("maps") btf_map = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(struct ipv_counts),
-       .max_entries = 4,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct ipv_counts));
+       __uint(max_entries, 4);
+} btf_map SEC(".maps");
 
 __attribute__((noinline))
 int test_long_fname_2(void)
index c304cd5..37aacc6 100644 (file)
 
 #define NUM_CGROUP_LEVELS      4
 
-struct bpf_map_def SEC("maps") cgroup_ids = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(__u64),
-       .max_entries = NUM_CGROUP_LEVELS,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, __u64);
+       __uint(max_entries, NUM_CGROUP_LEVELS);
+} cgroup_ids SEC(".maps");
 
 static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644 (file)
index 0000000..9d58d61
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+       return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+       return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
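
This program pair exists so userspace can attach verdict programs to the sockmap and query them back. A hedged userspace sketch of that query (map_fd and the attach type are placeholders for whatever the test harness uses; error handling elided):

	__u32 prog_ids[1], prog_cnt = 1, attach_flags = 0;
	int err;

	/* reports the id of the attached sk_skb verdict program */
	err = bpf_prog_query(map_fd, BPF_SK_SKB_VERDICT, 0,
			     &attach_flags, prog_ids, &prog_cnt);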
index bf28814..950a70b 100644 (file)
 #define THROTTLE_RATE_BPS (5 * 1000 * 1000)
 
 /* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
-       .type = BPF_MAP_TYPE_HASH,
-       .key_size = sizeof(uint32_t),
-       .value_size = sizeof(uint64_t),
-       .max_entries = 1,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, uint32_t);
+       __type(value, uint64_t);
+       __uint(max_entries, 1);
+} flow_map SEC(".maps");
 
 static inline int throttle_flow(struct __sk_buff *skb)
 {
index cd747cd..6edebce 100644 (file)
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-struct bpf_map_def SEC("maps") results = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(__u32),
-       .max_entries = 3,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, __u32);
+       __uint(max_entries, 3);
+} results SEC(".maps");
 
 static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
                                           void *iph, __u32 ip_size,
index 199c61b..53b64c9 100644 (file)
@@ -7,11 +7,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 {
        void *data_end = (void *)(long)xdp->data_end;
        void *data = (void *)(long)xdp->data;
-       unsigned int data_len;
+       int data_len = bpf_xdp_get_buff_len(xdp);
        int offset = 0;
 
        /* Data length determines the test case */
-       data_len = data_end - data;
 
        if (data_len == 54) { /* sizeof(pkt_v4) */
                offset = 4096; /* test too large offset */
@@ -20,7 +19,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
        } else if (data_len == 64) {
                offset = 128;
        } else if (data_len == 128) {
-               offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+               /* Max tail grow 3520 */
+               offset = 4096 - 256 - 320 - data_len;
+       } else if (data_len == 9000) {
+               offset = 10;
+       } else if (data_len == 9001) {
+               offset = 4096;
        } else {
                return XDP_ABORTED; /* No matching test */
        }
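
What changed above: data_end - data only measures the linear area, while bpf_xdp_get_buff_len() also counts fragments, which is what makes the 9000- and 9001-byte cases reachable at all. A minimal sketch of the distinction (illustrative, assumes a frags-capable driver and the same headers as the file above):

	SEC("xdp.frags")
	int buff_len_demo(struct xdp_md *xdp)
	{
		void *data_end = (void *)(long)xdp->data_end;
		void *data = (void *)(long)xdp->data;
		int linear = data_end - data;		/* first buffer only */
		int total = bpf_xdp_get_buff_len(xdp);	/* linear + all frags */

		return total >= linear ? XDP_PASS : XDP_ABORTED;
	}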
index b744825..ca68c03 100644 (file)
 SEC("xdp")
 int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
 {
-       void *data_end = (void *)(long)xdp->data_end;
-       void *data = (void *)(long)xdp->data;
+       __u8 *data_end = (void *)(long)xdp->data_end;
+       __u8 *data = (void *)(long)xdp->data;
        int offset = 0;
 
-       if (data_end - data == 54) /* sizeof(pkt_v4) */
+       switch (bpf_xdp_get_buff_len(xdp)) {
+       case 54:
+               /* sizeof(pkt_v4) */
                offset = 256; /* shrink too much */
-       else
+               break;
+       case 9000:
+               /* non-linear buff test cases */
+               if (data + 1 > data_end)
+                       return XDP_DROP;
+
+               switch (data[0]) {
+               case 0:
+                       offset = 10;
+                       break;
+               case 1:
+                       offset = 4100;
+                       break;
+               case 2:
+                       offset = 8200;
+                       break;
+               default:
+                       return XDP_DROP;
+               }
+               break;
+       default:
                offset = 20;
+               break;
+       }
        if (bpf_xdp_adjust_tail(xdp, 0 - offset))
                return XDP_DROP;
        return XDP_TX;
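
bpf_xdp_adjust_tail() takes a signed delta, which is why the program passes 0 - offset: a positive delta grows the tail, a negative one shrinks it. A two-line sketch of the convention (illustrative):

	if (bpf_xdp_adjust_tail(xdp, 32))	/* grow tail by 32 bytes */
		return XDP_DROP;
	if (bpf_xdp_adjust_tail(xdp, -32))	/* shrink it back */
		return XDP_DROP;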
index 58cf434..3379d30 100644 (file)
@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
        void *data = (void *)(long)xdp->data;
 
        meta.ifindex = xdp->rxq->dev->ifindex;
-       meta.pkt_len = data_end - data;
+       meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
        bpf_xdp_output(xdp, &perf_buf_map,
                       ((__u64) meta.pkt_len << 32) |
                       BPF_F_CURRENT_CPU,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
new file mode 100644 (file)
index 0000000..2a3496d
--- /dev/null
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags")
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+       __u8 *data_end = (void *)(long)xdp->data_end;
+       __u8 *data = (void *)(long)xdp->data;
+       __u8 val[16] = {};
+       __u32 offset;
+       int err;
+
+       if (data + sizeof(__u32) > data_end)
+               return XDP_DROP;
+
+       offset = *(__u32 *)data;
+       err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
+       if (err < 0)
+               return XDP_DROP;
+
+       if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+               return XDP_DROP;
+
+       val[0] = 0xbb; /* update the marker */
+       val[15] = 0xbb;
+       err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
+       if (err < 0)
+               return XDP_DROP;
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
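
The detour through bpf_xdp_load_bytes() and bpf_xdp_store_bytes() is the point of this test: direct packet access via data/data_end can only reach the linear area, so a marker placed in a later fragment is unreachable that way, while the byte-copy helpers accept any offset inside the total buffer length.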
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
new file mode 100644 (file)
index 0000000..62fb7cd
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO     1
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CPUMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_cpumap_val));
+       __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index 5320250..48007f1 100644 (file)
@@ -33,4 +33,10 @@ int xdp_dummy_cm(struct xdp_md *ctx)
        return XDP_PASS;
 }
 
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
new file mode 100644 (file)
index 0000000..e1caf51
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap/map_prog")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
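
A hedged userspace sketch of how a devmap entry pairs an egress interface with the frags-aware program above (ifindex 1 and prog_fd are placeholders; error handling elided):

	struct bpf_devmap_val val = {
		.ifindex = 1,			/* e.g. loopback, illustration only */
		.bpf_prog.fd = prog_fd,		/* fd of xdp_dummy_dm_frags */
	};
	__u32 key = 0;

	bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);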
index 1e6b9c3..8ae11fa 100644 (file)
@@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx)
 
        return XDP_PASS;
 }
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
index 76cd903..29bbaa5 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/if_ether.h>
 #include <linux/btf.h>
 
+#include <bpf/btf.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
@@ -66,6 +67,11 @@ static bool unpriv_disabled = false;
 static int skips;
 static bool verbose = false;
 
+struct kfunc_btf_id_pair {
+       const char *kfunc;
+       int insn_idx;
+};
+
 struct bpf_test {
        const char *descr;
        struct bpf_insn insns[MAX_INSNS];
@@ -92,6 +98,7 @@ struct bpf_test {
        int fixup_map_reuseport_array[MAX_FIXUPS];
        int fixup_map_ringbuf[MAX_FIXUPS];
        int fixup_map_timer[MAX_FIXUPS];
+       struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
        /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
         * Can be a tab-separated sequence of expected strings. An empty string
         * means no log verification.
@@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
        int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
        int *fixup_map_ringbuf = test->fixup_map_ringbuf;
        int *fixup_map_timer = test->fixup_map_timer;
+       struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
 
        if (test->fill_helper) {
                test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
                        fixup_map_timer++;
                } while (*fixup_map_timer);
        }
+
+       /* Patch in kfunc BTF IDs */
+       if (fixup_kfunc_btf_id->kfunc) {
+               struct btf *btf;
+               int btf_id;
+
+               do {
+                       btf_id = 0;
+                       btf = btf__load_vmlinux_btf();
+                       if (btf) {
+                               btf_id = btf__find_by_name_kind(btf,
+                                                               fixup_kfunc_btf_id->kfunc,
+                                                               BTF_KIND_FUNC);
+                               btf_id = btf_id < 0 ? 0 : btf_id;
+                       }
+                       btf__free(btf);
+                       prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+                       fixup_kfunc_btf_id++;
+               } while (fixup_kfunc_btf_id->kfunc);
+       }
 }
 
 struct libcap {
index d7b74eb..829be2b 100644 (file)
        .result  = ACCEPT,
 },
 {
+       "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail1", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail2", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail3", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 expected pointer to ctx, but got PTR",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_pass_ctx", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type UNKNOWN  must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_mem_len_fail1", 2 },
+       },
+},
+{
        "calls: basic sanity",
        .insns = {
        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
index 0a5d23d..ffa5502 100644 (file)
@@ -906,7 +906,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
                        return true;
                case STAT_TEST_RX_FULL:
                        xsk_stat = stats.rx_ring_full;
-                       expected_stat -= RX_FULL_RXQSIZE;
+                       if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+                               expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
+                       else
+                               expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
                        break;
                case STAT_TEST_RX_FILL_EMPTY:
                        xsk_stat = stats.rx_fill_ring_empty_descs;
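
Restated, the fix computes the expected rx_ring_full count from whichever bound is smaller, the number of umem frames or the default producer ring size. An equivalent one-liner (hedged, assumes a min() macro; not in the patch):

	expected_stat = min(ifobject->umem->num_frames,
			    (u32)XSK_RING_PROD__DEFAULT_NUM_DESCS) - RX_FULL_RXQSIZE;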