Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author    Jakub Kicinski <kuba@kernel.org>
          Thu, 27 Jan 2022 20:54:16 +0000 (12:54 -0800)
committer Jakub Kicinski <kuba@kernel.org>
          Thu, 27 Jan 2022 20:54:16 +0000 (12:54 -0800)
No conflicts.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
215 files changed:
Documentation/bpf/btf.rst
Documentation/devicetree/bindings/net/fsl-fman.txt
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/mt7530.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/dec/tulip/pnic.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fec_ptp.c
drivers/net/ethernet/freescale/xgmac_mdio.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mediatek/mtk_star_emac.c
drivers/net/ethernet/mellanox/mlxsw/core_env.c
drivers/net/ethernet/mellanox/mlxsw/core_env.h
drivers/net/ethernet/mellanox/mlxsw/minimal.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
drivers/net/ethernet/microchip/lan743x_ethtool.c
drivers/net/ethernet/microsoft/mana/gdma_main.c
drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
drivers/net/ethernet/netronome/nfp/nfp_port.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
drivers/net/ethernet/pensando/ionic/ionic.h
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
drivers/net/ethernet/pensando/ionic/ionic_dev.c
drivers/net/ethernet/pensando/ionic/ionic_dev.h
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/pensando/ionic/ionic_lif.h
drivers/net/ethernet/pensando/ionic/ionic_main.c
drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/xilinx/xilinx_axienet.h
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/pcs/pcs-xpcs.c
drivers/net/phy/at803x.c
drivers/net/usb/asix_devices.c
drivers/nfc/st-nci/vendor_cmds.c
drivers/nfc/st21nfca/vendor_cmds.c
drivers/ptp/ptp_sysfs.c
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/btf.h
include/linux/btf_ids.h
include/linux/filter.h
include/linux/ipv6.h
include/linux/linkmode.h
include/linux/mii.h
include/linux/netlink.h
include/linux/pcs/pcs-xpcs.h
include/linux/skbuff.h
include/linux/udp.h
include/net/ax25.h
include/net/bonding.h
include/net/inet_timewait_sock.h
include/net/netfilter/nf_conntrack_bpf.h [new file with mode: 0644]
include/net/netns/ipv4.h
include/net/netns/ipv6.h
include/net/pkt_sched.h
include/net/sch_generic.h
include/net/udplite.h
include/net/xdp.h
include/uapi/linux/bpf.h
kernel/bpf/arraymap.c
kernel/bpf/btf.c
kernel/bpf/cgroup.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/devmap.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/ax25/ax25_route.c
net/bpf/test_run.c
net/core/filter.c
net/core/net_namespace.c
net/core/sock.c
net/core/sock_map.c
net/core/xdp.c
net/dccp/dccp.h
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dccp/minisocks.c
net/dsa/switch.c
net/hsr/hsr_main.h
net/ipv4/bpf_tcp_ca.c
net/ipv4/fib_semantics.c
net/ipv4/icmp.c
net/ipv4/inet_timewait_sock.c
net/ipv4/proc.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_cubic.c
net/ipv4/tcp_dctcp.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv6/icmp.c
net/ipv6/ip6_offload.c
net/ipv6/ip6_tunnel.c
net/ipv6/tcp_ipv6.c
net/netfilter/Makefile
net/netfilter/nf_conntrack_bpf.c [new file with mode: 0644]
net/netfilter/nf_conntrack_core.c
net/tipc/msg.h
net/unix/af_unix.c
samples/bpf/xdp1_user.c
samples/bpf/xdp_adjust_tail_user.c
samples/bpf/xdp_fwd_user.c
samples/bpf/xdp_router_ipv4_user.c
samples/bpf/xdp_rxq_info_user.c
samples/bpf/xdp_sample_pkts_user.c
samples/bpf/xdp_sample_user.c
samples/bpf/xdp_tx_iptunnel_user.c
samples/bpf/xdpsock_ctrl_proc.c
samples/bpf/xdpsock_user.c
samples/bpf/xsk_fwd.c
scripts/bpf_doc.py
security/device_cgroup.c
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/link.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/pids.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/struct_ops.c
tools/bpf/resolve_btfids/Makefile
tools/include/uapi/linux/bpf.h
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/hashmap.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_legacy.h
tools/lib/bpf/netlink.c
tools/perf/util/bpf-loader.c
tools/perf/util/bpf_map.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
tools/testing/selftests/bpf/config
tools/testing/selftests/bpf/prog_tests/bind_perm.c
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_nf.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
tools/testing/selftests/bpf/prog_tests/global_data.c
tools/testing/selftests/bpf/prog_tests/global_data_init.c
tools/testing/selftests/bpf/prog_tests/kfunc_call.c
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
tools/testing/selftests/bpf/prog_tests/tailcalls.c
tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_unix.c
tools/testing/selftests/bpf/progs/bpf_mod_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
tools/testing/selftests/bpf/progs/kfunc_call_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/kfunc_call_test.c
tools/testing/selftests/bpf/progs/ksym_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sample_map_ret0.c
tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
tools/testing/selftests/bpf/progs/sockopt_sk.c
tools/testing/selftests/bpf/progs/test_bpf_nf.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_btf_haskv.c
tools/testing/selftests/bpf/progs/test_btf_newkv.c
tools/testing/selftests/bpf/progs/test_btf_nokv.c
tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_tc_edt.c
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
tools/testing/selftests/bpf/progs/test_xdp_update_frags.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/verifier/calls.c
tools/testing/selftests/bpf/xdpxceiver.c

diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 1ebf4c5..ab08852 100644
@@ -565,18 +565,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
 In libbpf, the map can be defined with extra annotation like below:
 ::
 
-    struct bpf_map_def SEC("maps") btf_map = {
-        .type = BPF_MAP_TYPE_ARRAY,
-        .key_size = sizeof(int),
-        .value_size = sizeof(struct ipv_counts),
-        .max_entries = 4,
-    };
-    BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+    struct {
+        __uint(type, BPF_MAP_TYPE_ARRAY);
+        __type(key, int);
+        __type(value, struct ipv_counts);
+        __uint(max_entries, 4);
+    } btf_map SEC(".maps");
 
-Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
-value types for the map. During ELF parsing, libbpf is able to extract
-key/value type_id's and assign them to BPF_MAP_CREATE attributes
-automatically.
+During ELF parsing, libbpf is able to extract key/value type_id's and assign
+them to BPF_MAP_CREATE attributes automatically.
 
 .. _BPF_Prog_Load:
 
@@ -824,13 +821,12 @@ structure has bitfields. For example, for the following map,::
            ___A b1:4;
            enum A b2:4;
       };
-      struct bpf_map_def SEC("maps") tmpmap = {
-           .type = BPF_MAP_TYPE_ARRAY,
-           .key_size = sizeof(__u32),
-           .value_size = sizeof(struct tmp_t),
-           .max_entries = 1,
-      };
-      BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
+      struct {
+           __uint(type, BPF_MAP_TYPE_ARRAY);
+           __type(key, int);
+           __type(value, struct tmp_t);
+           __uint(max_entries, 1);
+      } tmpmap SEC(".maps");
 
 bpftool is able to pretty print like below:
 ::
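Putting the documented pieces together, a complete BTF-defined map plus a
minimal program that uses it could look like the following illustrative
sketch. This is not part of the patch; the struct layout, section name and
program body are assumptions, but the map-definition syntax is the one the
updated documentation describes:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct ipv_counts {
            long v4;
            long v6;
    };

    /* BTF-defined map: during ELF parsing libbpf reads the __type()
     * annotations and fills in the key/value type ids for
     * BPF_MAP_CREATE automatically.
     */
    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __type(key, int);
            __type(value, struct ipv_counts);
            __uint(max_entries, 4);
    } btf_map SEC(".maps");

    SEC("socket")
    int count_pkts(struct __sk_buff *skb)
    {
            int key = 0;
            struct ipv_counts *counts;

            counts = bpf_map_lookup_elem(&btf_map, &key);
            if (counts)
                    __sync_fetch_and_add(&counts->v4, 1);
            return 0;
    }

    char _license[] SEC("license") = "GPL";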
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index 020337f..801efc7 100644
@@ -388,14 +388,24 @@ PROPERTIES
                Value type: <prop-encoded-array>
                Definition: A standard property.
 
-- bus-frequency
+- clocks
+               Usage: optional
+               Value type: <phandle>
+               Definition: A reference to the input clock of the controller
+               from which the MDC frequency is derived.
+
+- clock-frequency
                Usage: optional
                Value type: <u32>
-               Definition: Specifies the external MDIO bus clock speed to
-               be used, if different from the standard 2.5 MHz.
-               This may be due to the standard speed being unsupported (e.g.
-               due to a hardware problem), or to advertise that all relevant
-               components in the system support a faster speed.
+               Definition: Specifies the external MDC frequency, in Hertz, to
+               be used. Requires that the input clock is specified in the
+               "clocks" property. See also: mdio.yaml.
+
+- suppress-preamble
+               Usage: optional
+               Value type: <boolean>
+               Definition: Disable generation of preamble bits. See also:
+               mdio.yaml.
 
 - interrupts
                Usage: required for external MDIO
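A consumer of these properties (xgmac_mdio.c, also touched by this merge, is
one) would typically pick them up at probe time with the standard OF and clk
helpers. The following is a hedged sketch, not code from this patch; the
function name and the divider/preamble programming are invented placeholders:

    static int example_mdio_probe_clk(struct platform_device *pdev)
    {
            struct device_node *np = pdev->dev.of_node;
            struct clk *clk;
            u32 mdc_freq;

            /* "clocks": optional input clock the MDC is derived from */
            clk = devm_clk_get_optional(&pdev->dev, NULL);
            if (IS_ERR(clk))
                    return PTR_ERR(clk);

            /* "clock-frequency": requested external MDC rate in Hz */
            if (!of_property_read_u32(np, "clock-frequency", &mdc_freq)) {
                    /* derive the MDC divider from
                     * clk_get_rate(clk) / mdc_freq here
                     */
            }

            /* "suppress-preamble": boolean, disable preamble bits */
            if (of_property_read_bool(np, "suppress-preamble")) {
                    /* ...clear the preamble-enable bit here... */
            }

            return 0;
    }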
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 3867f3d..a3b9899 100644
@@ -2186,7 +2186,7 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
 {
        int ret;
 
-       ret = phy_init_eee(phy, 0);
+       ret = phy_init_eee(phy, false);
        if (ret)
                return 0;
 
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index b82512e..bc77a26 100644
@@ -2846,7 +2846,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
                        mcr |= PMCR_RX_FC_EN;
        }
 
-       if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, 0) >= 0) {
+       if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) {
                switch (speed) {
                case SPEED_1000:
                        mcr |= PMCR_FORCE_EEE1G;
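Both DSA callers above reflect the phy_init_eee() signature change: the
second parameter, clk_stop_enable, is now a bool rather than an int, so the
literal 0 becomes false. A minimal caller sketch (an assumption for
illustration; the function name is invented):

    static int example_port_enable_eee(struct phy_device *phydev)
    {
            /* false: do not let the PHY stop its clock during LPI */
            int ret = phy_init_eee(phydev, false);

            if (ret < 0)
                    return ret;     /* EEE not usable on this link */

            /* ...configure MAC-side EEE (LPI timers etc.) here... */
            return 0;
    }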
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4f94136..c313221 100644
@@ -233,6 +233,7 @@ static const u16 bnxt_async_events_arr[] = {
        ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST,
        ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP,
        ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT,
+       ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE,
 };
 
 static struct workqueue_struct *bnxt_pf_wq;
@@ -2079,6 +2080,16 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
        (BNXT_EVENT_RING_TYPE(data2) == \
         ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX)
 
+#define BNXT_EVENT_PHC_EVENT_TYPE(data1)       \
+       (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK) >>\
+        ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT)
+
+#define BNXT_EVENT_PHC_RTC_UPDATE(data1)       \
+       (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK) >>\
+        ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT)
+
+#define BNXT_PHC_BITS  48
+
 static int bnxt_async_event_process(struct bnxt *bp,
                                    struct hwrm_async_event_cmpl *cmpl)
 {
@@ -2258,6 +2269,24 @@ static int bnxt_async_event_process(struct bnxt *bp,
                bnxt_event_error_report(bp, data1, data2);
                goto async_event_process_exit;
        }
+       case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: {
+               switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) {
+               case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE:
+                       if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+                               struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+                               u64 ns;
+
+                               spin_lock_bh(&ptp->ptp_lock);
+                               bnxt_ptp_update_current_time(bp);
+                               ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) <<
+                                      BNXT_PHC_BITS) | ptp->current_time);
+                               bnxt_ptp_rtc_timecounter_init(ptp, ns);
+                               spin_unlock_bh(&ptp->ptp_lock);
+                       }
+                       break;
+               }
+               goto async_event_process_exit;
+       }
        case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: {
                u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff;
 
@@ -7414,6 +7443,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
        struct hwrm_port_mac_ptp_qcfg_output *resp;
        struct hwrm_port_mac_ptp_qcfg_input *req;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       bool phc_cfg;
        u8 flags;
        int rc;
 
@@ -7456,7 +7486,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
                rc = -ENODEV;
                goto exit;
        }
-       rc = bnxt_ptp_init(bp);
+       phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0;
+       rc = bnxt_ptp_init(bp, phc_cfg);
        if (rc)
                netdev_warn(bp->dev, "PTP initialization failed.\n");
 exit:
@@ -7514,6 +7545,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
                bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED))
                bp->fw_cap |= BNXT_FW_CAP_PTP_PPS;
+       if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED)
+               bp->fw_cap |= BNXT_FW_CAP_PTP_RTC;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT))
                bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED))
@@ -10288,6 +10321,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        /* VF-reps may need to be re-opened after the PF is re-opened */
        if (BNXT_PF(bp))
                bnxt_vf_reps_open(bp);
+       bnxt_ptp_init_rtc(bp, true);
        return 0;
 
 open_err_irq:
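The PHC_RTC_UPDATE handler above reconstructs a full 64-bit timestamp from
two pieces: event_data1 carries only the upper 16 bits of the PHC time (bits
4..19 of data1), which the driver shifts above BNXT_PHC_BITS (48) before
OR-ing in the 48-bit free-running counter value. A standalone sketch of just
that arithmetic, with made-up input values:

    #include <stdint.h>
    #include <stdio.h>

    #define PHC_TIME_MSB_MASK 0xffff0u      /* data1 bits 4..19 */
    #define PHC_TIME_MSB_SFT  4
    #define PHC_BITS          48            /* width of the counter */

    int main(void)
    {
            uint32_t data1 = 0x00012345u;              /* example event_data1 */
            uint64_t current_time = 0x123456789abcULL; /* 48-bit counter */
            uint64_t msb = (data1 & PHC_TIME_MSB_MASK) >> PHC_TIME_MSB_SFT;
            uint64_t ns = (msb << PHC_BITS) | current_time;

            /* prints 0x1234123456789abc */
            printf("reconstructed PHC time: %#llx ns\n",
                   (unsigned long long)ns);
            return 0;
    }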
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 440dfeb..4b023e3 100644
@@ -1957,6 +1957,7 @@ struct bnxt {
        #define BNXT_FW_CAP_EXT_STATS_SUPPORTED         0x00040000
        #define BNXT_FW_CAP_ERR_RECOVER_RELOAD          0x00100000
        #define BNXT_FW_CAP_HOT_RESET                   0x00200000
+       #define BNXT_FW_CAP_PTP_RTC                     0x00400000
        #define BNXT_FW_CAP_VLAN_RX_STRIP               0x01000000
        #define BNXT_FW_CAP_VLAN_TX_INSERT              0x02000000
        #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED      0x04000000
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index ea86c54..b7100ed 100644
@@ -369,6 +369,12 @@ struct cmd_nums {
        #define HWRM_FUNC_PTP_EXT_CFG                     0x1a0UL
        #define HWRM_FUNC_PTP_EXT_QCFG                    0x1a1UL
        #define HWRM_FUNC_KEY_CTX_ALLOC                   0x1a2UL
+       #define HWRM_FUNC_BACKING_STORE_CFG_V2            0x1a3UL
+       #define HWRM_FUNC_BACKING_STORE_QCFG_V2           0x1a4UL
+       #define HWRM_FUNC_DBR_PACING_CFG                  0x1a5UL
+       #define HWRM_FUNC_DBR_PACING_QCFG                 0x1a6UL
+       #define HWRM_FUNC_DBR_PACING_BROADCAST_EVENT      0x1a7UL
+       #define HWRM_FUNC_BACKING_STORE_QCAPS_V2          0x1a8UL
        #define HWRM_SELFTEST_QLIST                       0x200UL
        #define HWRM_SELFTEST_EXEC                        0x201UL
        #define HWRM_SELFTEST_IRQ                         0x202UL
@@ -390,6 +396,9 @@ struct cmd_nums {
        #define HWRM_MFG_PRVSN_IMPORT_CERT                0x212UL
        #define HWRM_MFG_PRVSN_GET_STATE                  0x213UL
        #define HWRM_MFG_GET_NVM_MEASUREMENT              0x214UL
+       #define HWRM_MFG_PSOC_QSTATUS                     0x215UL
+       #define HWRM_MFG_SELFTEST_QLIST                   0x216UL
+       #define HWRM_MFG_SELFTEST_EXEC                    0x217UL
        #define HWRM_TF                                   0x2bcUL
        #define HWRM_TF_VERSION_GET                       0x2bdUL
        #define HWRM_TF_SESSION_OPEN                      0x2c6UL
@@ -532,8 +541,8 @@ struct hwrm_err_output {
 #define HWRM_VERSION_MAJOR 1
 #define HWRM_VERSION_MINOR 10
 #define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 63
-#define HWRM_VERSION_STR "1.10.2.63"
+#define HWRM_VERSION_RSVD 73
+#define HWRM_VERSION_STR "1.10.2.73"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -757,10 +766,11 @@ struct hwrm_async_event_cmpl {
        #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE          0x40UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE    0x41UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST               0x42UL
-       #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_MASTER                 0x43UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE                 0x43UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP              0x44UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT               0x45UL
-       #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x46UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD  0x46UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x47UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG               0xfeUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR                 0xffUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_LAST                      ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -1112,34 +1122,37 @@ struct hwrm_async_event_cmpl_echo_request {
        __le32  event_data1;
 };
 
-/* hwrm_async_event_cmpl_phc_master (size:128b/16B) */
-struct hwrm_async_event_cmpl_phc_master {
+/* hwrm_async_event_cmpl_phc_update (size:128b/16B) */
+struct hwrm_async_event_cmpl_phc_update {
        __le16  type;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_MASK            0x3fUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_SFT             0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT  0x2eUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_LAST             ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_MASK            0x3fUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_SFT             0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_LAST             ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT
        __le16  event_id;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER 0x43UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_LAST      ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE 0x43UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_LAST      ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE
        __le32  event_data2;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_SFT 0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_MASK   0xffff0000UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_SFT    16
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_SFT 0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_MASK   0xffff0000UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_SFT    16
        u8      opaque_v;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_V          0x1UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_MASK 0xfeUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_SFT 1
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_V          0x1UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_MASK 0xfeUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_SFT 1
        u8      timestamp_lo;
        __le16  timestamp_hi;
        __le32  event_data1;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_MASK         0xfUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_SFT          0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_MASTER     0x1UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_SECONDARY  0x2UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER   0x3UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_LAST          ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK          0xfUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT           0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_MASTER      0x1UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_SECONDARY   0x2UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_FAILOVER    0x3UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE  0x4UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_LAST           ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK   0xffff0UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT    4
 };
 
 /* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */
@@ -1330,6 +1343,30 @@ struct hwrm_async_event_cmpl_error_report_nvm {
        #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST    ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE
 };
 
+/* hwrm_async_event_cmpl_error_report_doorbell_drop_threshold (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_doorbell_drop_threshold {
+       __le16  type;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_MASK            0x3fUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_SFT             0
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT
+       __le16  event_id;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT 0x45UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT
+       __le32  event_data2;
+       u8      opaque_v;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_V          0x1UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_MASK 0xfeUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_SFT 1
+       u8      timestamp_lo;
+       __le16  timestamp_hi;
+       __le32  event_data1;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_MASK                   0xffUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_SFT                    0
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD  0x4UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_LAST                    ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD
+};
+
 /* hwrm_func_reset_input (size:192b/24B) */
 struct hwrm_func_reset_input {
        __le16  req_type;
@@ -1589,6 +1626,10 @@ struct hwrm_func_qcaps_output {
        #define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL                        0x800000UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED                       0x1000000UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP                       0x2000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED                        0x4000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_REQUIRED                         0x8000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED                0x10000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_DBR_PACING_SUPPORTED                   0x20000000UL
        u8      max_schqs;
        u8      mpc_chnls_cap;
        #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE         0x1UL
@@ -2455,7 +2496,7 @@ struct hwrm_func_backing_store_qcaps_output {
        __le16  rkc_entry_size;
        __le32  tkc_max_entries;
        __le32  rkc_max_entries;
-       u8      rsvd[7];
+       u8      rsvd1[7];
        u8      valid;
 };
 
@@ -3164,7 +3205,7 @@ struct hwrm_func_ptp_pin_cfg_output {
        u8      valid;
 };
 
-/* hwrm_func_ptp_cfg_input (size:320b/40B) */
+/* hwrm_func_ptp_cfg_input (size:384b/48B) */
 struct hwrm_func_ptp_cfg_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -3178,6 +3219,7 @@ struct hwrm_func_ptp_cfg_input {
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD     0x8UL
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP         0x10UL
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE      0x20UL
+       #define FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME                0x40UL
        u8      ptp_pps_event;
        #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL     0x1UL
        #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL     0x2UL
@@ -3204,6 +3246,7 @@ struct hwrm_func_ptp_cfg_input {
        __le32  ptp_freq_adj_ext_up;
        __le32  ptp_freq_adj_ext_phase_lower;
        __le32  ptp_freq_adj_ext_phase_upper;
+       __le64  ptp_set_time;
 };
 
 /* hwrm_func_ptp_cfg_output (size:128b/16B) */
@@ -3243,6 +3286,308 @@ struct hwrm_func_ptp_ts_query_output {
        u8      valid;
 };
 
+/* hwrm_func_ptp_ext_cfg_input (size:256b/32B) */
+struct hwrm_func_ptp_ext_cfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  enables;
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_MASTER_FID     0x1UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_FID        0x2UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_MODE       0x4UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_FAILOVER_TIMER     0x8UL
+       __le16  phc_master_fid;
+       __le16  phc_sec_fid;
+       u8      phc_sec_mode;
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_SWITCH  0x0UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_ALL     0x1UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY 0x2UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_LAST   FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY
+       u8      unused_0;
+       __le32  failover_timer;
+       u8      unused_1[4];
+};
+
+/* hwrm_func_ptp_ext_cfg_output (size:128b/16B) */
+struct hwrm_func_ptp_ext_cfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      unused_0[7];
+       u8      valid;
+};
+
+/* hwrm_func_ptp_ext_qcfg_input (size:192b/24B) */
+struct hwrm_func_ptp_ext_qcfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       u8      unused_0[8];
+};
+
+/* hwrm_func_ptp_ext_qcfg_output (size:256b/32B) */
+struct hwrm_func_ptp_ext_qcfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  phc_master_fid;
+       __le16  phc_sec_fid;
+       __le16  phc_active_fid0;
+       __le16  phc_active_fid1;
+       __le32  last_failover_event;
+       __le16  from_fid;
+       __le16  to_fid;
+       u8      unused_0[7];
+       u8      valid;
+};
+
+/* hwrm_func_backing_store_cfg_v2_input (size:448b/56B) */
+struct hwrm_func_backing_store_cfg_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID
+       __le16  instance;
+       __le32  flags;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE     0x1UL
+       __le64  page_dir;
+       __le32  num_entries;
+       __le16  entry_size;
+       u8      page_size_pbl_level;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_MASK  0xfUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_SFT   0
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_0   0x0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_1   0x1UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2   0x2UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LAST   FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_LAST   FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G
+       u8      subtype_valid_cnt;
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+};
+
+/* hwrm_func_backing_store_cfg_v2_output (size:128b/16B) */
+struct hwrm_func_backing_store_cfg_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      rsvd0[7];
+       u8      valid;
+};
+
+/* hwrm_func_backing_store_qcfg_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcfg_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID
+       __le16  instance;
+       u8      rsvd[4];
+};
+
+/* hwrm_func_backing_store_qcfg_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcfg_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST       FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID
+       __le16  instance;
+       __le32  flags;
+       __le64  page_dir;
+       __le32  num_entries;
+       u8      page_size_pbl_level;
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_MASK  0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_SFT   0
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_0   0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_1   0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2   0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LAST   FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_LAST   FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G
+       u8      subtype_valid_cnt;
+       u8      rsvd[2];
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+       u8      rsvd2[7];
+       u8      valid;
+};
+
+/* qpc_split_entries (size:128b/16B) */
+struct qpc_split_entries {
+       __le32  qp_num_l2_entries;
+       __le32  qp_num_qp1_entries;
+       __le32  rsvd[2];
+};
+
+/* srq_split_entries (size:128b/16B) */
+struct srq_split_entries {
+       __le32  srq_num_l2_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* cq_split_entries (size:128b/16B) */
+struct cq_split_entries {
+       __le32  cq_num_l2_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* vnic_split_entries (size:128b/16B) */
+struct vnic_split_entries {
+       __le32  vnic_num_vnic_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* mrav_split_entries (size:128b/16B) */
+struct mrav_split_entries {
+       __le32  mrav_num_av_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcaps_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID
+       u8      rsvd[6];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcaps_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST       FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID
+       __le16  entry_size;
+       __le32  flags;
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT     0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID               0x2UL
+       __le32  instance_bit_map;
+       u8      ctx_init_value;
+       u8      ctx_init_offset;
+       u8      entry_multiple;
+       u8      rsvd;
+       __le32  max_num_entries;
+       __le32  min_num_entries;
+       __le16  next_valid_type;
+       u8      subtype_valid_cnt;
+       u8      rsvd2;
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+       u8      rsvd3[3];
+       u8      valid;
+};
+
 /* hwrm_func_drv_if_change_input (size:192b/24B) */
 struct hwrm_func_drv_if_change_input {
        __le16  req_type;
@@ -3741,7 +4086,7 @@ struct hwrm_port_phy_qcfg_output {
        u8      valid;
 };
 
-/* hwrm_port_mac_cfg_input (size:384b/48B) */
+/* hwrm_port_mac_cfg_input (size:448b/56B) */
 struct hwrm_port_mac_cfg_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -3807,7 +4152,8 @@ struct hwrm_port_mac_cfg_input {
        #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT           5
        u8      unused_0[3];
        __le32  ptp_freq_adj_ppb;
-       __le32  ptp_adj_phase;
+       u8      unused_1[4];
+       __le64  ptp_adj_phase;
 };
 
 /* hwrm_port_mac_cfg_output (size:128b/16B) */
@@ -3850,6 +4196,7 @@ struct hwrm_port_mac_ptp_qcfg_output {
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS                      0x4UL
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS                         0x8UL
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK     0x10UL
+       #define PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED                      0x20UL
        u8      unused_0[3];
        __le32  rx_ts_reg_off_lower;
        __le32  rx_ts_reg_off_upper;
@@ -4339,7 +4686,8 @@ struct hwrm_port_phy_qcaps_output {
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_2       0x2UL
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_3       0x3UL
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_4       0x4UL
-       #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST   PORT_PHY_QCAPS_RESP_PORT_CNT_4
+       #define PORT_PHY_QCAPS_RESP_PORT_CNT_12      0xcUL
+       #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST   PORT_PHY_QCAPS_RESP_PORT_CNT_12
        __le16  supported_speeds_force_mode;
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD     0x1UL
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB       0x2UL
@@ -4399,7 +4747,7 @@ struct hwrm_port_phy_qcaps_output {
        __le16  flags2;
        #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED     0x1UL
        #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED       0x2UL
-       u8      unused_0[1];
+       u8      internal_port_cnt;
        u8      valid;
 };
 
@@ -6221,12 +6569,13 @@ struct hwrm_vnic_rss_cfg_input {
        __le16  target_id;
        __le64  resp_addr;
        __le32  hash_type;
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4         0x1UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4     0x2UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4     0x4UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6         0x8UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6     0x10UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6     0x20UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4                0x1UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4            0x2UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4            0x4UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6                0x8UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6            0x10UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6            0x20UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL     0x40UL
        __le16  vnic_id;
        u8      ring_table_pair_index;
        u8      hash_mode_flags;
@@ -7898,6 +8247,7 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output {
        u8      valid;
 };
 
+/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
 struct hwrm_tunnel_dst_port_query_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -8909,6 +9259,50 @@ struct hwrm_dbg_qcfg_output {
        u8      valid;
 };
 
+/* hwrm_dbg_crashdump_medium_cfg_input (size:320b/40B) */
+struct hwrm_dbg_crashdump_medium_cfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  output_dest_flags;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR     0x1UL
+       __le16  pg_size_lvl;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_MASK      0x3UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_SFT       0
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_0       0x0UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_1       0x1UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2       0x2UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LAST       DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_MASK  0x1cUL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_SFT   2
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K   (0x0UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K   (0x1UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K  (0x2UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_2M   (0x3UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8M   (0x4UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G   (0x5UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_LAST   DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_MASK 0xffe0UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_SFT  5
+       __le32  size;
+       __le32  coredump_component_disable_flags;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_NVRAM     0x1UL
+       __le32  unused_0;
+       __le64  pbl;
+};
+
+/* hwrm_dbg_crashdump_medium_cfg_output (size:128b/16B) */
+struct hwrm_dbg_crashdump_medium_cfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      unused_1[7];
+       u8      valid;
+};
+
 /* coredump_segment_record (size:128b/16B) */
 struct coredump_segment_record {
        __le16  component_id;
@@ -9372,8 +9766,35 @@ struct hwrm_nvm_install_update_output {
        __le16  resp_len;
        __le64  installed_items;
        u8      result;
-       #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL
-       #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST   NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS                      0x0UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_FAILURE                      0xffUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_MALLOC_FAILURE               0xfdUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_INDEX_PARAMETER      0xfbUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TYPE_PARAMETER       0xf3UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PREREQUISITE         0xf2UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_FILE_HEADER          0xecUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_SIGNATURE            0xebUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_STREAM          0xeaUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_LENGTH          0xe9UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_MANIFEST             0xe8UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TRAILER              0xe7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_CHECKSUM             0xe6UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_ITEM_CHECKSUM        0xe5UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DATA_LENGTH          0xe4UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DIRECTIVE            0xe1UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_CHIP_REV         0xceUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_DEVICE_ID        0xcdUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_VENDOR    0xccUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_ID        0xcbUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_PLATFORM         0xc5UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_DUPLICATE_ITEM               0xc4UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ZERO_LENGTH_ITEM             0xc3UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_CHECKSUM_ERROR       0xb9UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_DATA_ERROR           0xb8UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_AUTHENTICATION_ERROR 0xb7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_NOT_FOUND               0xb0UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED                  0xa7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST                        NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED
        u8      problem_item;
        #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE    0x0UL
        #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 4852096..a0b321a 100644
 #include "bnxt_hwrm.h"
 #include "bnxt_ptp.h"
 
+static int bnxt_ptp_cfg_settime(struct bnxt *bp, u64 time)
+{
+       struct hwrm_func_ptp_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+       if (rc)
+               return rc;
+
+       req->enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME);
+       req->ptp_set_time = cpu_to_le64(time);
+       return hwrm_req_send(bp, req);
+}
+
 int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off)
 {
        unsigned int ptp_class;
@@ -48,6 +62,9 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
                                                ptp_info);
        u64 ns = timespec64_to_ns(ts);
 
+       if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+               return bnxt_ptp_cfg_settime(ptp->bp, ns);
+
        spin_lock_bh(&ptp->ptp_lock);
        timecounter_init(&ptp->tc, &ptp->cc, ns);
        spin_unlock_bh(&ptp->ptp_lock);
@@ -131,11 +148,47 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
        return 0;
 }
 
+/* Caller holds ptp_lock */
+void bnxt_ptp_update_current_time(struct bnxt *bp)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time);
+       WRITE_ONCE(ptp->old_time, ptp->current_time);
+}
+
+static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta)
+{
+       struct hwrm_port_mac_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(ptp->bp, req, HWRM_PORT_MAC_CFG);
+       if (rc)
+               return rc;
+
+       req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE);
+       req->ptp_adj_phase = cpu_to_le64(delta);
+
+       rc = hwrm_req_send(ptp->bp, req);
+       if (rc) {
+               netdev_err(ptp->bp->dev, "ptp adjphc failed. rc = %x\n", rc);
+       } else {
+               spin_lock_bh(&ptp->ptp_lock);
+               bnxt_ptp_update_current_time(ptp->bp);
+               spin_unlock_bh(&ptp->ptp_lock);
+       }
+
+       return rc;
+}
+
 static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
 {
        struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
                                                ptp_info);
 
+       if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+               return bnxt_ptp_adjphc(ptp, delta);
+
        spin_lock_bh(&ptp->ptp_lock);
        timecounter_adjtime(&ptp->tc, delta);
        spin_unlock_bh(&ptp->ptp_lock);
@@ -714,7 +767,70 @@ static bool bnxt_pps_config_ok(struct bnxt *bp)
        return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config;
 }
 
-int bnxt_ptp_init(struct bnxt *bp)
+static void bnxt_ptp_timecounter_init(struct bnxt *bp, bool init_tc)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       if (!ptp->ptp_clock) {
+               memset(&ptp->cc, 0, sizeof(ptp->cc));
+               ptp->cc.read = bnxt_cc_read;
+               ptp->cc.mask = CYCLECOUNTER_MASK(48);
+               ptp->cc.shift = 0;
+               ptp->cc.mult = 1;
+               ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
+       }
+       if (init_tc)
+               timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+}
+
+/* Caller holds ptp_lock */
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns)
+{
+       timecounter_init(&ptp->tc, &ptp->cc, ns);
+       /* For RTC, cycle_last must be in sync with the timecounter value. */
+       ptp->tc.cycle_last = ns & ptp->cc.mask;
+}
+
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg)
+{
+       struct timespec64 tsp;
+       u64 ns;
+       int rc;
+
+       if (!bp->ptp_cfg || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+               return -ENODEV;
+
+       if (!phc_cfg) {
+               ktime_get_real_ts64(&tsp);
+               ns = timespec64_to_ns(&tsp);
+               rc = bnxt_ptp_cfg_settime(bp, ns);
+               if (rc)
+                       return rc;
+       } else {
+               rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns);
+               if (rc)
+                       return rc;
+       }
+       spin_lock_bh(&bp->ptp_cfg->ptp_lock);
+       bnxt_ptp_rtc_timecounter_init(bp->ptp_cfg, ns);
+       spin_unlock_bh(&bp->ptp_cfg->ptp_lock);
+
+       return 0;
+}
+
+static void bnxt_ptp_free(struct bnxt *bp)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       if (ptp->ptp_clock) {
+               ptp_clock_unregister(ptp->ptp_clock);
+               ptp->ptp_clock = NULL;
+               kfree(ptp->ptp_info.pin_config);
+               ptp->ptp_info.pin_config = NULL;
+       }
+}
+
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
 {
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        int rc;
@@ -726,26 +842,23 @@ int bnxt_ptp_init(struct bnxt *bp)
        if (rc)
                return rc;
 
+       if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+               bnxt_ptp_timecounter_init(bp, false);
+               rc = bnxt_ptp_init_rtc(bp, phc_cfg);
+               if (rc)
+                       goto out;
+       }
+
        if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
                return 0;
 
-       if (ptp->ptp_clock) {
-               ptp_clock_unregister(ptp->ptp_clock);
-               ptp->ptp_clock = NULL;
-               kfree(ptp->ptp_info.pin_config);
-               ptp->ptp_info.pin_config = NULL;
-       }
+       bnxt_ptp_free(bp);
+
        atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
        spin_lock_init(&ptp->ptp_lock);
 
-       memset(&ptp->cc, 0, sizeof(ptp->cc));
-       ptp->cc.read = bnxt_cc_read;
-       ptp->cc.mask = CYCLECOUNTER_MASK(48);
-       ptp->cc.shift = 0;
-       ptp->cc.mult = 1;
-
-       ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
-       timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+       if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+               bnxt_ptp_timecounter_init(bp, true);
 
        ptp->ptp_info = bnxt_ptp_caps;
        if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
@@ -757,8 +870,8 @@ int bnxt_ptp_init(struct bnxt *bp)
                int err = PTR_ERR(ptp->ptp_clock);
 
                ptp->ptp_clock = NULL;
-               bnxt_unmap_ptp_regs(bp);
-               return err;
+               rc = err;
+               goto out;
        }
        if (bp->flags & BNXT_FLAG_CHIP_P5) {
                spin_lock_bh(&ptp->ptp_lock);
@@ -768,6 +881,11 @@ int bnxt_ptp_init(struct bnxt *bp)
                ptp_schedule_worker(ptp->ptp_clock, 0);
        }
        return 0;
+
+out:
+       bnxt_ptp_free(bp);
+       bnxt_unmap_ptp_regs(bp);
+       return rc;
 }
 
 void bnxt_ptp_clear(struct bnxt *bp)
index 7c528e1..373baf4 100644 (file)
@@ -131,12 +131,15 @@ do {                                              \
 #endif
 
 int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off);
+void bnxt_ptp_update_current_time(struct bnxt *bp);
 void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2);
 void bnxt_ptp_reapply_pps(struct bnxt *bp);
 int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
 int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
 int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
-int bnxt_ptp_init(struct bnxt *bp);
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns);
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg);
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg);
 void bnxt_ptp_clear(struct bnxt *bp);
 #endif
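
A note on the cyclecounter setup above: with mult = 1 and shift = 0, PHC
cycles are nanoseconds, so a timecounter read reduces to
base_ns + ((cycle_now - cycle_last) & mask). That is why
bnxt_ptp_rtc_timecounter_init() pins tc.cycle_last to ns & cc.mask right
after seeding the timecounter. A minimal host-side sketch of that
arithmetic (illustrative only, not driver code):

#include <stdint.h>
#include <stdio.h>

#define MASK48 ((1ULL << 48) - 1)	/* matches CYCLECOUNTER_MASK(48) */

/* With mult = 1 and shift = 0, a timecounter read is simply
 * ns = base_ns + ((cycle_now - cycle_last) & mask).
 */
static uint64_t tc_read(uint64_t base_ns, uint64_t cycle_last,
			uint64_t cycle_now)
{
	return base_ns + ((cycle_now - cycle_last) & MASK48);
}

int main(void)
{
	uint64_t ns = 1643300000000000000ULL;	/* arbitrary wall-clock ns */
	uint64_t last = ns & MASK48;		/* in sync, as the patch requires */
	uint64_t now = last + 1000;		/* PHC advanced by 1 us */

	printf("delta = %llu ns\n",
	       (unsigned long long)(tc_read(ns, last, now) - ns));
	return 0;
}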
index 87f1056..cfe0911 100644 (file)
@@ -1368,7 +1368,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
        if (!p->eee_enabled) {
                bcmgenet_eee_enable_set(dev, false);
        } else {
-               ret = phy_init_eee(dev->phydev, 0);
+               ret = phy_init_eee(dev->phydev, false);
                if (ret) {
                        netif_err(priv, hw, dev, "EEE initialization failed\n");
                        return ret;
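
The hunk above (and the matching fec, mvneta and lan743x hunks below)
tracks a tree-wide change that turns the second phy_init_eee() parameter,
the clock-stop enable, from an int into a bool; passing false keeps the
previous behavior of passing 0. Assumed shape of the prototype after the
conversion (the authoritative one lives in include/linux/phy.h):

int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable);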
index c78b99a..8014eb3 100644 (file)
@@ -2363,11 +2363,13 @@ static void gemini_port_save_mac_addr(struct gemini_ethernet_port *port)
 static int gemini_ethernet_port_probe(struct platform_device *pdev)
 {
        char *port_names[2] = { "ethernet0", "ethernet1" };
+       struct device_node *np = pdev->dev.of_node;
        struct gemini_ethernet_port *port;
        struct device *dev = &pdev->dev;
        struct gemini_ethernet *geth;
        struct net_device *netdev;
        struct device *parent;
+       u8 mac[ETH_ALEN];
        unsigned int id;
        int irq;
        int ret;
@@ -2473,6 +2475,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
        netif_napi_add(netdev, &port->napi, gmac_napi_poll,
                       DEFAULT_NAPI_WEIGHT);
 
+       ret = of_get_mac_address(np, mac);
+       if (!ret) {
+               dev_info(dev, "Setting macaddr from DT %pM\n", mac);
+               memcpy(port->mac_addr, mac, ETH_ALEN);
+       }
+
        if (is_valid_ether_addr((void *)port->mac_addr)) {
                eth_hw_addr_set(netdev, (u8 *)port->mac_addr);
        } else {
index 3fb39e3..653bde4 100644 (file)
@@ -21,7 +21,7 @@ void pnic_do_nway(struct net_device *dev)
        struct tulip_private *tp = netdev_priv(dev);
        void __iomem *ioaddr = tp->base_addr;
        u32 phy_reg = ioread32(ioaddr + 0xB8);
-       u32 new_csr6 = tp->csr6 & ~0x40C40200;
+       u32 new_csr6;
 
        if (phy_reg & 0x78000000) { /* Ignore baseT4 */
                if (phy_reg & 0x20000000)               dev->if_port = 5;
index 623d113..521f036 100644 (file)
@@ -100,6 +100,14 @@ static int dpaa2_mac_get_if_mode(struct fwnode_handle *dpmac_node,
        return err;
 }
 
+static struct phylink_pcs *dpaa2_mac_select_pcs(struct phylink_config *config,
+                                               phy_interface_t interface)
+{
+       struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+
+       return mac->pcs;
+}
+
 static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode,
                             const struct phylink_link_state *state)
 {
@@ -172,6 +180,7 @@ static void dpaa2_mac_link_down(struct phylink_config *config,
 
 static const struct phylink_mac_ops dpaa2_mac_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = dpaa2_mac_select_pcs,
        .mac_config = dpaa2_mac_config,
        .mac_link_up = dpaa2_mac_link_up,
        .mac_link_down = dpaa2_mac_link_down,
@@ -303,9 +312,6 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
        }
        mac->phylink = phylink;
 
-       if (mac->pcs)
-               phylink_set_pcs(mac->phylink, mac->pcs);
-
        err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0);
        if (err) {
                netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err);
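
The dpaa2 change above, like the enetc and mvneta changes below, migrates
from calling phylink_set_pcs() after phylink_create() to providing a
mac_select_pcs() callback, so phylink can ask the driver for the right PCS
per interface mode. A generic sketch of the pattern (driver name and
fields hypothetical):

static struct phylink_pcs *foo_select_pcs(struct phylink_config *config,
					  phy_interface_t interface)
{
	struct foo_priv *priv = container_of(config, struct foo_priv,
					     phylink_config);

	/* Return the PCS for this interface mode; NULL means the MAC
	 * manages the link without a separate PCS.
	 */
	return priv->pcs;
}

static const struct phylink_mac_ops foo_phylink_ops = {
	.validate	= phylink_generic_validate,
	.mac_select_pcs	= foo_select_pcs,	/* replaces phylink_set_pcs() */
	.mac_config	= foo_mac_config,
	/* ... */
};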
index ed16a5a..a0c75c7 100644 (file)
@@ -934,18 +934,21 @@ static void enetc_mdiobus_destroy(struct enetc_pf *pf)
        enetc_imdio_remove(pf);
 }
 
+static struct phylink_pcs *
+enetc_pl_mac_select_pcs(struct phylink_config *config, phy_interface_t iface)
+{
+       struct enetc_pf *pf = phylink_to_enetc_pf(config);
+
+       return pf->pcs;
+}
+
 static void enetc_pl_mac_config(struct phylink_config *config,
                                unsigned int mode,
                                const struct phylink_link_state *state)
 {
        struct enetc_pf *pf = phylink_to_enetc_pf(config);
-       struct enetc_ndev_priv *priv;
 
        enetc_mac_config(&pf->si->hw, state->interface);
-
-       priv = netdev_priv(pf->si->ndev);
-       if (pf->pcs)
-               phylink_set_pcs(priv->phylink, pf->pcs);
 }
 
 static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex)
@@ -1062,6 +1065,7 @@ static void enetc_pl_mac_link_down(struct phylink_config *config,
 
 static const struct phylink_mac_ops enetc_mac_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = enetc_pl_mac_select_pcs,
        .mac_config = enetc_pl_mac_config,
        .mac_link_up = enetc_pl_mac_link_up,
        .mac_link_down = enetc_pl_mac_link_down,
index 796133d..11227f5 100644 (file)
@@ -2797,7 +2797,7 @@ static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
        int ret = 0;
 
        if (enable) {
-               ret = phy_init_eee(ndev->phydev, 0);
+               ret = phy_init_eee(ndev->phydev, false);
                if (ret)
                        return ret;
 
index af99017..7d49c28 100644 (file)
@@ -101,7 +101,6 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
        u32 val, tempval;
        struct timespec64 ts;
        u64 ns;
-       val = 0;
 
        if (fep->pps_enable == enable)
                return 0;
index 266e562..d38d0c3 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_mdio.h>
+#include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mdio.h>
@@ -36,9 +37,10 @@ struct tgec_mdio_controller {
 } __packed;
 
 #define MDIO_STAT_ENC          BIT(6)
-#define MDIO_STAT_CLKDIV(x)    (((x>>1) & 0xff) << 8)
+#define MDIO_STAT_CLKDIV(x)    (((x) & 0x1ff) << 7)
 #define MDIO_STAT_BSY          BIT(0)
 #define MDIO_STAT_RD_ER                BIT(1)
+#define MDIO_STAT_PRE_DIS      BIT(5)
 #define MDIO_CTL_DEV_ADDR(x)   (x & 0x1f)
 #define MDIO_CTL_PORT_ADDR(x)  ((x & 0x1f) << 5)
 #define MDIO_CTL_PRE_DIS       BIT(10)
@@ -50,6 +52,8 @@ struct tgec_mdio_controller {
 
 struct mdio_fsl_priv {
        struct  tgec_mdio_controller __iomem *mdio_base;
+       struct  clk *enet_clk;
+       u32     mdc_freq;
        bool    is_little_endian;
        bool    has_a009885;
        bool    has_a011043;
@@ -254,6 +258,50 @@ irq_restore:
        return ret;
 }
 
+static int xgmac_mdio_set_mdc_freq(struct mii_bus *bus)
+{
+       struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+       struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+       struct device *dev = bus->parent;
+       u32 mdio_stat, div;
+
+       if (device_property_read_u32(dev, "clock-frequency", &priv->mdc_freq))
+               return 0;
+
+       priv->enet_clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(priv->enet_clk)) {
+               dev_err(dev, "Input clock unknown, not changing MDC frequency");
+               return PTR_ERR(priv->enet_clk);
+       }
+
+       div = ((clk_get_rate(priv->enet_clk) / priv->mdc_freq) - 1) / 2;
+       if (div < 5 || div > 0x1ff) {
+               dev_err(dev, "Requested MDC frequecy is out of range, ignoring");
+               return -EINVAL;
+       }
+
+       mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+       mdio_stat &= ~MDIO_STAT_CLKDIV(0x1ff);
+       mdio_stat |= MDIO_STAT_CLKDIV(div);
+       xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+       return 0;
+}
+
+static void xgmac_mdio_set_suppress_preamble(struct mii_bus *bus)
+{
+       struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+       struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+       struct device *dev = bus->parent;
+       u32 mdio_stat;
+
+       if (!device_property_read_bool(dev, "suppress-preamble"))
+               return;
+
+       mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+       mdio_stat |= MDIO_STAT_PRE_DIS;
+       xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+}
+
 static int xgmac_mdio_probe(struct platform_device *pdev)
 {
        struct fwnode_handle *fwnode;
@@ -273,7 +321,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv));
+       bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(struct mdio_fsl_priv));
        if (!bus)
                return -ENOMEM;
 
@@ -284,13 +332,11 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
        bus->probe_capabilities = MDIOBUS_C22_C45;
        snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res->start);
 
-       /* Set the PHY base address */
        priv = bus->priv;
-       priv->mdio_base = ioremap(res->start, resource_size(res));
-       if (!priv->mdio_base) {
-               ret = -ENOMEM;
-               goto err_ioremap;
-       }
+       priv->mdio_base = devm_ioremap(&pdev->dev, res->start,
+                                      resource_size(res));
+       if (!priv->mdio_base)
+               return -ENOMEM;
 
        /* For both ACPI and DT cases, endianness of MDIO controller
         * needs to be specified using "little-endian" property.
@@ -303,6 +349,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
        priv->has_a011043 = device_property_read_bool(&pdev->dev,
                                                      "fsl,erratum-a011043");
 
+       xgmac_mdio_set_suppress_preamble(bus);
+
+       ret = xgmac_mdio_set_mdc_freq(bus);
+       if (ret)
+               return ret;
+
        fwnode = pdev->dev.fwnode;
        if (is_of_node(fwnode))
                ret = of_mdiobus_register(bus, to_of_node(fwnode));
@@ -312,32 +364,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
                ret = -EINVAL;
        if (ret) {
                dev_err(&pdev->dev, "cannot register MDIO bus\n");
-               goto err_registration;
+               return ret;
        }
 
        platform_set_drvdata(pdev, bus);
 
        return 0;
-
-err_registration:
-       iounmap(priv->mdio_base);
-
-err_ioremap:
-       mdiobus_free(bus);
-
-       return ret;
-}
-
-static int xgmac_mdio_remove(struct platform_device *pdev)
-{
-       struct mii_bus *bus = platform_get_drvdata(pdev);
-       struct mdio_fsl_priv *priv = bus->priv;
-
-       mdiobus_unregister(bus);
-       iounmap(priv->mdio_base);
-       mdiobus_free(bus);
-
-       return 0;
 }
 
 static const struct of_device_id xgmac_mdio_match[] = {
@@ -364,7 +396,6 @@ static struct platform_driver xgmac_mdio_driver = {
                .acpi_match_table = xgmac_acpi_match,
        },
        .probe = xgmac_mdio_probe,
-       .remove = xgmac_mdio_remove,
 };
 
 module_platform_driver(xgmac_mdio_driver);
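
Worked example of the divider math in xgmac_mdio_set_mdc_freq(): with an
assumed 500 MHz input clock and a requested "clock-frequency" of 2.5 MHz,
div = ((500000000 / 2500000) - 1) / 2 = (200 - 1) / 2 = 99, which falls
inside the accepted [5, 0x1ff] window and is written into the 9-bit CLKDIV
field. A standalone sketch of the same computation:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t rate = 500000000ULL;	/* assumed input clock: 500 MHz */
	uint32_t mdc_freq = 2500000;	/* from the "clock-frequency" property */
	uint32_t div = ((rate / mdc_freq) - 1) / 2;

	if (div < 5 || div > 0x1ff)
		printf("requested MDC frequency out of range\n");
	else
		printf("CLKDIV = %u\n", div);	/* prints CLKDIV = 99 */
	return 0;
}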
index 83c8908..315a43e 100644 (file)
@@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
                        bytes_compl += buf->skb->len;
                        pkts_compl++;
                        dev_kfree_skb_any(buf->skb);
-               } else if (buf->type == MVNETA_TYPE_XDP_TX ||
-                          buf->type == MVNETA_TYPE_XDP_NDO) {
+               } else if ((buf->type == MVNETA_TYPE_XDP_TX ||
+                           buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
                        if (napi && buf->type == MVNETA_TYPE_XDP_TX)
                                xdp_return_frame_rx_napi(buf->xdpf);
                        else
@@ -2060,61 +2060,106 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
 
 static void
 mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
-                   struct xdp_buff *xdp, struct skb_shared_info *sinfo,
-                   int sync_len)
+                   struct xdp_buff *xdp, int sync_len)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        int i;
 
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
        for (i = 0; i < sinfo->nr_frags; i++)
                page_pool_put_full_page(rxq->page_pool,
                                        skb_frag_page(&sinfo->frags[i]), true);
+
+out:
        page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
                           sync_len, true);
 }
 
 static int
 mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
-                       struct xdp_frame *xdpf, bool dma_map)
+                       struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
 {
-       struct mvneta_tx_desc *tx_desc;
-       struct mvneta_tx_buf *buf;
-       dma_addr_t dma_addr;
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+       struct device *dev = pp->dev->dev.parent;
+       struct mvneta_tx_desc *tx_desc = NULL;
+       int i, num_frames = 1;
+       struct page *page;
+
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               num_frames += sinfo->nr_frags;
 
-       if (txq->count >= txq->tx_stop_threshold)
+       if (txq->count + num_frames >= txq->size)
                return MVNETA_XDP_DROPPED;
 
-       tx_desc = mvneta_txq_next_desc_get(txq);
+       for (i = 0; i < num_frames; i++) {
+               struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+               skb_frag_t *frag = NULL;
+               int len = xdpf->len;
+               dma_addr_t dma_addr;
 
-       buf = &txq->buf[txq->txq_put_index];
-       if (dma_map) {
-               /* ndo_xdp_xmit */
-               dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
-                                         xdpf->len, DMA_TO_DEVICE);
-               if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
-                       mvneta_txq_desc_put(txq);
-                       return MVNETA_XDP_DROPPED;
+               if (unlikely(i)) { /* paged area */
+                       frag = &sinfo->frags[i - 1];
+                       len = skb_frag_size(frag);
                }
-               buf->type = MVNETA_TYPE_XDP_NDO;
-       } else {
-               struct page *page = virt_to_page(xdpf->data);
 
-               dma_addr = page_pool_get_dma_addr(page) +
-                          sizeof(*xdpf) + xdpf->headroom;
-               dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
-                                          xdpf->len, DMA_BIDIRECTIONAL);
-               buf->type = MVNETA_TYPE_XDP_TX;
+               tx_desc = mvneta_txq_next_desc_get(txq);
+               if (dma_map) {
+                       /* ndo_xdp_xmit */
+                       void *data;
+
+                       data = unlikely(frag) ? skb_frag_address(frag)
+                                             : xdpf->data;
+                       dma_addr = dma_map_single(dev, data, len,
+                                                 DMA_TO_DEVICE);
+                       if (dma_mapping_error(dev, dma_addr)) {
+                               mvneta_txq_desc_put(txq);
+                               goto unmap;
+                       }
+
+                       buf->type = MVNETA_TYPE_XDP_NDO;
+               } else {
+                       page = unlikely(frag) ? skb_frag_page(frag)
+                                             : virt_to_page(xdpf->data);
+                       dma_addr = page_pool_get_dma_addr(page);
+                       if (unlikely(frag))
+                               dma_addr += skb_frag_off(frag);
+                       else
+                               dma_addr += sizeof(*xdpf) + xdpf->headroom;
+                       dma_sync_single_for_device(dev, dma_addr, len,
+                                                  DMA_BIDIRECTIONAL);
+                       buf->type = MVNETA_TYPE_XDP_TX;
+               }
+               buf->xdpf = unlikely(i) ? NULL : xdpf;
+
+               tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
+               tx_desc->buf_phys_addr = dma_addr;
+               tx_desc->data_size = len;
+               *nxmit_byte += len;
+
+               mvneta_txq_inc_put(txq);
        }
-       buf->xdpf = xdpf;
 
-       tx_desc->command = MVNETA_TXD_FLZ_DESC;
-       tx_desc->buf_phys_addr = dma_addr;
-       tx_desc->data_size = xdpf->len;
+       /* last descriptor */
+       if (likely(tx_desc))
+               tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
 
-       mvneta_txq_inc_put(txq);
-       txq->pending++;
-       txq->count++;
+       txq->pending += num_frames;
+       txq->count += num_frames;
 
        return MVNETA_XDP_TX;
+
+unmap:
+       for (i--; i >= 0; i--) {
+               mvneta_txq_desc_put(txq);
+               tx_desc = txq->descs + txq->next_desc_to_proc;
+               dma_unmap_single(dev, tx_desc->buf_phys_addr,
+                                tx_desc->data_size,
+                                DMA_TO_DEVICE);
+       }
+
+       return MVNETA_XDP_DROPPED;
 }
 
 static int
@@ -2123,8 +2168,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
        struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
        struct mvneta_tx_queue *txq;
        struct netdev_queue *nq;
+       int cpu, nxmit_byte = 0;
        struct xdp_frame *xdpf;
-       int cpu;
        u32 ret;
 
        xdpf = xdp_convert_buff_to_frame(xdp);
@@ -2136,10 +2181,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
        nq = netdev_get_tx_queue(pp->dev, txq->id);
 
        __netif_tx_lock(nq, cpu);
-       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
        if (ret == MVNETA_XDP_TX) {
                u64_stats_update_begin(&stats->syncp);
-               stats->es.ps.tx_bytes += xdpf->len;
+               stats->es.ps.tx_bytes += nxmit_byte;
                stats->es.ps.tx_packets++;
                stats->es.ps.xdp_tx++;
                u64_stats_update_end(&stats->syncp);
@@ -2178,11 +2223,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
 
        __netif_tx_lock(nq, cpu);
        for (i = 0; i < num_frame; i++) {
-               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
+                                             true);
                if (ret != MVNETA_XDP_TX)
                        break;
 
-               nxmit_byte += frames[i]->len;
                nxmit++;
        }
 
@@ -2205,7 +2250,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
               struct bpf_prog *prog, struct xdp_buff *xdp,
               u32 frame_sz, struct mvneta_stats *stats)
 {
-       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        unsigned int len, data_len, sync;
        u32 ret, act;
 
@@ -2226,7 +2270,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 
                err = xdp_do_redirect(pp->dev, xdp, prog);
                if (unlikely(err)) {
-                       mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+                       mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                        ret = MVNETA_XDP_DROPPED;
                } else {
                        ret = MVNETA_XDP_REDIR;
@@ -2237,7 +2281,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
        case XDP_TX:
                ret = mvneta_xdp_xmit_back(pp, xdp);
                if (ret != MVNETA_XDP_TX)
-                       mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+                       mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                break;
        default:
                bpf_warn_invalid_xdp_action(pp->dev, prog, act);
@@ -2246,7 +2290,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
                trace_xdp_exception(pp->dev, prog, act);
                fallthrough;
        case XDP_DROP:
-               mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+               mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                ret = MVNETA_XDP_DROPPED;
                stats->xdp_drop++;
                break;
@@ -2269,7 +2313,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
        int data_len = -MVNETA_MH_SIZE, len;
        struct net_device *dev = pp->dev;
        enum dma_data_direction dma_dir;
-       struct skb_shared_info *sinfo;
 
        if (*size > MVNETA_MAX_RX_BUF_SIZE) {
                len = MVNETA_MAX_RX_BUF_SIZE;
@@ -2289,11 +2332,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
 
        /* Prefetch header */
        prefetch(data);
+       xdp_buff_clear_frags_flag(xdp);
        xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE,
                         data_len, false);
-
-       sinfo = xdp_get_shared_info_from_buff(xdp);
-       sinfo->nr_frags = 0;
 }
 
 static void
@@ -2301,9 +2342,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                            struct mvneta_rx_desc *rx_desc,
                            struct mvneta_rx_queue *rxq,
                            struct xdp_buff *xdp, int *size,
-                           struct skb_shared_info *xdp_sinfo,
                            struct page *page)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        struct net_device *dev = pp->dev;
        enum dma_data_direction dma_dir;
        int data_len, len;
@@ -2321,25 +2362,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                                len, dma_dir);
        rx_desc->buf_phys_addr = 0;
 
-       if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) {
-               skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++];
+       if (!xdp_buff_has_frags(xdp))
+               sinfo->nr_frags = 0;
+
+       if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+               skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];
 
                skb_frag_off_set(frag, pp->rx_offset_correction);
                skb_frag_size_set(frag, data_len);
                __skb_frag_set_page(frag, page);
+
+               if (!xdp_buff_has_frags(xdp)) {
+                       sinfo->xdp_frags_size = *size;
+                       xdp_buff_set_frags_flag(xdp);
+               }
+               if (page_is_pfmemalloc(page))
+                       xdp_buff_set_frag_pfmemalloc(xdp);
        } else {
                page_pool_put_full_page(rxq->page_pool, page, true);
        }
-
-       /* last fragment */
-       if (len == *size) {
-               struct skb_shared_info *sinfo;
-
-               sinfo = xdp_get_shared_info_from_buff(xdp);
-               sinfo->nr_frags = xdp_sinfo->nr_frags;
-               memcpy(sinfo->frags, xdp_sinfo->frags,
-                      sinfo->nr_frags * sizeof(skb_frag_t));
-       }
        *size -= len;
 }
 
@@ -2348,8 +2389,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
                      struct xdp_buff *xdp, u32 desc_status)
 {
        struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-       int i, num_frags = sinfo->nr_frags;
        struct sk_buff *skb;
+       u8 num_frags;
+
+       if (unlikely(xdp_buff_has_frags(xdp)))
+               num_frags = sinfo->nr_frags;
 
        skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
        if (!skb)
@@ -2361,13 +2405,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
        skb_put(skb, xdp->data_end - xdp->data);
        skb->ip_summed = mvneta_rx_csum(pp, desc_status);
 
-       for (i = 0; i < num_frags; i++) {
-               skb_frag_t *frag = &sinfo->frags[i];
-
-               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-                               skb_frag_page(frag), skb_frag_off(frag),
-                               skb_frag_size(frag), PAGE_SIZE);
-       }
+       if (unlikely(xdp_buff_has_frags(xdp)))
+               xdp_update_skb_shared_info(skb, num_frags,
+                                          sinfo->xdp_frags_size,
+                                          num_frags * xdp->frame_sz,
+                                          xdp_buff_is_frag_pfmemalloc(xdp));
 
        return skb;
 }
@@ -2379,7 +2421,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 {
        int rx_proc = 0, rx_todo, refill, size = 0;
        struct net_device *dev = pp->dev;
-       struct skb_shared_info sinfo;
        struct mvneta_stats ps = {};
        struct bpf_prog *xdp_prog;
        u32 desc_status, frame_sz;
@@ -2388,8 +2429,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
        xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq);
        xdp_buf.data_hard_start = NULL;
 
-       sinfo.nr_frags = 0;
-
        /* Get number of received packets */
        rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
 
@@ -2431,7 +2470,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        }
 
                        mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
-                                                   &size, &sinfo, page);
+                                                   &size, page);
                } /* Middle or Last descriptor */
 
                if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2439,7 +2478,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        continue;
 
                if (size) {
-                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
                        goto next;
                }
 
@@ -2451,7 +2490,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                if (IS_ERR(skb)) {
                        struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 
-                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
 
                        u64_stats_update_begin(&stats->syncp);
                        stats->es.skb_alloc_error++;
@@ -2468,11 +2507,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                napi_gro_receive(napi, skb);
 next:
                xdp_buf.data_hard_start = NULL;
-               sinfo.nr_frags = 0;
        }
 
        if (xdp_buf.data_hard_start)
-               mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+               mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
 
        if (ps.xdp_redirect)
                xdp_do_flush_map();
@@ -3260,7 +3298,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
                return err;
        }
 
-       err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
+       err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
+                                PAGE_SIZE);
        if (err < 0)
                goto err_free_pp;
 
@@ -3740,6 +3779,7 @@ static void mvneta_percpu_disable(void *arg)
 static int mvneta_change_mtu(struct net_device *dev, int mtu)
 {
        struct mvneta_port *pp = netdev_priv(dev);
+       struct bpf_prog *prog = pp->xdp_prog;
        int ret;
 
        if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
@@ -3748,8 +3788,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
                mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
        }
 
-       if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
-               netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+       if (prog && !prog->aux->xdp_has_frags &&
+           mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n",
+                           mtu);
+
                return -EINVAL;
        }
 
@@ -3969,6 +4012,15 @@ static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = {
        .pcs_an_restart = mvneta_pcs_an_restart,
 };
 
+static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config,
+                                                phy_interface_t interface)
+{
+       struct net_device *ndev = to_net_dev(config->dev);
+       struct mvneta_port *pp = netdev_priv(ndev);
+
+       return &pp->phylink_pcs;
+}
+
 static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode,
                              phy_interface_t interface)
 {
@@ -4169,13 +4221,14 @@ static void mvneta_mac_link_up(struct phylink_config *config,
        mvneta_port_up(pp);
 
        if (phy && pp->eee_enabled) {
-               pp->eee_active = phy_init_eee(phy, 0) >= 0;
+               pp->eee_active = phy_init_eee(phy, false) >= 0;
                mvneta_set_eee(pp, pp->eee_active && pp->tx_lpi_enabled);
        }
 }
 
 static const struct phylink_mac_ops mvneta_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = mvneta_mac_select_pcs,
        .mac_prepare = mvneta_mac_prepare,
        .mac_config = mvneta_mac_config,
        .mac_finish = mvneta_mac_finish,
@@ -4490,8 +4543,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
        struct mvneta_port *pp = netdev_priv(dev);
        struct bpf_prog *old_prog;
 
-       if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-               NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
+       if (prog && !prog->aux->xdp_has_frags &&
+           dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
                return -EOPNOTSUPP;
        }
 
@@ -5321,26 +5375,62 @@ static int mvneta_probe(struct platform_device *pdev)
        if (!dev)
                return -ENOMEM;
 
-       dev->irq = irq_of_parse_and_map(dn, 0);
-       if (dev->irq == 0)
-               return -EINVAL;
+       dev->tx_queue_len = MVNETA_MAX_TXD;
+       dev->watchdog_timeo = 5 * HZ;
+       dev->netdev_ops = &mvneta_netdev_ops;
+       dev->ethtool_ops = &mvneta_eth_tool_ops;
+
+       pp = netdev_priv(dev);
+       spin_lock_init(&pp->lock);
+       pp->dn = dn;
+
+       pp->rxq_def = rxq_def;
+       pp->indir[0] = rxq_def;
 
        err = of_get_phy_mode(dn, &phy_mode);
        if (err) {
                dev_err(&pdev->dev, "incorrect phy-mode\n");
-               goto err_free_irq;
+               return err;
        }
 
+       pp->phy_interface = phy_mode;
+
        comphy = devm_of_phy_get(&pdev->dev, dn, NULL);
-       if (comphy == ERR_PTR(-EPROBE_DEFER)) {
-               err = -EPROBE_DEFER;
-               goto err_free_irq;
-       } else if (IS_ERR(comphy)) {
+       if (comphy == ERR_PTR(-EPROBE_DEFER))
+               return -EPROBE_DEFER;
+
+       if (IS_ERR(comphy))
                comphy = NULL;
+
+       pp->comphy = comphy;
+
+       pp->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(pp->base))
+               return PTR_ERR(pp->base);
+
+       /* Get special SoC configurations */
+       if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+               pp->neta_armada3700 = true;
+
+       dev->irq = irq_of_parse_and_map(dn, 0);
+       if (dev->irq == 0)
+               return -EINVAL;
+
+       pp->clk = devm_clk_get(&pdev->dev, "core");
+       if (IS_ERR(pp->clk))
+               pp->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(pp->clk)) {
+               err = PTR_ERR(pp->clk);
+               goto err_free_irq;
        }
 
-       pp = netdev_priv(dev);
-       spin_lock_init(&pp->lock);
+       clk_prepare_enable(pp->clk);
+
+       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+       if (!IS_ERR(pp->clk_bus))
+               clk_prepare_enable(pp->clk_bus);
+
+       pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
 
        pp->phylink_config.dev = &dev->dev;
        pp->phylink_config.type = PHYLINK_NETDEV;
@@ -5377,55 +5467,16 @@ static int mvneta_probe(struct platform_device *pdev)
                                 phy_mode, &mvneta_phylink_ops);
        if (IS_ERR(phylink)) {
                err = PTR_ERR(phylink);
-               goto err_free_irq;
-       }
-
-       dev->tx_queue_len = MVNETA_MAX_TXD;
-       dev->watchdog_timeo = 5 * HZ;
-       dev->netdev_ops = &mvneta_netdev_ops;
-
-       dev->ethtool_ops = &mvneta_eth_tool_ops;
-
-       pp->phylink = phylink;
-       pp->comphy = comphy;
-       pp->phy_interface = phy_mode;
-       pp->dn = dn;
-
-       pp->rxq_def = rxq_def;
-       pp->indir[0] = rxq_def;
-
-       /* Get special SoC configurations */
-       if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
-               pp->neta_armada3700 = true;
-
-       pp->clk = devm_clk_get(&pdev->dev, "core");
-       if (IS_ERR(pp->clk))
-               pp->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(pp->clk)) {
-               err = PTR_ERR(pp->clk);
-               goto err_free_phylink;
-       }
-
-       clk_prepare_enable(pp->clk);
-
-       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
-       if (!IS_ERR(pp->clk_bus))
-               clk_prepare_enable(pp->clk_bus);
-
-       pp->base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(pp->base)) {
-               err = PTR_ERR(pp->base);
                goto err_clk;
        }
 
-       pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
-       phylink_set_pcs(phylink, &pp->phylink_pcs);
+       pp->phylink = phylink;
 
        /* Alloc per-cpu port structure */
        pp->ports = alloc_percpu(struct mvneta_pcpu_port);
        if (!pp->ports) {
                err = -ENOMEM;
-               goto err_clk;
+               goto err_free_phylink;
        }
 
        /* Alloc per-cpu stats */
@@ -5569,12 +5620,12 @@ err_netdev:
        free_percpu(pp->stats);
 err_free_ports:
        free_percpu(pp->ports);
-err_clk:
-       clk_disable_unprepare(pp->clk_bus);
-       clk_disable_unprepare(pp->clk);
 err_free_phylink:
        if (pp->phylink)
                phylink_destroy(pp->phylink);
+err_clk:
+       clk_disable_unprepare(pp->clk_bus);
+       clk_disable_unprepare(pp->clk);
 err_free_irq:
        irq_dispose_mapping(dev->irq);
        return err;
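
The reworked mvneta_xdp_submit_frame() above follows the general
multi-buffer pattern: descriptor 0 carries xdpf->data/xdpf->len, and when
xdp_frame_has_frags() is true, one further descriptor per shared-info frag
follows. A condensed sketch of that walk using the same xdp helpers
(fill_desc() is a hypothetical stand-in for the driver's descriptor setup):

struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
int i, nr_frags = xdp_frame_has_frags(xdpf) ? sinfo->nr_frags : 0;

/* Descriptor 0: the linear part. */
fill_desc(xdpf->data, xdpf->len, true);

/* Descriptors 1..nr_frags: the paged frags. */
for (i = 0; i < nr_frags; i++) {
	skb_frag_t *frag = &sinfo->frags[i];

	fill_desc(skb_frag_address(frag), skb_frag_size(frag), false);
}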
index 89ca796..4cd0747 100644 (file)
@@ -1556,6 +1556,7 @@ static int mtk_star_probe(struct platform_device *pdev)
        return devm_register_netdev(dev, ndev);
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id mtk_star_of_match[] = {
        { .compatible = "mediatek,mt8516-eth", },
        { .compatible = "mediatek,mt8518-eth", },
@@ -1563,6 +1564,7 @@ static const struct of_device_id mtk_star_of_match[] = {
        { }
 };
 MODULE_DEVICE_TABLE(of, mtk_star_of_match);
+#endif
 
 static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops,
                         mtk_star_suspend, mtk_star_resume);
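
The new CONFIG_OF guard pairs with of_match_ptr(), which evaluates to NULL
on !CONFIG_OF builds; without the #ifdef the match table would be defined
but unreferenced there and trigger -Wunused-const-variable. Assumed pairing
in the driver struct:

static struct platform_driver mtk_star_driver = {
	.driver = {
		.name = "mtk-star-emac",
		.of_match_table = of_match_ptr(mtk_star_of_match),
	},
	/* ... */
};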
index 6dd4ae2..4e3de28 100644 (file)
@@ -18,6 +18,7 @@ struct mlxsw_env_module_info {
        int num_ports_mapped;
        int num_ports_up;
        enum ethtool_module_power_mode_policy power_mode_policy;
+       enum mlxsw_reg_pmtm_module_type type;
 };
 
 struct mlxsw_env {
@@ -27,14 +28,47 @@ struct mlxsw_env {
        struct mlxsw_env_module_info module_info[];
 };
 
-static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
-                                         bool *qsfp, bool *cmis)
+static int __mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+       int err;
+
+       switch (mlxsw_env->module_info[module].type) {
+       case MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR:
+               err = -EINVAL;
+               break;
+       default:
+               err = 0;
+       }
+
+       return err;
+}
+
+static int mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+       int err;
+
+       mutex_lock(&mlxsw_env->module_info_lock);
+       err = __mlxsw_env_validate_module_type(core, module);
+       mutex_unlock(&mlxsw_env->module_info_lock);
+
+       return err;
+}
+
+static int
+mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp,
+                              bool *cmis)
 {
        char mcia_pl[MLXSW_REG_MCIA_LEN];
        char *eeprom_tmp;
        u8 ident;
        int err;
 
+       err = mlxsw_env_validate_module_type(core, id);
+       if (err)
+               return err;
+
        mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
                            MLXSW_REG_MCIA_I2C_ADDR_LOW);
        err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
@@ -206,7 +240,8 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
        return 0;
 }
 
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+                             struct mlxsw_core *mlxsw_core, int module,
                              struct ethtool_modinfo *modinfo)
 {
        u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE];
@@ -215,6 +250,13 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
        unsigned int read_size;
        int err;
 
+       err = mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               netdev_err(netdev,
+                          "EEPROM is not equipped on port module type\n");
+               return err;
+       }
+
        err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset,
                                            module_info, false, &read_size);
        if (err)
@@ -356,6 +398,13 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
 {
        u32 bytes_read = 0;
        u16 device_addr;
+       int err;
+
+       err = mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type");
+               return err;
+       }
 
        /* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */
        device_addr = page->offset;
@@ -364,7 +413,6 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
                char mcia_pl[MLXSW_REG_MCIA_LEN];
                char *eeprom_tmp;
                u8 size;
-               int err;
 
                size = min_t(u8, page->length - bytes_read,
                             MLXSW_REG_MCIA_EEPROM_SIZE);
@@ -419,6 +467,12 @@ int mlxsw_env_reset_module(struct net_device *netdev,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               netdev_err(netdev, "Reset module is not supported on port module type\n");
+               goto out;
+       }
+
        if (mlxsw_env->module_info[module].num_ports_up) {
                netdev_err(netdev, "Cannot reset module when ports using it are administratively up\n");
                err = -EINVAL;
@@ -461,6 +515,12 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Power mode is not supported on port module type");
+               goto out;
+       }
+
        params->policy = mlxsw_env->module_info[module].power_mode_policy;
 
        mlxsw_reg_mcion_pack(mcion_pl, module);
@@ -571,6 +631,13 @@ mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Power mode set is not supported on port module type");
+               goto out;
+       }
+
        if (mlxsw_env->module_info[module].power_mode_policy == policy)
                goto out;
 
@@ -661,13 +728,12 @@ static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core,
        return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl);
 }
 
-static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core,
-                                             u8 module_count)
+static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core)
 {
        int i, err, sensor_index;
        bool has_temp_sensor;
 
-       for (i = 0; i < module_count; i++) {
+       for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
                err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i,
                                                       &has_temp_sensor);
                if (err)
@@ -876,12 +942,11 @@ mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env)
 }
 
 static int
-mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core,
-                                        u8 module_count)
+mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core)
 {
        int i, err;
 
-       for (i = 0; i < module_count; i++) {
+       for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
                char pmaos_pl[MLXSW_REG_PMAOS_LEN];
 
                mlxsw_reg_pmaos_pack(pmaos_pl, i);
@@ -999,6 +1064,28 @@ out_unlock:
 }
 EXPORT_SYMBOL(mlxsw_env_module_port_down);
 
+static int
+mlxsw_env_module_type_set(struct mlxsw_core *mlxsw_core)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+       int i;
+
+       for (i = 0; i < mlxsw_env->module_count; i++) {
+               char pmtm_pl[MLXSW_REG_PMTM_LEN];
+               int err;
+
+               mlxsw_reg_pmtm_pack(pmtm_pl, 0, i);
+               err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
+               if (err)
+                       return err;
+
+               mlxsw_env->module_info[i].type =
+                       mlxsw_reg_pmtm_module_type_get(pmtm_pl);
+       }
+
+       return 0;
+}
+
 int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
 {
        char mgpir_pl[MLXSW_REG_MGPIR_LEN];
@@ -1037,17 +1124,21 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
        if (err)
                goto err_module_plug_event_register;
 
-       err = mlxsw_env_module_oper_state_event_enable(mlxsw_core,
-                                                      env->module_count);
+       err = mlxsw_env_module_oper_state_event_enable(mlxsw_core);
        if (err)
                goto err_oper_state_event_enable;
 
-       err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count);
+       err = mlxsw_env_module_temp_event_enable(mlxsw_core);
        if (err)
                goto err_temp_event_enable;
 
+       err = mlxsw_env_module_type_set(mlxsw_core);
+       if (err)
+               goto err_type_set;
+
        return 0;
 
+err_type_set:
 err_temp_event_enable:
 err_oper_state_event_enable:
        mlxsw_env_module_plug_event_unregister(env);
index da121b1..ec6564e 100644 (file)
@@ -12,7 +12,8 @@ struct ethtool_eeprom;
 int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
                                         int off, int *temp);
 
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+                             struct mlxsw_core *mlxsw_core, int module,
                              struct ethtool_modinfo *modinfo);
 
 int mlxsw_env_get_module_eeprom(struct net_device *netdev,
index 10d13f5..9ac8ce0 100644 (file)
@@ -110,7 +110,8 @@ static int mlxsw_m_get_module_info(struct net_device *netdev,
        struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
        struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-       return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo);
+       return mlxsw_env_get_module_info(netdev, core, mlxsw_m_port->module,
+                                        modinfo);
 }
 
 static int
index 24cc650..aba5db4 100644 (file)
@@ -4482,6 +4482,8 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4          BIT(21)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4          BIT(22)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4      BIT(23)
+#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T             BIT(24)
+#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T            BIT(25)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR            BIT(27)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR            BIT(28)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR            BIT(29)
@@ -6062,6 +6064,58 @@ static inline void mlxsw_reg_pllp_unpack(char *payload, u8 *label_port,
        *slot_index = mlxsw_reg_pllp_slot_index_get(payload);
 }
 
+/* PMTM - Port Module Type Mapping Register
+ * ----------------------------------------
+ * The PMTM register allows query or configuration of module types.
+ * The register can only be set when the module is disabled by the PMAOS register.
+ */
+#define MLXSW_REG_PMTM_ID 0x5067
+#define MLXSW_REG_PMTM_LEN 0x10
+
+MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN);
+
+/* reg_pmtm_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, slot_index, 0x00, 24, 4);
+
+/* reg_pmtm_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8);
+
+enum mlxsw_reg_pmtm_module_type {
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_4_LANES = 0,
+       MLXSW_REG_PMTM_MODULE_TYPE_QSFP = 1,
+       MLXSW_REG_PMTM_MODULE_TYPE_SFP = 2,
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_SINGLE_LANE = 4,
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_2_LANES = 8,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP4X = 10,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP2X = 11,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP1X = 12,
+       MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14,
+       MLXSW_REG_PMTM_MODULE_TYPE_OSFP = 15,
+       MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD = 16,
+       MLXSW_REG_PMTM_MODULE_TYPE_DSFP = 17,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP8X = 18,
+       MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR = 19,
+};
+
+/* reg_pmtm_module_type
+ * Module type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 5);
+
+static inline void mlxsw_reg_pmtm_pack(char *payload, u8 slot_index, u8 module)
+{
+       MLXSW_REG_ZERO(pmtm, payload);
+       mlxsw_reg_pmtm_slot_index_set(payload, slot_index);
+       mlxsw_reg_pmtm_module_set(payload, module);
+}
+
 /* HTGT - Host Trap Group Table
  * ----------------------------
  * Configures the properties for forwarding to CPU.
@@ -12568,6 +12622,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(pddr),
        MLXSW_REG(pmmp),
        MLXSW_REG(pllp),
+       MLXSW_REG(pmtm),
        MLXSW_REG(htgt),
        MLXSW_REG(hpkt),
        MLXSW_REG(rgcr),
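
Reading a module's type with the new register is a pack/query/get round
trip; a sketch mirroring mlxsw_env_module_type_set() in core_env.c above
(mlxsw_core and module assumed in scope):

char pmtm_pl[MLXSW_REG_PMTM_LEN];
enum mlxsw_reg_pmtm_module_type type;
int err;

mlxsw_reg_pmtm_pack(pmtm_pl, 0 /* slot_index */, module);
err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
if (err)
	return err;

/* module_type is RW, but writable only while the module is disabled
 * via PMAOS; here it is only read back.
 */
type = mlxsw_reg_pmtm_module_type_get(pmtm_pl);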
index a9fff8a..d20e794 100644 (file)
@@ -213,7 +213,6 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp1_kvdl_part *part;
        bool need_update = true;
        unsigned int nr_entries;
-       size_t usage_size;
        u64 resource_size;
        int err;
 
@@ -225,8 +224,8 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
        }
 
        nr_entries = div_u64(resource_size, info->alloc_size);
-       usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
-       part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+       part = kzalloc(struct_size(part, usage, BITS_TO_LONGS(nr_entries)),
+                      GFP_KERNEL);
        if (!part)
                return ERR_PTR(-ENOMEM);
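
This conversion (like the mana and nfp ones below) replaces open-coded
flexible-array sizing with struct_size(), which saturates to SIZE_MAX on
arithmetic overflow so the allocation fails instead of returning an
undersized buffer. The equivalence, sketched:

struct part {
	/* ... fixed fields ... */
	unsigned long usage[];	/* flexible array member */
};

/* Before: the multiply or add can wrap. */
p = kzalloc(sizeof(*p) + nlongs * sizeof(unsigned long), GFP_KERNEL);

/* After: struct_size(p, usage, nlongs) computes the same size but
 * saturates on overflow, so kzalloc() fails cleanly.
 */
p = kzalloc(struct_size(p, usage, nlongs), GFP_KERNEL);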
 
index 2053071..8b5d7f8 100644 (file)
@@ -1034,13 +1034,10 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev,
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       int err;
-
-       err = mlxsw_env_get_module_info(mlxsw_sp->core,
-                                       mlxsw_sp_port->mapping.module,
-                                       modinfo);
 
-       return err;
+       return mlxsw_env_get_module_info(netdev, mlxsw_sp->core,
+                                        mlxsw_sp_port->mapping.module,
+                                        modinfo);
 }
 
 static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
@@ -1048,13 +1045,10 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       int err;
-
-       err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
-                                         mlxsw_sp_port->mapping.module, ee,
-                                         data);
 
-       return err;
+       return mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
+                                          mlxsw_sp_port->mapping.module, ee,
+                                          data);
 }
 
 static int
@@ -1273,12 +1267,22 @@ struct mlxsw_sp1_port_link_mode {
 
 static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
        {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+               .speed          = SPEED_100,
+       },
+       {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
                                  MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
                .mask_ethtool   = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
                .speed          = SPEED_1000,
        },
        {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+               .speed          = SPEED_1000,
+       },
+       {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
                                  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
                .mask_ethtool   = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
index 91a755e..5f1e7b8 100644 (file)
@@ -750,7 +750,7 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev,
        }
 
        if (eee->eee_enabled) {
-               ret = phy_init_eee(phydev, 0);
+               ret = phy_init_eee(phydev, false);
                if (ret) {
                        netif_err(adapter, drv, adapter->netdev,
                                  "EEE initialization failed\n");
index 636dfef..49b85ca 100644 (file)
@@ -663,7 +663,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
        struct gdma_context *gc = gd->gdma_context;
        struct hw_channel_context *hwc;
        u32 length = gmi->length;
-       u32 req_msg_size;
+       size_t req_msg_size;
        int err;
        int i;
 
@@ -674,7 +674,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
                return -EINVAL;
 
        hwc = gc->hwc.driver_data;
-       req_msg_size = sizeof(*req) + num_page * sizeof(u64);
+       req_msg_size = struct_size(req, page_addr_list, num_page);
        if (req_msg_size > hwc->max_req_msg_size)
                return -EINVAL;
 
index dfb4468..ce865e6 100644 (file)
@@ -356,7 +356,7 @@ __nfp_tun_add_route_to_cache(struct list_head *route_list,
                        return 0;
                }
 
-       entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC);
+       entry = kmalloc(struct_size(entry, ip_add, add_len), GFP_ATOMIC);
        if (!entry) {
                spin_unlock_bh(list_lock);
                return -ENOMEM;
index 3d61a8c..50007cc 100644 (file)
@@ -1,8 +1,7 @@
 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
 /* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
-/*
- * nfp_net_ctrl.h
+/* nfp_net_ctrl.h
  * Netronome network device driver: Control BAR layout
  * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
  *          Jason McMullan <jason.mcmullan@netronome.com>
 
 #include <linux/types.h>
 
-/**
- * Configuration BAR size.
+/* Configuration BAR size.
  *
  * The configuration BAR is 8K in size, but due to
  * THB-350, 32k needs to be reserved.
  */
 #define NFP_NET_CFG_BAR_SZ             (32 * 1024)
 
-/**
- * Offset in Freelist buffer where packet starts on RX
- */
+/* Offset in Freelist buffer where packet starts on RX */
 #define NFP_NET_RX_OFFSET              32
 
-/**
- * LSO parameters
+/* LSO parameters
  * %NFP_NET_LSO_MAX_HDR_SZ:    Maximum header size supported for LSO frames
  * %NFP_NET_LSO_MAX_SEGS:      Maximum number of segments LSO frame can produce
  */
 #define NFP_NET_LSO_MAX_HDR_SZ         255
 #define NFP_NET_LSO_MAX_SEGS           64
 
-/**
- * Prepend field types
- */
+/* Prepend field types */
 #define NFP_NET_META_FIELD_SIZE                4
 #define NFP_NET_META_HASH              1 /* next field carries hash type */
 #define NFP_NET_META_MARK              2
@@ -49,9 +42,7 @@
 
 #define NFP_META_PORT_ID_CTRL          ~0U
 
-/**
- * Hash type pre-pended when a RSS hash was computed
- */
+/* Hash type pre-pended when a RSS hash was computed */
 #define NFP_NET_RSS_NONE               0
 #define NFP_NET_RSS_IPV4               1
 #define NFP_NET_RSS_IPV6               2
 #define NFP_NET_RSS_IPV6_UDP           8
 #define NFP_NET_RSS_IPV6_EX_UDP                9
 
-/**
- * Ring counts
+/* Ring counts
  * %NFP_NET_TXR_MAX:        Maximum number of TX rings
  * %NFP_NET_RXR_MAX:        Maximum number of RX rings
  */
 #define NFP_NET_TXR_MAX                        64
 #define NFP_NET_RXR_MAX                        64
 
-/**
- * Read/Write config words (0x0000 - 0x002c)
+/* Read/Write config words (0x0000 - 0x002c)
  * %NFP_NET_CFG_CTRL:       Global control
  * %NFP_NET_CFG_UPDATE:      Indicate which fields are updated
  * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
 #define NFP_NET_CFG_LSC                        0x0020
 #define NFP_NET_CFG_MACADDR            0x0024
 
-/**
- * Read-only words (0x0030 - 0x0050):
+/* Read-only words (0x0030 - 0x0050):
  * %NFP_NET_CFG_VERSION:     Firmware version number
  * %NFP_NET_CFG_STS:        Status
  * %NFP_NET_CFG_CAP:        Capabilities (same bits as %NFP_NET_CFG_CTRL)
 #define NFP_NET_CFG_START_TXQ          0x0048
 #define NFP_NET_CFG_START_RXQ          0x004c
 
-/**
- * Prepend configuration
+/* Prepend configuration
  */
 #define NFP_NET_CFG_RX_OFFSET          0x0050
 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC          0       /* Prepend mode */
 
-/**
- * RSS capabilities
+/* RSS capabilities
  * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as
  *                             %NFP_NET_CFG_RSS_HFUNC)
  */
 #define NFP_NET_CFG_RSS_CAP            0x0054
 #define   NFP_NET_CFG_RSS_CAP_HFUNC      0xff000000
 
-/**
- * TLV area start
+/* TLV area start
  * %NFP_NET_CFG_TLV_BASE:      start anchor of the TLV area
  */
 #define NFP_NET_CFG_TLV_BASE           0x0058
 
-/**
- * VXLAN/UDP encap configuration
+/* VXLAN/UDP encap configuration
  * %NFP_NET_CFG_VXLAN_PORT:    Base address of table of tunnels' UDP dst ports
  * %NFP_NET_CFG_VXLAN_SZ:      Size of the UDP port table in bytes
  */
 #define NFP_NET_CFG_VXLAN_PORT         0x0060
 #define NFP_NET_CFG_VXLAN_SZ             0x0008
 
-/**
- * BPF section
+/* BPF section
  * %NFP_NET_CFG_BPF_ABI:       BPF ABI version
  * %NFP_NET_CFG_BPF_CAP:       BPF capabilities
  * %NFP_NET_CFG_BPF_MAX_LEN:   Maximum size of JITed BPF code in bytes
 #define   NFP_NET_CFG_BPF_CFG_MASK     7ULL
 #define   NFP_NET_CFG_BPF_ADDR_MASK    (~NFP_NET_CFG_BPF_CFG_MASK)
 
-/**
- * 40B reserved for future use (0x0098 - 0x00c0)
+/* 40B reserved for future use (0x0098 - 0x00c0)
  */
 #define NFP_NET_CFG_RESERVED           0x0098
 #define NFP_NET_CFG_RESERVED_SZ                0x0028
 
-/**
- * RSS configuration (0x0100 - 0x01ac):
+/* RSS configuration (0x0100 - 0x01ac):
  * Used only when NFP_NET_CFG_CTRL_RSS is enabled
  * %NFP_NET_CFG_RSS_CFG:     RSS configuration word
  * %NFP_NET_CFG_RSS_KEY:     RSS "secret" key
                                         NFP_NET_CFG_RSS_KEY_SZ)
 #define NFP_NET_CFG_RSS_ITBL_SZ                0x80
 
-/**
- * TX ring configuration (0x200 - 0x800)
+/* TX ring configuration (0x200 - 0x800)
  * %NFP_NET_CFG_TXR_BASE:    Base offset for TX ring configuration
  * %NFP_NET_CFG_TXR_ADDR:    Per TX ring DMA address (8B entries)
  * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries)
 #define NFP_NET_CFG_TXR_IRQ_MOD(_x)    (NFP_NET_CFG_TXR_BASE + 0x500 + \
                                         ((_x) * 0x4))
 
-/**
- * RX ring configuration (0x0800 - 0x0c00)
+/* RX ring configuration (0x0800 - 0x0c00)
  * %NFP_NET_CFG_RXR_BASE:    Base offset for RX ring configuration
  * %NFP_NET_CFG_RXR_ADDR:    Per RX ring DMA address (8B entries)
  * %NFP_NET_CFG_RXR_SZ:      Per RX ring ring size (1B entries)
 #define NFP_NET_CFG_RXR_IRQ_MOD(_x)    (NFP_NET_CFG_RXR_BASE + 0x300 + \
                                         ((_x) * 0x4))
 
-/**
- * Interrupt Control/Cause registers (0x0c00 - 0x0d00)
+/* Interrupt Control/Cause registers (0x0c00 - 0x0d00)
  * These registers are only used when MSI-X auto-masking is not
  * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set).  The array is indexed
  * by MSI-X entry; entries are 1B in size.  If an entry is zero, the
 #define   NFP_NET_CFG_ICR_RXTX         0x1
 #define   NFP_NET_CFG_ICR_LSC          0x2
 
-/**
- * General device stats (0x0d00 - 0x0d90)
+/* General device stats (0x0d00 - 0x0d90)
  * all counters are 64bit.
  */
 #define NFP_NET_CFG_STATS_BASE         0x0d00
 #define NFP_NET_CFG_STATS_APP3_FRAMES  (NFP_NET_CFG_STATS_BASE + 0xc0)
 #define NFP_NET_CFG_STATS_APP3_BYTES   (NFP_NET_CFG_STATS_BASE + 0xc8)
 
-/**
- * Per ring stats (0x1000 - 0x1800)
+/* Per ring stats (0x1000 - 0x1800)
  * options, 64bit per entry
  * %NFP_NET_CFG_TXR_STATS:   TX ring statistics (Packet and Byte count)
  * %NFP_NET_CFG_RXR_STATS:   RX ring statistics (Packet and Byte count)
 #define NFP_NET_CFG_RXR_STATS(_x)      (NFP_NET_CFG_RXR_STATS_BASE + \
                                         ((_x) * 0x10))
 
-/**
- * General use mailbox area (0x1800 - 0x19ff)
+/* General use mailbox area (0x1800 - 0x19ff)
  * 4B used for update command and 4B return code
  * followed by a max of 504B of variable length value
  */
 #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET      5
 #define NFP_NET_CFG_MBOX_CMD_TLV_CMSG                  6
 
-/**
- * VLAN filtering using general use mailbox
+/* VLAN filtering using general use mailbox
  * %NFP_NET_CFG_VLAN_FILTER:           Base address of VLAN filter mailbox
  * %NFP_NET_CFG_VLAN_FILTER_VID:       VLAN ID to filter
  * %NFP_NET_CFG_VLAN_FILTER_PROTO:     VLAN proto to filter
 #define  NFP_NET_CFG_VLAN_FILTER_PROTO  (NFP_NET_CFG_VLAN_FILTER + 2)
 #define NFP_NET_CFG_VLAN_FILTER_SZ      0x0004
 
-/**
- * TLV capabilities
+/* TLV capabilities
  * %NFP_NET_CFG_TLV_TYPE:      Offset of type within the TLV
  * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
  * %NFP_NET_CFG_TLV_LENGTH:    Offset of length within the TLV
 #define NFP_NET_CFG_TLV_HEADER_TYPE    0x7fff0000
 #define NFP_NET_CFG_TLV_HEADER_LENGTH  0x0000ffff
 
-/**
- * Capability TLV types
+/* Capability TLV types
  *
  * %NFP_NET_CFG_TLV_TYPE_UNKNOWN:
  * Special TLV type to catch bugs, should never be encountered.  Drivers should
 
 struct device;
 
-/**
- * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
+/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
  * @me_freq_mhz:       ME clock_freq (MHz)
  * @mbox_off:          vNIC mailbox area offset
  * @mbox_len:          vNIC mailbox area length
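
The nfp_net_ctrl.h hunks above all make the same fix: scripts/kernel-doc treats any comment opened with "/**" as kernel-doc, and W=1 builds warn when such a block does not document a named function or struct, so these free-standing register descriptions are demoted to plain "/* ... */" comments. A minimal sketch of the distinction; the names below are illustrative, not from the driver:

	/* Plain comment: ignored by scripts/kernel-doc; fine for #defines. */
	#define EXAMPLE_REG_CTRL	0x0000

	/**
	 * example_reg_read() - read a 32-bit device register
	 * @base: mapped base address of the register block
	 * @off: byte offset of the register to read
	 *
	 * Kernel-doc wants the "name() - summary" line and @arg tags above,
	 * and the block must sit directly before the item it documents.
	 */
	static inline u32 example_reg_read(void __iomem *base, u32 off)
	{
		return readl(base + off);
	}
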
index a3db0cb..786be58 100644
@@ -4,8 +4,7 @@
 #ifndef _NFP_NET_SRIOV_H_
 #define _NFP_NET_SRIOV_H_
 
-/**
- * SRIOV VF configuration.
+/* SRIOV VF configuration.
  * The configuration memory begins with a mailbox region for communication with
  * the firmware followed by individual VF entries.
  */
index ae4da18..df316b9 100644
@@ -132,8 +132,7 @@ void nfp_devlink_port_unregister(struct nfp_port *port);
 void nfp_devlink_port_type_eth_set(struct nfp_port *port);
 void nfp_devlink_port_type_clear(struct nfp_port *port);
 
-/**
- * Mac stats (0x0000 - 0x0200)
+/* Mac stats (0x0000 - 0x0200)
  * all counters are 64bit.
  */
 #define NFP_MAC_STATS_BASE                0x0000
index 10e7d8b..730fea2 100644
@@ -513,7 +513,7 @@ nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp,
        dma_size = BIT_ULL(dma_order);
        nseg = DIV_ROUND_UP(max_size, chunk_size);
 
-       chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL);
+       chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL);
        if (!chunks)
                return -ENOMEM;
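
The kcalloc() conversion above is behavior-preserving: kcalloc(n, size, flags) zeroes the buffer and returns NULL if n * size would overflow, which is exactly what kzalloc(array_size(...)) guaranteed, while stating the array intent directly. The general pattern, with an illustrative element type:

	struct chunk {
		void __iomem *addr;
		size_t len;
	};

	/* Overflow-checked, zero-initialized array allocation. */
	struct chunk *chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL);

	if (!chunks)
		return -ENOMEM;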
 
index 5e25411..602f4d4 100644
@@ -18,7 +18,7 @@ struct ionic_lif;
 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF    0x1002
 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF    0x1003
 
-#define DEVCMD_TIMEOUT  10
+#define DEVCMD_TIMEOUT                 5
 #define IONIC_ADMINQ_TIME_SLICE                msecs_to_jiffies(100)
 
 #define IONIC_PHC_UPDATE_NS    10000000000         /* 10s in nanoseconds */
@@ -78,6 +78,9 @@ void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
                                   u8 status, int err);
 
 int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_wait);
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+                                int err);
 int ionic_set_dma_mask(struct ionic *ionic);
 int ionic_setup(struct ionic *ionic);
 
@@ -89,4 +92,6 @@ int ionic_port_identify(struct ionic *ionic);
 int ionic_port_init(struct ionic *ionic);
 int ionic_port_reset(struct ionic *ionic);
 
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr);
+
 #endif /* _IONIC_H_ */
index 7e296fa..6ffc62c 100644
@@ -109,8 +109,8 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page)
 
 static void ionic_vf_dealloc_locked(struct ionic *ionic)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
        struct ionic_vf *v;
-       dma_addr_t dma = 0;
        int i;
 
        if (!ionic->vfs)
@@ -120,9 +120,8 @@ static void ionic_vf_dealloc_locked(struct ionic *ionic)
                v = &ionic->vfs[i];
 
                if (v->stats_pa) {
-                       (void)ionic_set_vf_config(ionic, i,
-                                                 IONIC_VF_ATTR_STATSADDR,
-                                                 (u8 *)&dma);
+                       vfc.stats_pa = 0;
+                       (void)ionic_set_vf_config(ionic, i, &vfc);
                        dma_unmap_single(ionic->dev, v->stats_pa,
                                         sizeof(v->stats), DMA_FROM_DEVICE);
                        v->stats_pa = 0;
@@ -143,6 +142,7 @@ static void ionic_vf_dealloc(struct ionic *ionic)
 
 static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
        struct ionic_vf *v;
        int err = 0;
        int i;
@@ -166,9 +166,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
                }
 
                ionic->num_vfs++;
+
                /* ignore failures from older FW, we just won't get stats */
-               (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR,
-                                         (u8 *)&v->stats_pa);
+               vfc.stats_pa = cpu_to_le64(v->stats_pa);
+               (void)ionic_set_vf_config(ionic, i, &vfc);
        }
 
 out:
@@ -331,6 +332,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_out_deregister_lifs;
        }
 
+       mod_timer(&ionic->watchdog_timer,
+                 round_jiffies(jiffies + ionic->watchdog_period));
+
        return 0;
 
 err_out_deregister_lifs:
@@ -348,7 +352,6 @@ err_out_port_reset:
 err_out_reset:
        ionic_reset(ionic);
 err_out_teardown:
-       del_timer_sync(&ionic->watchdog_timer);
        pci_clear_master(pdev);
        /* Don't fail the probe for these errors, keep
         * the hw interface around for inspection
index d57e80d..52a1b5c 100644
@@ -33,7 +33,8 @@ static void ionic_watchdog_cb(struct timer_list *t)
            !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
 
-       if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) {
+       if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state) &&
+           !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
                work = kzalloc(sizeof(*work), GFP_ATOMIC);
                if (!work) {
                        netdev_err(lif->netdev, "rxmode change dropped\n");
@@ -46,6 +47,24 @@ static void ionic_watchdog_cb(struct timer_list *t)
        }
 }
 
+static void ionic_watchdog_init(struct ionic *ionic)
+{
+       struct ionic_dev *idev = &ionic->idev;
+
+       timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
+       ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
+
+       /* set times to ensure the first check will proceed */
+       atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
+       idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
+       /* init as ready, so no transition if the first check succeeds */
+       idev->last_fw_hb = 0;
+       idev->fw_hb_ready = true;
+       idev->fw_status_ready = true;
+       idev->fw_generation = IONIC_FW_STS_F_GENERATION &
+                             ioread8(&idev->dev_info_regs->fw_status);
+}
+
 void ionic_init_devinfo(struct ionic *ionic)
 {
        struct ionic_dev *idev = &ionic->idev;
@@ -109,21 +128,7 @@ int ionic_dev_setup(struct ionic *ionic)
                return -EFAULT;
        }
 
-       timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
-       ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
-
-       /* set times to ensure the first check will proceed */
-       atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
-       idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
-       /* init as ready, so no transition if the first check succeeds */
-       idev->last_fw_hb = 0;
-       idev->fw_hb_ready = true;
-       idev->fw_status_ready = true;
-       idev->fw_generation = IONIC_FW_STS_F_GENERATION &
-                             ioread8(&idev->dev_info_regs->fw_status);
-
-       mod_timer(&ionic->watchdog_timer,
-                 round_jiffies(jiffies + ionic->watchdog_period));
+       ionic_watchdog_init(ionic);
 
        idev->db_pages = bar->vaddr;
        idev->phy_db_pages = bar->bus_addr;
@@ -132,10 +137,21 @@ int ionic_dev_setup(struct ionic *ionic)
 }
 
 /* Devcmd Interface */
+bool ionic_is_fw_running(struct ionic_dev *idev)
+{
+       u8 fw_status = ioread8(&idev->dev_info_regs->fw_status);
+
+       /* firmware is useful only if the running bit is set and
+        * fw_status != 0xff (bad PCI read)
+        */
+       return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
+}
+
 int ionic_heartbeat_check(struct ionic *ionic)
 {
-       struct ionic_dev *idev = &ionic->idev;
        unsigned long check_time, last_check_time;
+       struct ionic_dev *idev = &ionic->idev;
+       struct ionic_lif *lif = ionic->lif;
        bool fw_status_ready = true;
        bool fw_hb_ready;
        u8 fw_generation;
@@ -155,13 +171,10 @@ do_check_time:
                goto do_check_time;
        }
 
-       /* firmware is useful only if the running bit is set and
-        * fw_status != 0xff (bad PCI read)
-        * If fw_status is not ready don't bother with the generation.
-        */
        fw_status = ioread8(&idev->dev_info_regs->fw_status);
 
-       if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) {
+       /* If fw_status is not ready don't bother with the generation */
+       if (!ionic_is_fw_running(idev)) {
                fw_status_ready = false;
        } else {
                fw_generation = fw_status & IONIC_FW_STS_F_GENERATION;
@@ -176,31 +189,41 @@ do_check_time:
                         * the down, the next watchdog will see the fw is up
                         * and the generation value stable, so will trigger
                         * the fw-up activity.
+                        *
+                        * If we had already moved to FW_RESET from a RESET event,
+                        * it is possible that we never saw the fw_status go to 0,
+                        * so we fake the current idev->fw_status_ready here to
+                        * force the transition and get FW up again.
                         */
-                       fw_status_ready = false;
+                       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+                               idev->fw_status_ready = false;  /* go to running */
+                       else
+                               fw_status_ready = false;        /* go to down */
                }
        }
 
        /* is this a transition? */
        if (fw_status_ready != idev->fw_status_ready) {
-               struct ionic_lif *lif = ionic->lif;
                bool trigger = false;
 
-               idev->fw_status_ready = fw_status_ready;
-
-               if (!fw_status_ready) {
-                       dev_info(ionic->dev, "FW stopped %u\n", fw_status);
-                       if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
-                               trigger = true;
-               } else {
-                       dev_info(ionic->dev, "FW running %u\n", fw_status);
-                       if (lif && test_bit(IONIC_LIF_F_FW_RESET, lif->state))
-                               trigger = true;
+               if (!fw_status_ready && lif &&
+                   !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                   !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       dev_info(ionic->dev, "FW stopped 0x%02x\n", fw_status);
+                       trigger = true;
+
+               } else if (fw_status_ready && lif &&
+                          test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                          !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       dev_info(ionic->dev, "FW running 0x%02x\n", fw_status);
+                       trigger = true;
                }
 
                if (trigger) {
                        struct ionic_deferred_work *work;
 
+                       idev->fw_status_ready = fw_status_ready;
+
                        work = kzalloc(sizeof(*work), GFP_ATOMIC);
                        if (work) {
                                work->type = IONIC_DW_TYPE_LIF_RESET;
@@ -210,12 +233,14 @@ do_check_time:
                }
        }
 
-       if (!fw_status_ready)
+       if (!idev->fw_status_ready)
                return -ENXIO;
 
-       /* wait at least one watchdog period since the last heartbeat */
+       /* Because of some variability in the actual FW heartbeat, we
+        * wait longer than the DEVCMD_TIMEOUT before checking again.
+        */
        last_check_time = idev->last_hb_time;
-       if (time_before(check_time, last_check_time + ionic->watchdog_period))
+       if (time_before(check_time, last_check_time + DEVCMD_TIMEOUT * 2 * HZ))
                return 0;
 
        fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
@@ -392,60 +417,63 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type)
 }
 
 /* VF commands */
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data)
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+                       struct ionic_vf_setattr_cmd *vfc)
 {
        union ionic_dev_cmd cmd = {
                .vf_setattr.opcode = IONIC_CMD_VF_SETATTR,
-               .vf_setattr.attr = attr,
+               .vf_setattr.attr = vfc->attr,
                .vf_setattr.vf_index = cpu_to_le16(vf),
        };
        int err;
 
+       memcpy(cmd.vf_setattr.pad, vfc->pad, sizeof(vfc->pad));
+
+       mutex_lock(&ionic->dev_cmd_lock);
+       ionic_dev_cmd_go(&ionic->idev, &cmd);
+       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+       mutex_unlock(&ionic->dev_cmd_lock);
+
+       return err;
+}
+
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+                            struct ionic_vf_getattr_comp *comp)
+{
+       union ionic_dev_cmd cmd = {
+               .vf_getattr.opcode = IONIC_CMD_VF_GETATTR,
+               .vf_getattr.attr = attr,
+               .vf_getattr.vf_index = cpu_to_le16(vf),
+       };
+       int err;
+
+       if (vf >= ionic->num_vfs)
+               return -EINVAL;
+
        switch (attr) {
        case IONIC_VF_ATTR_SPOOFCHK:
-               cmd.vf_setattr.spoofchk = *data;
-               dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_TRUST:
-               cmd.vf_setattr.trust = *data;
-               dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_LINKSTATE:
-               cmd.vf_setattr.linkstate = *data;
-               dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_MAC:
-               ether_addr_copy(cmd.vf_setattr.macaddr, data);
-               dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
-                       __func__, vf, data);
-               break;
        case IONIC_VF_ATTR_VLAN:
-               cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
-                       __func__, vf, *(u16 *)data);
-               break;
        case IONIC_VF_ATTR_RATE:
-               cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
-                       __func__, vf, *(u32 *)data);
                break;
        case IONIC_VF_ATTR_STATSADDR:
-               cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n",
-                       __func__, vf, *(u64 *)data);
-               break;
        default:
                return -EINVAL;
        }
 
        mutex_lock(&ionic->dev_cmd_lock);
        ionic_dev_cmd_go(&ionic->idev, &cmd);
-       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+       err = ionic_dev_cmd_wait_nomsg(ionic, DEVCMD_TIMEOUT);
+       memcpy_fromio(comp, &ionic->idev.dev_cmd_regs->comp.vf_getattr,
+                     sizeof(*comp));
        mutex_unlock(&ionic->dev_cmd_lock);
 
+       if (err && comp->status != IONIC_RC_ENOSUPP)
+               ionic_dev_cmd_dev_err_print(ionic, cmd.vf_getattr.opcode,
+                                           comp->status, err);
+
        return err;
 }
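
With the reworked VF interface above, callers fill a struct ionic_vf_setattr_cmd on the stack instead of passing a raw byte pointer whose meaning depended on the attr value; endianness conversion moves to the caller and the pad bytes travel with the command. The shape of a call, mirroring the setters later in this patch:

	struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN };
	int err;

	vfc.vlanid = cpu_to_le16(vlan);
	err = ionic_set_vf_config(ionic, vf, &vfc);
	if (!err)
		ionic->vfs[vf].vlanid = vfc.vlanid;
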
 
index e5acf3b..563c302 100644
@@ -318,7 +318,10 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable);
 void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
 void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
 
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data);
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+                       struct ionic_vf_setattr_cmd *vfc);
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+                            struct ionic_vf_getattr_comp *comp);
 void ionic_dev_cmd_queue_identify(struct ionic_dev *idev,
                                  u16 lif_type, u8 qtype, u8 qver);
 void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
@@ -353,5 +356,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
 void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
                     unsigned int stop_index);
 int ionic_heartbeat_check(struct ionic *ionic);
+bool ionic_is_fw_running(struct ionic_dev *idev);
 
 #endif /* _IONIC_DEV_H_ */
index 2ff7be1..542e395 100644
@@ -1112,12 +1112,17 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
                ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
                break;
        case IONIC_EVENT_RESET:
-               work = kzalloc(sizeof(*work), GFP_ATOMIC);
-               if (!work) {
-                       netdev_err(lif->netdev, "Reset event dropped\n");
-               } else {
-                       work->type = IONIC_DW_TYPE_LIF_RESET;
-                       ionic_lif_deferred_enqueue(&lif->deferred, work);
+               if (lif->ionic->idev.fw_status_ready &&
+                   !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                   !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       work = kzalloc(sizeof(*work), GFP_ATOMIC);
+                       if (!work) {
+                               netdev_err(lif->netdev, "Reset event dropped\n");
+                               clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
+                       } else {
+                               work->type = IONIC_DW_TYPE_LIF_RESET;
+                               ionic_lif_deferred_enqueue(&lif->deferred, work);
+                       }
                }
                break;
        default:
@@ -1782,7 +1787,7 @@ static void ionic_lif_quiesce(struct ionic_lif *lif)
 
        err = ionic_adminq_post_wait(lif, &ctx);
        if (err)
-               netdev_err(lif->netdev, "lif quiesce failed %d\n", err);
+               netdev_dbg(lif->netdev, "lif quiesce failed %d\n", err);
 }
 
 static void ionic_txrx_disable(struct ionic_lif *lif)
@@ -2152,6 +2157,76 @@ static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd
        }
 }
 
+static int ionic_update_cached_vf_config(struct ionic *ionic, int vf)
+{
+       struct ionic_vf_getattr_comp comp = { 0 };
+       int err;
+       u8 attr;
+
+       attr = IONIC_VF_ATTR_VLAN;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].vlanid = comp.vlanid;
+
+       attr = IONIC_VF_ATTR_SPOOFCHK;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].spoofchk = comp.spoofchk;
+
+       attr = IONIC_VF_ATTR_LINKSTATE;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err) {
+               switch (comp.linkstate) {
+               case IONIC_VF_LINK_STATUS_UP:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_ENABLE;
+                       break;
+               case IONIC_VF_LINK_STATUS_DOWN:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_DISABLE;
+                       break;
+               case IONIC_VF_LINK_STATUS_AUTO:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_AUTO;
+                       break;
+               default:
+                       dev_warn(ionic->dev, "Unexpected link state %u\n", comp.linkstate);
+                       break;
+               }
+       }
+
+       attr = IONIC_VF_ATTR_RATE;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].maxrate = comp.maxrate;
+
+       attr = IONIC_VF_ATTR_TRUST;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].trusted = comp.trust;
+
+       attr = IONIC_VF_ATTR_MAC;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ether_addr_copy(ionic->vfs[vf].macaddr, comp.macaddr);
+
+err_out:
+       if (err)
+               dev_err(ionic->dev, "Failed to get %s for VF %d\n",
+                       ionic_vf_attr_to_str(attr), vf);
+
+       return err;
+}
+
 static int ionic_get_vf_config(struct net_device *netdev,
                               int vf, struct ifla_vf_info *ivf)
 {
@@ -2167,14 +2242,18 @@ static int ionic_get_vf_config(struct net_device *netdev,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ivf->vf           = vf;
-               ivf->vlan         = le16_to_cpu(ionic->vfs[vf].vlanid);
-               ivf->qos          = 0;
-               ivf->spoofchk     = ionic->vfs[vf].spoofchk;
-               ivf->linkstate    = ionic->vfs[vf].linkstate;
-               ivf->max_tx_rate  = le32_to_cpu(ionic->vfs[vf].maxrate);
-               ivf->trusted      = ionic->vfs[vf].trusted;
-               ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+               ivf->vf = vf;
+               ivf->qos = 0;
+
+               ret = ionic_update_cached_vf_config(ionic, vf);
+               if (!ret) {
+                       ivf->vlan         = le16_to_cpu(ionic->vfs[vf].vlanid);
+                       ivf->spoofchk     = ionic->vfs[vf].spoofchk;
+                       ivf->linkstate    = ionic->vfs[vf].linkstate;
+                       ivf->max_tx_rate  = le32_to_cpu(ionic->vfs[vf].maxrate);
+                       ivf->trusted      = ionic->vfs[vf].trusted;
+                       ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+               }
        }
 
        up_read(&ionic->vf_op_lock);
@@ -2220,6 +2299,7 @@ static int ionic_get_vf_stats(struct net_device *netdev, int vf,
 
 static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_MAC };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2235,7 +2315,11 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac);
+               ether_addr_copy(vfc.macaddr, mac);
+               dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
+                       __func__, vf, vfc.macaddr);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ether_addr_copy(ionic->vfs[vf].macaddr, mac);
        }
@@ -2247,6 +2331,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
                             u8 qos, __be16 proto)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2269,8 +2354,11 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_VLAN, (u8 *)&vlan);
+               vfc.vlanid = cpu_to_le16(vlan);
+               dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
+                       __func__, vf, le16_to_cpu(vfc.vlanid));
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ionic->vfs[vf].vlanid = cpu_to_le16(vlan);
        }
@@ -2282,6 +2370,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 static int ionic_set_vf_rate(struct net_device *netdev, int vf,
                             int tx_min, int tx_max)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_RATE };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2298,8 +2387,11 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_RATE, (u8 *)&tx_max);
+               vfc.maxrate = cpu_to_le32(tx_max);
+               dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
+                       __func__, vf, le32_to_cpu(vfc.maxrate));
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        lif->ionic->vfs[vf].maxrate = cpu_to_le32(tx_max);
        }
@@ -2310,9 +2402,9 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
 
 static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_SPOOFCHK };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data = set;  /* convert to u8 for config */
        int ret;
 
        if (!netif_device_present(netdev))
@@ -2323,10 +2415,13 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_SPOOFCHK, &data);
+               vfc.spoofchk = set;
+               dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
+                       __func__, vf, vfc.spoofchk);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
-                       ionic->vfs[vf].spoofchk = data;
+                       ionic->vfs[vf].spoofchk = set;
        }
 
        up_write(&ionic->vf_op_lock);
@@ -2335,9 +2430,9 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
 
 static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_TRUST };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data = set;  /* convert to u8 for config */
        int ret;
 
        if (!netif_device_present(netdev))
@@ -2348,10 +2443,13 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_TRUST, &data);
+               vfc.trust = set;
+               dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
+                       __func__, vf, vfc.trust);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
-                       ionic->vfs[vf].trusted = data;
+                       ionic->vfs[vf].trusted = set;
        }
 
        up_write(&ionic->vf_op_lock);
@@ -2360,20 +2458,21 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
 
 static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_LINKSTATE };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data;
+       u8 vfls;
        int ret;
 
        switch (set) {
        case IFLA_VF_LINK_STATE_ENABLE:
-               data = IONIC_VF_LINK_STATUS_UP;
+               vfls = IONIC_VF_LINK_STATUS_UP;
                break;
        case IFLA_VF_LINK_STATE_DISABLE:
-               data = IONIC_VF_LINK_STATUS_DOWN;
+               vfls = IONIC_VF_LINK_STATUS_DOWN;
                break;
        case IFLA_VF_LINK_STATE_AUTO:
-               data = IONIC_VF_LINK_STATUS_AUTO;
+               vfls = IONIC_VF_LINK_STATUS_AUTO;
                break;
        default:
                return -EINVAL;
@@ -2387,8 +2486,11 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_LINKSTATE, &data);
+               vfc.linkstate = vfls;
+               dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
+                       __func__, vf, vfc.linkstate);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ionic->vfs[vf].linkstate = set;
        }
@@ -2835,6 +2937,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
 
        mutex_unlock(&lif->queue_lock);
 
+       clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
        dev_info(ionic->dev, "FW Down: LIFs stopped\n");
 }
 
@@ -2934,8 +3037,6 @@ void ionic_lif_free(struct ionic_lif *lif)
        /* unmap doorbell page */
        ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
        lif->kern_dbpage = NULL;
-       kfree(lif->dbid_inuse);
-       lif->dbid_inuse = NULL;
 
        mutex_destroy(&lif->config_lock);
        mutex_destroy(&lif->queue_lock);
@@ -3135,22 +3236,12 @@ int ionic_lif_init(struct ionic_lif *lif)
                return -EINVAL;
        }
 
-       lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
-       if (!lif->dbid_inuse) {
-               dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
-               return -ENOMEM;
-       }
-
-       /* first doorbell id reserved for kernel (dbid aka pid == zero) */
-       set_bit(0, lif->dbid_inuse);
        lif->kern_pid = 0;
-
        dbpage_num = ionic_db_page_num(lif, lif->kern_pid);
        lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num);
        if (!lif->kern_dbpage) {
                dev_err(dev, "Cannot map dbpage, aborting\n");
-               err = -ENOMEM;
-               goto err_out_free_dbid;
+               return -ENOMEM;
        }
 
        err = ionic_lif_adminq_init(lif);
@@ -3186,15 +3277,13 @@ int ionic_lif_init(struct ionic_lif *lif)
        return 0;
 
 err_out_notifyq_deinit:
+       napi_disable(&lif->adminqcq->napi);
        ionic_lif_qcq_deinit(lif, lif->notifyqcq);
 err_out_adminq_deinit:
        ionic_lif_qcq_deinit(lif, lif->adminqcq);
        ionic_lif_reset(lif);
        ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
        lif->kern_dbpage = NULL;
-err_out_free_dbid:
-       kfree(lif->dbid_inuse);
-       lif->dbid_inuse = NULL;
 
        return err;
 }
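
Taken together, the lif changes introduce IONIC_LIF_F_FW_STOPPING as a latch between "FW reported down" and "LIFs actually stopped": test_and_set_bit() ensures that only the first observer of the outage (heartbeat loss or an IONIC_EVENT_RESET notification) queues the IONIC_DW_TYPE_LIF_RESET work, and ionic_lif_handle_fw_down() clears the flag once teardown completes so a later FW-up transition can be recognized. In outline (the helper name is illustrative):

	/* first observer wins; later callers see the bit already set */
	if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
	    !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state))
		queue_lif_reset_work(lif);

	/* ...and at the end of ionic_lif_handle_fw_down(): */
	clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
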
index 9f7ab2f..a53984b 100644
@@ -135,6 +135,7 @@ enum ionic_lif_state_flags {
        IONIC_LIF_F_LINK_CHECK_REQUESTED,
        IONIC_LIF_F_FILTER_SYNC_NEEDED,
        IONIC_LIF_F_FW_RESET,
+       IONIC_LIF_F_FW_STOPPING,
        IONIC_LIF_F_SPLIT_INTR,
        IONIC_LIF_F_BROKEN,
        IONIC_LIF_F_TX_DIM_INTR,
@@ -213,7 +214,6 @@ struct ionic_lif {
        u32 rx_coalesce_hw;             /* what the hw is using */
        u32 tx_coalesce_usecs;          /* what the user asked for */
        u32 tx_coalesce_hw;             /* what the hw is using */
-       unsigned long *dbid_inuse;
        unsigned int dbid_count;
 
        struct ionic_phc *phc;
index 875f4ec..4029b4e 100644
@@ -188,6 +188,28 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
        }
 }
 
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr)
+{
+       switch (attr) {
+       case IONIC_VF_ATTR_SPOOFCHK:
+               return "IONIC_VF_ATTR_SPOOFCHK";
+       case IONIC_VF_ATTR_TRUST:
+               return "IONIC_VF_ATTR_TRUST";
+       case IONIC_VF_ATTR_LINKSTATE:
+               return "IONIC_VF_ATTR_LINKSTATE";
+       case IONIC_VF_ATTR_MAC:
+               return "IONIC_VF_ATTR_MAC";
+       case IONIC_VF_ATTR_VLAN:
+               return "IONIC_VF_ATTR_VLAN";
+       case IONIC_VF_ATTR_RATE:
+               return "IONIC_VF_ATTR_RATE";
+       case IONIC_VF_ATTR_STATSADDR:
+               return "IONIC_VF_ATTR_STATSADDR";
+       default:
+               return "IONIC_VF_ATTR_UNKNOWN";
+       }
+}
+
 static void ionic_adminq_flush(struct ionic_lif *lif)
 {
        struct ionic_desc_info *desc_info;
@@ -215,9 +237,13 @@ static void ionic_adminq_flush(struct ionic_lif *lif)
 void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
                                   u8 status, int err)
 {
+       const char *stat_str;
+
+       stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+                                        ionic_error_to_str(status);
+
        netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n",
-                  ionic_opcode_to_str(opcode), opcode,
-                  ionic_error_to_str(status), err);
+                  ionic_opcode_to_str(opcode), opcode, stat_str, err);
 }
 
 static int ionic_adminq_check_err(struct ionic_lif *lif,
@@ -318,6 +344,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
                if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                        netdev_err(netdev, "Posting of %s (%d) failed: %d\n",
                                   name, ctx->cmd.cmd.opcode, err);
+               ctx->comp.comp.status = IONIC_RC_ERROR;
                return err;
        }
 
@@ -331,11 +358,15 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
                if (remaining)
                        break;
 
-               /* interrupt the wait if FW stopped */
-               if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
+               /* force a check of FW status and break out if FW reset */
+               (void)ionic_heartbeat_check(lif->ionic);
+               if ((test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                    !lif->ionic->idev.fw_status_ready) ||
+                   test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
                        if (do_msg)
-                               netdev_err(netdev, "%s (%d) interrupted, FW in reset\n",
-                                          name, ctx->cmd.cmd.opcode);
+                               netdev_warn(netdev, "%s (%d) interrupted, FW in reset\n",
+                                           name, ctx->cmd.cmd.opcode);
+                       ctx->comp.comp.status = IONIC_RC_ERROR;
                        return -ENXIO;
                }
 
@@ -370,21 +401,34 @@ int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *
 
 static void ionic_dev_cmd_clean(struct ionic *ionic)
 {
-       union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
+       struct ionic_dev *idev = &ionic->idev;
 
-       iowrite32(0, &regs->doorbell);
-       memset_io(&regs->cmd, 0, sizeof(regs->cmd));
+       iowrite32(0, &idev->dev_cmd_regs->doorbell);
+       memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd));
 }
 
-int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+                                int err)
+{
+       const char *stat_str;
+
+       stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+                                        ionic_error_to_str(status);
+
+       dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
+               ionic_opcode_to_str(opcode), opcode, stat_str, err);
+}
+
+static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
+                               const bool do_msg)
 {
        struct ionic_dev *idev = &ionic->idev;
        unsigned long start_time;
        unsigned long max_wait;
        unsigned long duration;
+       int done = 0;
+       bool fw_up;
        int opcode;
-       int hb = 0;
-       int done;
        int err;
 
        /* Wait for dev cmd to complete, retrying if we get EAGAIN,
@@ -394,31 +438,24 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
 try_again:
        opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode);
        start_time = jiffies;
-       do {
+       for (fw_up = ionic_is_fw_running(idev);
+            !done && fw_up && time_before(jiffies, max_wait);
+            fw_up = ionic_is_fw_running(idev)) {
                done = ionic_dev_cmd_done(idev);
                if (done)
                        break;
                usleep_range(100, 200);
-
-               /* Don't check the heartbeat on FW_CONTROL commands as they are
-                * notorious for interrupting the firmware's heartbeat update.
-                */
-               if (opcode != IONIC_CMD_FW_CONTROL)
-                       hb = ionic_heartbeat_check(ionic);
-       } while (!done && !hb && time_before(jiffies, max_wait));
+       }
        duration = jiffies - start_time;
 
        dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n",
                ionic_opcode_to_str(opcode), opcode,
                done, duration / HZ, duration);
 
-       if (!done && hb) {
-               /* It is possible (but unlikely) that FW was busy and missed a
-                * heartbeat check but is still alive and will process this
-                * request, so don't clean the dev_cmd in this case.
-                */
-               dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
-                       ionic_opcode_to_str(opcode), opcode);
+       if (!done && !fw_up) {
+               ionic_dev_cmd_clean(ionic);
+               dev_warn(ionic->dev, "DEVCMD %s (%d) interrupted - FW is down\n",
+                        ionic_opcode_to_str(opcode), opcode);
                return -ENXIO;
        }
 
@@ -444,9 +481,9 @@ try_again:
                }
 
                if (!(opcode == IONIC_CMD_FW_CONTROL && err == IONIC_RC_EAGAIN))
-                       dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
-                               ionic_opcode_to_str(opcode), opcode,
-                               ionic_error_to_str(err), err);
+                       if (do_msg)
+                               ionic_dev_cmd_dev_err_print(ionic, opcode, err,
+                                                           ionic_error_to_errno(err));
 
                return ionic_error_to_errno(err);
        }
@@ -454,6 +491,16 @@ try_again:
        return 0;
 }
 
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+{
+       return __ionic_dev_cmd_wait(ionic, max_seconds, true);
+}
+
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_seconds)
+{
+       return __ionic_dev_cmd_wait(ionic, max_seconds, false);
+}
+
 int ionic_setup(struct ionic *ionic)
 {
        int err;
@@ -540,6 +587,9 @@ int ionic_reset(struct ionic *ionic)
        struct ionic_dev *idev = &ionic->idev;
        int err;
 
+       if (!ionic_is_fw_running(idev))
+               return 0;
+
        mutex_lock(&ionic->dev_cmd_lock);
        ionic_dev_cmd_reset(idev);
        err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
@@ -612,15 +662,17 @@ int ionic_port_init(struct ionic *ionic)
 int ionic_port_reset(struct ionic *ionic)
 {
        struct ionic_dev *idev = &ionic->idev;
-       int err;
+       int err = 0;
 
        if (!idev->port_info)
                return 0;
 
-       mutex_lock(&ionic->dev_cmd_lock);
-       ionic_dev_cmd_port_reset(idev);
-       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
-       mutex_unlock(&ionic->dev_cmd_lock);
+       if (ionic_is_fw_running(idev)) {
+               mutex_lock(&ionic->dev_cmd_lock);
+               ionic_dev_cmd_port_reset(idev);
+               err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+               mutex_unlock(&ionic->dev_cmd_lock);
+       }
 
        dma_free_coherent(ionic->dev, idev->port_info_sz,
                          idev->port_info, idev->port_info_pa);
@@ -628,9 +680,6 @@ int ionic_port_reset(struct ionic *ionic)
        idev->port_info = NULL;
        idev->port_info_pa = 0;
 
-       if (err)
-               dev_err(ionic->dev, "Failed to reset port\n");
-
        return err;
 }
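
ionic_reset() and ionic_port_reset() now share the same teardown rule: when ionic_is_fw_running() reports the firmware dead (a 0xff status read or a clear RUNNING bit), the device command is skipped and only the local resources are released, since nothing on the other end will answer. The guard, as it appears in ionic_reset() above:

	/* Nothing to tell the device if FW is not listening. */
	if (!ionic_is_fw_running(idev))
		return 0;

	mutex_lock(&ionic->dev_cmd_lock);
	ionic_dev_cmd_reset(idev);
	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
	mutex_unlock(&ionic->dev_cmd_lock);
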
 
index f6e785f..b736337 100644
@@ -376,10 +376,24 @@ static int ionic_lif_filter_add(struct ionic_lif *lif,
 
                spin_unlock_bh(&lif->rx_filters.lock);
 
-               if (err == -ENOSPC) {
-                       if (le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
-                               lif->max_vlans = lif->nvlans;
+               /* store the max_vlans limit that we found */
+               if (err == -ENOSPC &&
+                   le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
+                       lif->max_vlans = lif->nvlans;
+
+               /* Prevent unnecessary error messages on recoverable
+                * errors as the filter will get retried on the next
+                * sync attempt.
+                */
+               switch (err) {
+               case -ENOSPC:
+               case -ENXIO:
+               case -ETIMEDOUT:
+               case -EAGAIN:
+               case -EBUSY:
                        return 0;
+               default:
+                       break;
                }
 
                ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
@@ -494,9 +508,22 @@ static int ionic_lif_filter_del(struct ionic_lif *lif,
        spin_unlock_bh(&lif->rx_filters.lock);
 
        if (state != IONIC_FILTER_STATE_NEW) {
-               err = ionic_adminq_post_wait(lif, &ctx);
-               if (err && err != -EEXIST)
+               err = ionic_adminq_post_wait_nomsg(lif, &ctx);
+
+               switch (err) {
+                       /* ignore these errors */
+               case -EEXIST:
+               case -ENXIO:
+               case -ETIMEDOUT:
+               case -EAGAIN:
+               case -EBUSY:
+               case 0:
+                       break;
+               default:
+                       ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
+                                                     ctx.comp.comp.status, err);
                        return err;
+               }
        }
 
        return 0;
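
Both filter paths now treat transient failures uniformly: results that the next periodic filter sync will retry anyway (-ENOSPC, -ENXIO, -ETIMEDOUT, -EAGAIN and -EBUSY) are quietly treated as success for the moment, and only unexpected errors reach ionic_adminq_netdev_err_print(), which keeps recoverable conditions from flooding the log.
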
index 94384f5..d197a70 100644
@@ -669,27 +669,37 @@ dma_fail:
        return -EIO;
 }
 
+static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
+                                    struct ionic_desc_info *desc_info)
+{
+       struct ionic_buf_info *buf_info = desc_info->bufs;
+       struct device *dev = q->dev;
+       unsigned int i;
+
+       if (!desc_info->nbufs)
+               return;
+
+       dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+                        buf_info->len, DMA_TO_DEVICE);
+       buf_info++;
+       for (i = 1; i < desc_info->nbufs; i++, buf_info++)
+               dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+                              buf_info->len, DMA_TO_DEVICE);
+
+       desc_info->nbufs = 0;
+}
+
 static void ionic_tx_clean(struct ionic_queue *q,
                           struct ionic_desc_info *desc_info,
                           struct ionic_cq_info *cq_info,
                           void *cb_arg)
 {
-       struct ionic_buf_info *buf_info = desc_info->bufs;
        struct ionic_tx_stats *stats = q_to_tx_stats(q);
        struct ionic_qcq *qcq = q_to_qcq(q);
        struct sk_buff *skb = cb_arg;
-       struct device *dev = q->dev;
-       unsigned int i;
        u16 qi;
 
-       if (desc_info->nbufs) {
-               dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
-                                buf_info->len, DMA_TO_DEVICE);
-               buf_info++;
-               for (i = 1; i < desc_info->nbufs; i++, buf_info++)
-                       dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
-                                      buf_info->len, DMA_TO_DEVICE);
-       }
+       ionic_tx_desc_unmap_bufs(q, desc_info);
 
        if (!skb)
                return;
@@ -931,8 +941,11 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
                err = ionic_tx_tcp_inner_pseudo_csum(skb);
        else
                err = ionic_tx_tcp_pseudo_csum(skb);
-       if (err)
+       if (err) {
+               /* clean up mapping from ionic_tx_map_skb */
+               ionic_tx_desc_unmap_bufs(q, desc_info);
                return err;
+       }
 
        if (encap)
                hdrlen = skb_inner_transport_header(skb) - skb->data +
@@ -1003,8 +1016,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
        return 0;
 }
 
-static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
-                             struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
+                              struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_desc *desc = desc_info->txq_desc;
        struct ionic_buf_info *buf_info = desc_info->bufs;
@@ -1038,12 +1051,10 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
                stats->crc32_csum++;
        else
                stats->csum++;
-
-       return 0;
 }
 
-static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
-                                struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
+                                 struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_desc *desc = desc_info->txq_desc;
        struct ionic_buf_info *buf_info = desc_info->bufs;
@@ -1074,12 +1085,10 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
        desc->csum_offset = 0;
 
        stats->csum_none++;
-
-       return 0;
 }
 
-static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
-                             struct ionic_desc_info *desc_info)
+static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
+                              struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc;
        struct ionic_buf_info *buf_info = &desc_info->bufs[1];
@@ -1093,31 +1102,24 @@ static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
        }
 
        stats->frags += skb_shinfo(skb)->nr_frags;
-
-       return 0;
 }
 
 static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 {
        struct ionic_desc_info *desc_info = &q->info[q->head_idx];
        struct ionic_tx_stats *stats = q_to_tx_stats(q);
-       int err;
 
        if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
                return -EIO;
 
        /* set up the initial descriptor */
        if (skb->ip_summed == CHECKSUM_PARTIAL)
-               err = ionic_tx_calc_csum(q, skb, desc_info);
+               ionic_tx_calc_csum(q, skb, desc_info);
        else
-               err = ionic_tx_calc_no_csum(q, skb, desc_info);
-       if (err)
-               return err;
+               ionic_tx_calc_no_csum(q, skb, desc_info);
 
        /* add frags */
-       err = ionic_tx_skb_frags(q, skb, desc_info);
-       if (err)
-               return err;
+       ionic_tx_skb_frags(q, skb, desc_info);
 
        skb_tx_timestamp(skb);
        stats->pkts++;
index 19e2621..3c3d150 100644
@@ -2684,7 +2684,26 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
        if (enable && tp->aspm_manageable) {
                RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
                RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
+
+               switch (tp->mac_version) {
+               case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+                       /* reset ephy tx/rx disable timer */
+                       r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0);
+                       /* chip can trigger L1.2 */
+                       r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, BIT(2));
+                       break;
+               default:
+                       break;
+               }
        } else {
+               switch (tp->mac_version) {
+               case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+                       r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0);
+                       break;
+               default:
+                       break;
+               }
+
                RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
                RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        }
@@ -4843,8 +4862,6 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
                rtl8169_down(tp);
 }
 
-#ifdef CONFIG_PM
-
 static int rtl8169_runtime_resume(struct device *dev)
 {
        struct rtl8169_private *tp = dev_get_drvdata(dev);
@@ -4860,7 +4877,7 @@ static int rtl8169_runtime_resume(struct device *dev)
        return 0;
 }
 
-static int __maybe_unused rtl8169_suspend(struct device *device)
+static int rtl8169_suspend(struct device *device)
 {
        struct rtl8169_private *tp = dev_get_drvdata(device);
 
@@ -4873,7 +4890,7 @@ static int __maybe_unused rtl8169_suspend(struct device *device)
        return 0;
 }
 
-static int __maybe_unused rtl8169_resume(struct device *device)
+static int rtl8169_resume(struct device *device)
 {
        struct rtl8169_private *tp = dev_get_drvdata(device);
 
@@ -4915,13 +4932,11 @@ static int rtl8169_runtime_idle(struct device *device)
 }
 
 static const struct dev_pm_ops rtl8169_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
-       SET_RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
-                          rtl8169_runtime_idle)
+       SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
+       RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
+                      rtl8169_runtime_idle)
 };
 
-#endif /* CONFIG_PM */
-
 static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 {
        /* WoL fails with 8168b when the receiver is disabled. */
@@ -5255,6 +5270,16 @@ done:
        rtl_rar_set(tp, mac_addr);
 }
 
+/* register is set if system vendor successfully tested ASPM 1.2 */
+static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
+{
+       if (tp->mac_version >= RTL_GIGA_MAC_VER_60 &&
+           r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
+               return true;
+
+       return false;
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct rtl8169_private *tp;
@@ -5333,7 +5358,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
         * Chips from RTL8168h partially have issues with L1.2, but seem
         * to work fine with L1 and L1.1.
         */
-       if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
+       if (rtl_aspm_is_safe(tp))
+               rc = 0;
+       else if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
                rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
        else
                rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
@@ -5460,9 +5487,7 @@ static struct pci_driver rtl8169_pci_driver = {
        .probe          = rtl_init_one,
        .remove         = rtl_remove_one,
        .shutdown       = rtl_shutdown,
-#ifdef CONFIG_PM
-       .driver.pm      = &rtl8169_pm_ops,
-#endif
+       .driver.pm      = pm_ptr(&rtl8169_pm_ops),
 };
 
 module_pci_driver(rtl8169_pci_driver);
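
The r8169 conversion drops the CONFIG_PM preprocessor guards entirely: SYSTEM_SLEEP_PM_OPS()/RUNTIME_PM_OPS(), unlike their SET_* predecessors, do not leave the callbacks unreferenced in !CONFIG_PM builds (so no __maybe_unused is needed), and pm_ptr() collapses the ops pointer to NULL when CONFIG_PM is off, letting the compiler discard the dead callbacks instead of hiding them behind #ifdef. A simplified sketch; the real macro lives in include/linux/pm.h and example_pm_ops is illustrative:

	/* Roughly: the pointer if CONFIG_PM is enabled, NULL otherwise. */
	#define pm_ptr(_ptr)	(IS_ENABLED(CONFIG_PM) ? (_ptr) : NULL)

	static struct pci_driver example_pci_driver = {
		.name		= "example",
		.driver.pm	= pm_ptr(&example_pm_ops),
	};
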
index 32161a5..77a0d9d 100644
@@ -127,7 +127,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv)
        /* MAC core supports the EEE feature. */
        if (priv->hw_cap.eee) {
                /* Check if the PHY supports EEE */
-               if (phy_init_eee(ndev->phydev, 1))
+               if (phy_init_eee(ndev->phydev, true))
                        return false;
 
                priv->eee_active = 1;
index 639a753..eb7e41d 100644
@@ -938,105 +938,15 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
                        priv->pause, tx_cnt);
 }
 
-static void stmmac_validate(struct phylink_config *config,
-                           unsigned long *supported,
-                           struct phylink_link_state *state)
+static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config,
+                                                phy_interface_t interface)
 {
        struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, };
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-       int tx_cnt = priv->plat->tx_queues_to_use;
-       int max_speed = priv->plat->max_speed;
-
-       phylink_set(mac_supported, 10baseT_Half);
-       phylink_set(mac_supported, 10baseT_Full);
-       phylink_set(mac_supported, 100baseT_Half);
-       phylink_set(mac_supported, 100baseT_Full);
-       phylink_set(mac_supported, 1000baseT_Half);
-       phylink_set(mac_supported, 1000baseT_Full);
-       phylink_set(mac_supported, 1000baseKX_Full);
-
-       phylink_set(mac_supported, Autoneg);
-       phylink_set(mac_supported, Pause);
-       phylink_set(mac_supported, Asym_Pause);
-       phylink_set_port_modes(mac_supported);
-
-       /* Cut down 1G if asked to */
-       if ((max_speed > 0) && (max_speed < 1000)) {
-               phylink_set(mask, 1000baseT_Full);
-               phylink_set(mask, 1000baseX_Full);
-       } else if (priv->plat->has_gmac4) {
-               if (!max_speed || max_speed >= 2500) {
-                       phylink_set(mac_supported, 2500baseT_Full);
-                       phylink_set(mac_supported, 2500baseX_Full);
-               }
-       } else if (priv->plat->has_xgmac) {
-               if (!max_speed || (max_speed >= 2500)) {
-                       phylink_set(mac_supported, 2500baseT_Full);
-                       phylink_set(mac_supported, 2500baseX_Full);
-               }
-               if (!max_speed || (max_speed >= 5000)) {
-                       phylink_set(mac_supported, 5000baseT_Full);
-               }
-               if (!max_speed || (max_speed >= 10000)) {
-                       phylink_set(mac_supported, 10000baseSR_Full);
-                       phylink_set(mac_supported, 10000baseLR_Full);
-                       phylink_set(mac_supported, 10000baseER_Full);
-                       phylink_set(mac_supported, 10000baseLRM_Full);
-                       phylink_set(mac_supported, 10000baseT_Full);
-                       phylink_set(mac_supported, 10000baseKX4_Full);
-                       phylink_set(mac_supported, 10000baseKR_Full);
-               }
-               if (!max_speed || (max_speed >= 25000)) {
-                       phylink_set(mac_supported, 25000baseCR_Full);
-                       phylink_set(mac_supported, 25000baseKR_Full);
-                       phylink_set(mac_supported, 25000baseSR_Full);
-               }
-               if (!max_speed || (max_speed >= 40000)) {
-                       phylink_set(mac_supported, 40000baseKR4_Full);
-                       phylink_set(mac_supported, 40000baseCR4_Full);
-                       phylink_set(mac_supported, 40000baseSR4_Full);
-                       phylink_set(mac_supported, 40000baseLR4_Full);
-               }
-               if (!max_speed || (max_speed >= 50000)) {
-                       phylink_set(mac_supported, 50000baseCR2_Full);
-                       phylink_set(mac_supported, 50000baseKR2_Full);
-                       phylink_set(mac_supported, 50000baseSR2_Full);
-                       phylink_set(mac_supported, 50000baseKR_Full);
-                       phylink_set(mac_supported, 50000baseSR_Full);
-                       phylink_set(mac_supported, 50000baseCR_Full);
-                       phylink_set(mac_supported, 50000baseLR_ER_FR_Full);
-                       phylink_set(mac_supported, 50000baseDR_Full);
-               }
-               if (!max_speed || (max_speed >= 100000)) {
-                       phylink_set(mac_supported, 100000baseKR4_Full);
-                       phylink_set(mac_supported, 100000baseSR4_Full);
-                       phylink_set(mac_supported, 100000baseCR4_Full);
-                       phylink_set(mac_supported, 100000baseLR4_ER4_Full);
-                       phylink_set(mac_supported, 100000baseKR2_Full);
-                       phylink_set(mac_supported, 100000baseSR2_Full);
-                       phylink_set(mac_supported, 100000baseCR2_Full);
-                       phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full);
-                       phylink_set(mac_supported, 100000baseDR2_Full);
-               }
-       }
-
-       /* Half-Duplex can only work with single queue */
-       if (tx_cnt > 1) {
-               phylink_set(mask, 10baseT_Half);
-               phylink_set(mask, 100baseT_Half);
-               phylink_set(mask, 1000baseT_Half);
-       }
-
-       linkmode_and(supported, supported, mac_supported);
-       linkmode_andnot(supported, supported, mask);
 
-       linkmode_and(state->advertising, state->advertising, mac_supported);
-       linkmode_andnot(state->advertising, state->advertising, mask);
+       if (!priv->hw->xpcs)
+               return NULL;
 
-       /* If PCS is supported, check which modes it supports. */
-       if (priv->hw->xpcs)
-               xpcs_validate(priv->hw->xpcs, supported, state);
+       return &priv->hw->xpcs->pcs;
 }
 
 static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
@@ -1175,7 +1085,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 }
 
 static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
-       .validate = stmmac_validate,
+       .validate = phylink_generic_validate,
+       .mac_select_pcs = stmmac_mac_select_pcs,
        .mac_config = stmmac_mac_config,
        .mac_link_down = stmmac_mac_link_down,
        .mac_link_up = stmmac_mac_link_up,
@@ -1255,12 +1166,12 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 {
        struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
        struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
+       int max_speed = priv->plat->max_speed;
        int mode = priv->plat->phy_interface;
        struct phylink *phylink;
 
        priv->phylink_config.dev = &priv->dev->dev;
        priv->phylink_config.type = PHYLINK_NETDEV;
-       priv->phylink_config.pcs_poll = true;
        if (priv->plat->mdio_bus_data)
                priv->phylink_config.ovr_an_inband =
                        mdio_bus_data->xpcs_an_inband;
@@ -1268,14 +1179,50 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
        if (!fwnode)
                fwnode = dev_fwnode(priv->device);
 
+       /* Set the platform/firmware specified interface mode */
+       __set_bit(mode, priv->phylink_config.supported_interfaces);
+
+       /* If we have an xpcs, it defines which PHY interfaces are supported. */
+       if (priv->hw->xpcs)
+               xpcs_get_interfaces(priv->hw->xpcs,
+                                   priv->phylink_config.supported_interfaces);
+
+       priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+               MAC_10 | MAC_100;
+
+       if (!max_speed || max_speed >= 1000)
+               priv->phylink_config.mac_capabilities |= MAC_1000;
+
+       if (priv->plat->has_gmac4) {
+               if (!max_speed || max_speed >= 2500)
+                       priv->phylink_config.mac_capabilities |= MAC_2500FD;
+       } else if (priv->plat->has_xgmac) {
+               if (!max_speed || max_speed >= 2500)
+                       priv->phylink_config.mac_capabilities |= MAC_2500FD;
+               if (!max_speed || max_speed >= 5000)
+                       priv->phylink_config.mac_capabilities |= MAC_5000FD;
+               if (!max_speed || max_speed >= 10000)
+                       priv->phylink_config.mac_capabilities |= MAC_10000FD;
+               if (!max_speed || max_speed >= 25000)
+                       priv->phylink_config.mac_capabilities |= MAC_25000FD;
+               if (!max_speed || max_speed >= 40000)
+                       priv->phylink_config.mac_capabilities |= MAC_40000FD;
+               if (!max_speed || max_speed >= 50000)
+                       priv->phylink_config.mac_capabilities |= MAC_50000FD;
+               if (!max_speed || max_speed >= 100000)
+                       priv->phylink_config.mac_capabilities |= MAC_100000FD;
+       }
+
+       /* Half-Duplex can only work with single queue */
+       if (priv->plat->tx_queues_to_use > 1)
+               priv->phylink_config.mac_capabilities &=
+                       ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+
        phylink = phylink_create(&priv->phylink_config, fwnode,
                                 mode, &stmmac_phylink_mac_ops);
        if (IS_ERR(phylink))
                return PTR_ERR(phylink);
 
-       if (priv->hw->xpcs)
-               phylink_set_pcs(phylink, &priv->hw->xpcs->pcs);
-
        priv->phylink = phylink;
        return 0;
 }
index 5b4d153..4010896 100644 (file)
@@ -386,6 +386,7 @@ struct axidma_bd {
  * @phylink:   Pointer to phylink instance
  * @phylink_config: phylink configuration settings
  * @pcs_phy:   Reference to PCS/PMA PHY if used
+ * @pcs:       phylink pcs structure for PCS PHY
  * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core
  * @axi_clk:   AXI4-Lite bus clock
  * @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks)
@@ -434,6 +435,7 @@ struct axienet_local {
        struct phylink_config phylink_config;
 
        struct mdio_device *pcs_phy;
+       struct phylink_pcs pcs;
 
        bool switch_x_sgmii;
 
index 377c94e..de0a637 100644 (file)
@@ -1537,78 +1537,78 @@ static const struct ethtool_ops axienet_ethtool_ops = {
        .nway_reset     = axienet_ethtools_nway_reset,
 };
 
-static void axienet_mac_pcs_get_state(struct phylink_config *config,
-                                     struct phylink_link_state *state)
+static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct axienet_local *lp = netdev_priv(ndev);
+       return container_of(pcs, struct axienet_local, pcs);
+}
 
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               phylink_mii_c22_pcs_get_state(lp->pcs_phy, state);
-               break;
-       default:
-               break;
-       }
+static void axienet_pcs_get_state(struct phylink_pcs *pcs,
+                                 struct phylink_link_state *state)
+{
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+
+       phylink_mii_c22_pcs_get_state(pcs_phy, state);
 }
 
-static void axienet_mac_an_restart(struct phylink_config *config)
+static void axienet_pcs_an_restart(struct phylink_pcs *pcs)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct axienet_local *lp = netdev_priv(ndev);
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
 
-       phylink_mii_c22_pcs_an_restart(lp->pcs_phy);
+       phylink_mii_c22_pcs_an_restart(pcs_phy);
 }
 
-static int axienet_mac_prepare(struct phylink_config *config, unsigned int mode,
-                              phy_interface_t iface)
+static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+                             phy_interface_t interface,
+                             const unsigned long *advertising,
+                             bool permit_pause_to_mac)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+       struct net_device *ndev = pcs_to_axienet_local(pcs)->ndev;
        struct axienet_local *lp = netdev_priv(ndev);
        int ret;
 
-       switch (iface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               if (!lp->switch_x_sgmii)
-                       return 0;
-
-               ret = mdiobus_write(lp->pcs_phy->bus,
-                                   lp->pcs_phy->addr,
-                                   XLNX_MII_STD_SELECT_REG,
-                                   iface == PHY_INTERFACE_MODE_SGMII ?
+       if (lp->switch_x_sgmii) {
+               ret = mdiodev_write(pcs_phy, XLNX_MII_STD_SELECT_REG,
+                                   interface == PHY_INTERFACE_MODE_SGMII ?
                                        XLNX_MII_STD_SELECT_SGMII : 0);
-               if (ret < 0)
-                       netdev_warn(ndev, "Failed to switch PHY interface: %d\n",
+               if (ret < 0) {
+                       netdev_warn(ndev,
+                                   "Failed to switch PHY interface: %d\n",
                                    ret);
-               return ret;
-       default:
-               return 0;
+                       return ret;
+               }
        }
+
+       ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising);
+       if (ret < 0)
+               netdev_warn(ndev, "Failed to configure PCS: %d\n", ret);
+
+       return ret;
 }
 
-static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
-                              const struct phylink_link_state *state)
+static const struct phylink_pcs_ops axienet_pcs_ops = {
+       .pcs_get_state = axienet_pcs_get_state,
+       .pcs_config = axienet_pcs_config,
+       .pcs_an_restart = axienet_pcs_an_restart,
+};
+
+static struct phylink_pcs *axienet_mac_select_pcs(struct phylink_config *config,
+                                                 phy_interface_t interface)
 {
        struct net_device *ndev = to_net_dev(config->dev);
        struct axienet_local *lp = netdev_priv(ndev);
-       int ret;
 
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode,
-                                                state->interface,
-                                                state->advertising);
-               if (ret < 0)
-                       netdev_warn(ndev, "Failed to configure PCS: %d\n",
-                                   ret);
-               break;
+       if (interface == PHY_INTERFACE_MODE_1000BASEX ||
+           interface == PHY_INTERFACE_MODE_SGMII)
+               return &lp->pcs;
 
-       default:
-               break;
-       }
+       return NULL;
+}
+
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+                              const struct phylink_link_state *state)
+{
+       /* nothing meaningful to do */
 }
 
 static void axienet_mac_link_down(struct phylink_config *config,
@@ -1663,9 +1663,7 @@ static void axienet_mac_link_up(struct phylink_config *config,
 
 static const struct phylink_mac_ops axienet_phylink_ops = {
        .validate = phylink_generic_validate,
-       .mac_pcs_get_state = axienet_mac_pcs_get_state,
-       .mac_an_restart = axienet_mac_an_restart,
-       .mac_prepare = axienet_mac_prepare,
+       .mac_select_pcs = axienet_mac_select_pcs,
        .mac_config = axienet_mac_config,
        .mac_link_down = axienet_mac_link_down,
        .mac_link_up = axienet_mac_link_up,
@@ -2079,12 +2077,12 @@ static int axienet_probe(struct platform_device *pdev)
                        ret = -EPROBE_DEFER;
                        goto cleanup_mdio;
                }
-               lp->phylink_config.pcs_poll = true;
+               lp->pcs.ops = &axienet_pcs_ops;
+               lp->pcs.poll = true;
        }
 
        lp->phylink_config.dev = &ndev->dev;
        lp->phylink_config.type = PHYLINK_NETDEV;
-       lp->phylink_config.legacy_pre_march2020 = true;
        lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
                MAC_10FD | MAC_100FD | MAC_1000FD;
 
index cd6742e..61418d4 100644 (file)
@@ -632,35 +632,43 @@ static void xpcs_resolve_pma(struct dw_xpcs *xpcs,
        }
 }
 
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
-                  struct phylink_link_state *state)
+static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
+                        const struct phylink_link_state *state)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported) = { 0, };
        const struct xpcs_compat *compat;
+       struct dw_xpcs *xpcs;
        int i;
 
-       /* phylink expects us to report all supported modes with
-        * PHY_INTERFACE_MODE_NA, just don't limit the supported and
-        * advertising masks and exit.
-        */
-       if (state->interface == PHY_INTERFACE_MODE_NA)
-               return;
-
-       linkmode_zero(xpcs_supported);
-
+       xpcs = phylink_pcs_to_xpcs(pcs);
        compat = xpcs_find_compat(xpcs->id, state->interface);
 
-       /* Populate the supported link modes for this
-        * PHY interface type
+       /* Populate the supported link modes for this PHY interface type.
+        * FIXME: what about the port modes and autoneg bit? This masks
+        * all those away.
         */
        if (compat)
                for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
                        set_bit(compat->supported[i], xpcs_supported);
 
        linkmode_and(supported, supported, xpcs_supported);
-       linkmode_and(state->advertising, state->advertising, xpcs_supported);
+
+       return 0;
+}
+
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
+{
+       int i, j;
+
+       for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
+               const struct xpcs_compat *compat = &xpcs->id->compat[i];
+
+               for (j = 0; j < compat->num_interfaces; j++)
+                       if (compat->interface[j] < PHY_INTERFACE_MODE_MAX)
+                               __set_bit(compat->interface[j], interfaces);
+       }
 }
-EXPORT_SYMBOL_GPL(xpcs_validate);
+EXPORT_SYMBOL_GPL(xpcs_get_interfaces);
 
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
 {
@@ -1106,6 +1114,7 @@ static const struct xpcs_id xpcs_id_list[] = {
 };
 
 static const struct phylink_pcs_ops xpcs_phylink_ops = {
+       .pcs_validate = xpcs_validate,
        .pcs_config = xpcs_config,
        .pcs_get_state = xpcs_get_state,
        .pcs_link_up = xpcs_link_up,
index 5b6c0d1..f504fe5 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/consumer.h>
+#include <linux/phylink.h>
+#include <linux/sfp.h>
 #include <dt-bindings/net/qca-ar803x.h>
 
 #define AT803X_SPECIFIC_FUNCTION_CONTROL       0x10
@@ -51,6 +53,8 @@
 #define AT803X_INTR_ENABLE_PAGE_RECEIVED       BIT(12)
 #define AT803X_INTR_ENABLE_LINK_FAIL           BIT(11)
 #define AT803X_INTR_ENABLE_LINK_SUCCESS                BIT(10)
+#define AT803X_INTR_ENABLE_LINK_FAIL_BX                BIT(8)
+#define AT803X_INTR_ENABLE_LINK_SUCCESS_BX     BIT(7)
 #define AT803X_INTR_ENABLE_WIRESPEED_DOWNGRADE BIT(5)
 #define AT803X_INTR_ENABLE_POLARITY_CHANGED    BIT(1)
 #define AT803X_INTR_ENABLE_WOL                 BIT(0)
 #define AT803X_DEBUG_DATA                      0x1E
 
 #define AT803X_MODE_CFG_MASK                   0x0F
-#define AT803X_MODE_CFG_SGMII                  0x01
+#define AT803X_MODE_CFG_BASET_RGMII            0x00
+#define AT803X_MODE_CFG_BASET_SGMII            0x01
+#define AT803X_MODE_CFG_BX1000_RGMII_50OHM     0x02
+#define AT803X_MODE_CFG_BX1000_RGMII_75OHM     0x03
+#define AT803X_MODE_CFG_BX1000_CONV_50OHM      0x04
+#define AT803X_MODE_CFG_BX1000_CONV_75OHM      0x05
+#define AT803X_MODE_CFG_FX100_RGMII_50OHM      0x06
+#define AT803X_MODE_CFG_FX100_CONV_50OHM       0x07
+#define AT803X_MODE_CFG_RGMII_AUTO_MDET                0x0B
+#define AT803X_MODE_CFG_FX100_RGMII_75OHM      0x0E
+#define AT803X_MODE_CFG_FX100_CONV_75OHM       0x0F
 
 #define AT803X_PSSR                            0x11    /* PHY-Specific Status Register */
 #define AT803X_PSSR_MR_AN_COMPLETE             0x0200
@@ -283,6 +297,8 @@ struct at803x_priv {
        u16 clk_25m_mask;
        u8 smarteee_lpi_tw_1g;
        u8 smarteee_lpi_tw_100m;
+       bool is_fiber;
+       bool is_1000basex;
        struct regulator_dev *vddio_rdev;
        struct regulator_dev *vddh_rdev;
        struct regulator *vddio;
@@ -650,6 +666,55 @@ static int at8031_register_regulators(struct phy_device *phydev)
        return 0;
 }
 
+static int at803x_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+       struct phy_device *phydev = upstream;
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_support);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support);
+       phy_interface_t iface;
+
+       linkmode_zero(phy_support);
+       phylink_set(phy_support, 1000baseX_Full);
+       phylink_set(phy_support, 1000baseT_Full);
+       phylink_set(phy_support, Autoneg);
+       phylink_set(phy_support, Pause);
+       phylink_set(phy_support, Asym_Pause);
+
+       linkmode_zero(sfp_support);
+       sfp_parse_support(phydev->sfp_bus, id, sfp_support);
+       /* Some modules advertise 10G modes in addition to the modes we
+        * support. Mask out the unsupported modes so that the correct
+        * interface is picked.
+        */
+       linkmode_and(sfp_support, phy_support, sfp_support);
+
+       if (linkmode_empty(sfp_support)) {
+               dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n");
+               return -EINVAL;
+       }
+
+       iface = sfp_select_interface(phydev->sfp_bus, sfp_support);
+
+       /* Only 1000Base-X is supported by AR8031/8033 as the downstream SerDes
+        * interface for use with SFP modules.
+        * However, some copper modules that are detected as preferring an
+        * SGMII interface do default to, and work in, 1000Base-X mode, so
+        * just print a warning and allow such modules: they may have some
+        * chance of working.
+        */
+       if (iface == PHY_INTERFACE_MODE_SGMII)
+               dev_warn(&phydev->mdio.dev, "module may not function if 1000Base-X not supported\n");
+       else if (iface != PHY_INTERFACE_MODE_1000BASEX)
+               return -EINVAL;
+
+       return 0;
+}
+
+static const struct sfp_upstream_ops at803x_sfp_ops = {
+       .attach = phy_sfp_attach,
+       .detach = phy_sfp_detach,
+       .module_insert = at803x_sfp_insert,
+};
+
 static int at803x_parse_dt(struct phy_device *phydev)
 {
        struct device_node *node = phydev->mdio.dev.of_node;
@@ -757,6 +822,11 @@ static int at803x_parse_dt(struct phy_device *phydev)
                        phydev_err(phydev, "failed to get VDDIO regulator\n");
                        return PTR_ERR(priv->vddio);
                }
+
+               /* Only AR8031/8033 support 1000Base-X for SFP modules */
+               ret = phy_sfp_probe(phydev, &at803x_sfp_ops);
+               if (ret < 0)
+                       return ret;
        }
 
        return 0;
@@ -784,16 +854,24 @@ static int at803x_probe(struct phy_device *phydev)
                        return ret;
        }
 
-       /* Some bootloaders leave the fiber page selected.
-        * Switch to the copper page, as otherwise we read
-        * the PHY capabilities from the fiber side.
-        */
        if (phydev->drv->phy_id == ATH8031_PHY_ID) {
-               phy_lock_mdio_bus(phydev);
-               ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
-               phy_unlock_mdio_bus(phydev);
-               if (ret)
+               int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+               int mode_cfg;
+
+               if (ccr < 0)
                        goto err;
+               mode_cfg = ccr & AT803X_MODE_CFG_MASK;
+
+               switch (mode_cfg) {
+               case AT803X_MODE_CFG_BX1000_RGMII_50OHM:
+               case AT803X_MODE_CFG_BX1000_RGMII_75OHM:
+                       priv->is_1000basex = true;
+                       fallthrough;
+               case AT803X_MODE_CFG_FX100_RGMII_50OHM:
+               case AT803X_MODE_CFG_FX100_RGMII_75OHM:
+                       priv->is_fiber = true;
+                       break;
+               }
        }
 
        return 0;
@@ -815,6 +893,7 @@ static void at803x_remove(struct phy_device *phydev)
 
 static int at803x_get_features(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err;
 
        err = genphy_read_abilities(phydev);
@@ -841,12 +920,13 @@ static int at803x_get_features(struct phy_device *phydev)
         * As a result of that, ESTATUS_1000_XFULL is set
         * to 1 even when operating in copper TP mode.
         *
-        * Remove this mode from the supported link modes,
-        * as this driver currently only supports copper
-        * operation.
+        * Remove this mode from the supported link modes
+        * when not operating in 1000BaseX mode.
         */
-       linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
-                          phydev->supported);
+       if (!priv->is_1000basex)
+               linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+                                  phydev->supported);
+
        return 0;
 }
 
@@ -910,8 +990,27 @@ static int at8031_pll_config(struct phy_device *phydev)
 
 static int at803x_config_init(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int ret;
 
+       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+               /* Some bootloaders leave the fiber page selected.
+                * Switch to the appropriate page (fiber or copper), as otherwise we
+                * read the PHY capabilities from the wrong page.
+                */
+               phy_lock_mdio_bus(phydev);
+               ret = at803x_write_page(phydev,
+                                       priv->is_fiber ? AT803X_PAGE_FIBER :
+                                                        AT803X_PAGE_COPPER);
+               phy_unlock_mdio_bus(phydev);
+               if (ret)
+                       return ret;
+
+               ret = at8031_pll_config(phydev);
+               if (ret < 0)
+                       return ret;
+       }
+
        /* The RX and TX delay default is:
         *   after HW reset: RX delay enabled and TX delay disabled
         *   after SW reset: RX delay enabled, while TX delay retains the
@@ -941,12 +1040,6 @@ static int at803x_config_init(struct phy_device *phydev)
        if (ret < 0)
                return ret;
 
-       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
-               ret = at8031_pll_config(phydev);
-               if (ret < 0)
-                       return ret;
-       }
-
        /* Ar803x extended next page bit is enabled by default. Cisco
         * multigig switches read this bit and attempt to negotiate 10Gbps
         * rates even if the next page bit is disabled. This is incorrect
@@ -967,6 +1060,7 @@ static int at803x_ack_interrupt(struct phy_device *phydev)
 
 static int at803x_config_intr(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err;
        int value;
 
@@ -983,6 +1077,10 @@ static int at803x_config_intr(struct phy_device *phydev)
                value |= AT803X_INTR_ENABLE_DUPLEX_CHANGED;
                value |= AT803X_INTR_ENABLE_LINK_FAIL;
                value |= AT803X_INTR_ENABLE_LINK_SUCCESS;
+               if (priv->is_fiber) {
+                       value |= AT803X_INTR_ENABLE_LINK_FAIL_BX;
+                       value |= AT803X_INTR_ENABLE_LINK_SUCCESS_BX;
+               }
 
                err = phy_write(phydev, AT803X_INTR_ENABLE, value);
        } else {
@@ -1115,8 +1213,12 @@ static int at803x_read_specific_status(struct phy_device *phydev)
 
 static int at803x_read_status(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err, old_link = phydev->link;
 
+       if (priv->is_1000basex)
+               return genphy_c37_read_status(phydev);
+
        /* Update the link, but return if there was an error */
        err = genphy_update_link(phydev);
        if (err)
@@ -1170,6 +1272,7 @@ static int at803x_config_mdix(struct phy_device *phydev, u8 ctrl)
 
 static int at803x_config_aneg(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int ret;
 
        ret = at803x_config_mdix(phydev, phydev->mdix_ctrl);
@@ -1186,6 +1289,9 @@ static int at803x_config_aneg(struct phy_device *phydev)
                        return ret;
        }
 
+       if (priv->is_1000basex)
+               return genphy_c37_config_aneg(phydev);
+
        /* Do not restart auto-negotiation by setting ret to 0 by default,
         * when calling __genphy_config_aneg later.
         */
index 4514d35..9b72334 100644 (file)
@@ -858,7 +858,6 @@ static int marvell_phy_init(struct usbnet *dev)
                reg = asix_mdio_read(dev->net, dev->mii.phy_id,
                        MII_MARVELL_LED_CTRL);
                netdev_dbg(dev->net, "MII_MARVELL_LED_CTRL (2) = 0x%04x\n", reg);
-               reg &= 0xfc0f;
        }
 
        return 0;
index 30d2912..6335d7a 100644 (file)
@@ -456,7 +456,7 @@ static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = {
 
 int st_nci_vendor_cmds_init(struct nci_dev *ndev)
 {
-       return nfc_set_vendor_cmds(ndev->nfc_dev, st_nci_vendor_cmds,
+       return nci_set_vendor_cmds(ndev, st_nci_vendor_cmds,
                                   sizeof(st_nci_vendor_cmds));
 }
 EXPORT_SYMBOL(st_nci_vendor_cmds_init);
index 7488286..bfa418d 100644 (file)
@@ -358,7 +358,7 @@ int st21nfca_vendor_cmds_init(struct nfc_hci_dev *hdev)
        struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
 
        init_completion(&info->vendor_info.req_completion);
-       return nfc_set_vendor_cmds(hdev->ndev, st21nfca_vendor_cmds,
-                                  sizeof(st21nfca_vendor_cmds));
+       return nfc_hci_set_vendor_cmds(hdev, st21nfca_vendor_cmds,
+                                      sizeof(st21nfca_vendor_cmds));
 }
 EXPORT_SYMBOL(st21nfca_vendor_cmds_init);
index 41b92dc..9233bfe 100644 (file)
@@ -14,7 +14,7 @@ static ssize_t clock_name_show(struct device *dev,
                               struct device_attribute *attr, char *page)
 {
        struct ptp_clock *ptp = dev_get_drvdata(dev);
-       return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
+       return sysfs_emit(page, "%s\n", ptp->info->name);
 }
 static DEVICE_ATTR_RO(clock_name);
 
@@ -387,7 +387,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr,
 
        mutex_unlock(&ptp->pincfg_mux);
 
-       return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan);
+       return sysfs_emit(page, "%u %u\n", func, chan);
 }
 
 static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
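sysfs_emit() (from <linux/sysfs.h>) is the preferred replacement here: it knows the sysfs buffer is exactly one page, so callers cannot get the size wrong the way the two snprintf() calls above disagreed (PAGE_SIZE-1 vs PAGE_SIZE). A minimal sketch of a show() callback, assuming a hypothetical "foo" attribute:

static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
                        char *buf)
{
        /* sysfs_emit() warns on a non-page-aligned buffer and never
         * writes past PAGE_SIZE.
         */
        return sysfs_emit(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(foo);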
index fa517ae..8c92c97 100644 (file)
@@ -194,6 +194,17 @@ struct bpf_map {
        struct work_struct work;
        struct mutex freeze_mutex;
        atomic64_t writecnt;
+       /* 'Ownership' of program-containing map is claimed by the first program
+        * that is going to use this map or by the first program whose FD is
+        * stored in the map to make sure that all callers and callees have the
+        * same prog type, JITed flag and xdp_has_frags flag.
+        */
+       struct {
+               spinlock_t lock;
+               enum bpf_prog_type type;
+               bool jited;
+               bool xdp_has_frags;
+       } owner;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -578,7 +589,6 @@ struct bpf_verifier_ops {
                                 const struct btf_type *t, int off, int size,
                                 enum bpf_access_type atype,
                                 u32 *next_btf_id);
-       bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
 };
 
 struct bpf_prog_offload_ops {
@@ -939,6 +949,7 @@ struct bpf_prog_aux {
        bool func_proto_unreliable;
        bool sleepable;
        bool tail_call_reachable;
+       bool xdp_has_frags;
        struct hlist_node tramp_hlist;
        /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
        const struct btf_type *attach_func_proto;
@@ -999,16 +1010,6 @@ struct bpf_prog_aux {
 };
 
 struct bpf_array_aux {
-       /* 'Ownership' of prog array is claimed by the first program that
-        * is going to use this map or by the first program which FD is
-        * stored in the map to make sure that all callers and callees have
-        * the same prog type and JITed flag.
-        */
-       struct {
-               spinlock_t lock;
-               enum bpf_prog_type type;
-               bool jited;
-       } owner;
        /* Programs with direct jumps into programs part of this array. */
        struct list_head poke_progs;
        struct bpf_map *map;
@@ -1183,7 +1184,14 @@ struct bpf_event_entry {
        struct rcu_head rcu;
 };
 
-bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+static inline bool map_type_contains_progs(struct bpf_map *map)
+{
+       return map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+              map->map_type == BPF_MAP_TYPE_DEVMAP ||
+              map->map_type == BPF_MAP_TYPE_CPUMAP;
+}
+
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp);
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -1251,6 +1259,7 @@ struct bpf_run_ctx {};
 struct bpf_cg_run_ctx {
        struct bpf_run_ctx run_ctx;
        const struct bpf_prog_array_item *prog_item;
+       int retval;
 };
 
 struct bpf_trace_run_ctx {
@@ -1283,19 +1292,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
 
 typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
 
-static __always_inline u32
+static __always_inline int
 BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
                            const void *ctx, bpf_prog_run_fn run_prog,
-                           u32 *ret_flags)
+                           int retval, u32 *ret_flags)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       u32 ret = 1;
        u32 func_ret;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1304,27 +1313,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
                func_ret = run_prog(prog, ctx);
-               ret &= (func_ret & 1);
+               if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
                *(ret_flags) |= (func_ret >> 1);
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
-static __always_inline u32
+static __always_inline int
 BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-                     const void *ctx, bpf_prog_run_fn run_prog)
+                     const void *ctx, bpf_prog_run_fn run_prog,
+                     int retval)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       u32 ret = 1;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1332,13 +1343,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
        old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
-               ret &= run_prog(prog, ctx);
+               if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
 static __always_inline u32
@@ -1391,19 +1403,21 @@ out:
  *   0: NET_XMIT_SUCCESS  skb should be transmitted
  *   1: NET_XMIT_DROP     skb should be dropped and cn
  *   2: NET_XMIT_CN       skb should be transmitted and cn
- *   3: -EPERM            skb should be dropped
+ *   3: -err              skb should be dropped
  */
 #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)                \
        ({                                              \
                u32 _flags = 0;                         \
                bool _cn;                               \
                u32 _ret;                               \
-               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
                _cn = _flags & BPF_RET_SET_CN;          \
-               if (_ret)                               \
+               if (_ret && !IS_ERR_VALUE((long)_ret))  \
+                       _ret = -EFAULT;                 \
+               if (!_ret)                              \
                        _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
                else                                    \
-                       _ret = (_cn ? NET_XMIT_DROP : -EPERM);          \
+                       _ret = (_cn ? NET_XMIT_DROP : _ret);            \
                _ret;                                   \
        })
 
@@ -1724,7 +1738,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
                                const union bpf_attr *kattr,
                                union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                    const struct bpf_prog *prog,
                    struct bpf_insn_access_aux *info);
@@ -1976,12 +1989,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
        return -ENOTSUPP;
 }
 
-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
-                                                 struct module *owner)
-{
-       return false;
-}
-
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
@@ -2076,6 +2083,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr);
+
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
@@ -2129,6 +2139,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
        return -EOPNOTSUPP;
 }
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                                         union bpf_attr __user *uattr)
+{
+       return -EINVAL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
index e999317..7a7be8c 100644 (file)
@@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 
 int check_ptr_off_reg(struct bpf_verifier_env *env,
                      const struct bpf_reg_state *reg, int regno);
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                            u32 regno);
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size);
 
@@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type)
        return type & ~BPF_BASE_TYPE_MASK;
 }
 
+static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+}
+
 #endif /* _LINUX_BPF_VERIFIER_H */
index 0c74348..b12cfe3 100644 (file)
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
 
+enum btf_kfunc_type {
+       BTF_KFUNC_TYPE_CHECK,
+       BTF_KFUNC_TYPE_ACQUIRE,
+       BTF_KFUNC_TYPE_RELEASE,
+       BTF_KFUNC_TYPE_RET_NULL,
+       BTF_KFUNC_TYPE_MAX,
+};
+
 struct btf;
 struct btf_member;
 struct btf_type;
 union bpf_attr;
 struct btf_show;
+struct btf_id_set;
+
+struct btf_kfunc_id_set {
+       struct module *owner;
+       union {
+               struct {
+                       struct btf_id_set *check_set;
+                       struct btf_id_set *acquire_set;
+                       struct btf_id_set *release_set;
+                       struct btf_id_set *ret_null_set;
+               };
+               struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
+       };
+};
 
 extern const struct file_operations btf_fops;
 
@@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
 struct btf *btf_parse_vmlinux(void);
 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+                              enum bpf_prog_type prog_type,
+                              enum btf_kfunc_type type, u32 kfunc_btf_id);
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                             const struct btf_kfunc_id_set *s);
 #else
 static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
                                                    u32 type_id)
@@ -318,50 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
 {
        return NULL;
 }
-#endif
-
-struct kfunc_btf_id_set {
-       struct list_head list;
-       struct btf_id_set *set;
-       struct module *owner;
-};
-
-struct kfunc_btf_id_list {
-       struct list_head list;
-       struct mutex mutex;
-};
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                              struct kfunc_btf_id_set *s);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                struct kfunc_btf_id_set *s);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-                             struct module *owner);
-
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-#else
-static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                            struct kfunc_btf_id_set *s)
-{
-}
-static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                              struct kfunc_btf_id_set *s)
+static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
+                                            enum bpf_prog_type prog_type,
+                                            enum btf_kfunc_type type,
+                                            u32 kfunc_btf_id)
 {
+       return false;
 }
-static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
-                                           u32 kfunc_id, struct module *owner)
+static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                                           const struct btf_kfunc_id_set *s)
 {
-       return false;
+       return 0;
 }
-
-static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
-static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
 #endif
 
-#define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
-       struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
-                                        THIS_MODULE }
-
 #endif
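register_btf_kfunc_id_set() above replaces the old list-based kfunc_btf_id_set machinery: a subsystem or module now registers one btf_kfunc_id_set per program type, carrying the check set plus optional acquire/release/ret_null sets. A hedged sketch of a module registering a hypothetical kfunc for XDP programs:

#include <linux/btf.h>
#include <linux/btf_ids.h>

BTF_SET_START(foo_check_kfunc_ids)
BTF_ID(func, bpf_foo_lookup)            /* hypothetical kfunc */
BTF_SET_END(foo_check_kfunc_ids)

static const struct btf_kfunc_id_set foo_kfunc_set = {
        .owner          = THIS_MODULE,
        .check_set      = &foo_check_kfunc_ids,
};

static int __init foo_init(void)
{
        return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &foo_kfunc_set);
}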
index 919c0fd..bc5d9cc 100644 (file)
@@ -11,6 +11,7 @@ struct btf_id_set {
 #ifdef CONFIG_DEBUG_INFO_BTF
 
 #include <linux/compiler.h> /* for __PASTE */
+#include <linux/compiler_attributes.h> /* for __maybe_unused */
 
 /*
  * Following macros help to define lists of BTF IDs placed
@@ -146,14 +147,14 @@ extern struct btf_id_set name;
 
 #else
 
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
 #define BTF_SET_END(name)
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
index 71fa57b..d23e999 100644 (file)
@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
        s32             level;
        s32             optname;
        s32             optlen;
-       s32             retval;
+       /* for retval in struct bpf_cg_run_ctx */
+       struct task_struct *current_task;
+       /* Temporary "register" for indirect stores to ppos. */
+       u64             tmp_reg;
 };
 
 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
index a59d25f..1e0f8a3 100644 (file)
@@ -371,19 +371,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
        return NULL;
 }
 
-static inline struct inet6_request_sock *
-                       inet6_rsk(const struct request_sock *rsk)
-{
-       return NULL;
-}
-
 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 {
        return NULL;
 }
 
 #define inet6_rcv_saddr(__sk)  NULL
-#define tcp_twsk_ipv6only(__sk)                0
 #define inet_v6_ipv6only(__sk)         0
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif /* _IPV6_H */
index f8397f3..15e0e02 100644 (file)
@@ -66,11 +66,6 @@ static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr,
                linkmode_clear_bit(nr, addr);
 }
 
-static inline void linkmode_change_bit(int nr, volatile unsigned long *addr)
-{
-       __change_bit(nr, addr);
-}
-
 static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr)
 {
        return test_bit(nr, addr);
index 12ea29e..b8a1a17 100644 (file)
@@ -388,23 +388,6 @@ mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa)
 }
 
 /**
- * mii_lpa_to_linkmode_adv_sgmii
- * @advertising: pointer to destination link mode.
- * @lpa: value of the MII_LPA register
- *
- * A small helper function that translates MII_ADVERTISE bits
- * to linkmode advertisement settings when in SGMII mode.
- * Clears the old value of advertising.
- */
-static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising,
-                                                u32 lpa)
-{
-       linkmode_zero(lp_advertising);
-
-       mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa);
-}
-
-/**
  * mii_adv_mod_linkmode_adv_t
  * @advertising:pointer to destination link mode.
  * @adv: value of the MII_ADVERTISE register
index 1ec6318..bda1c38 100644 (file)
@@ -135,15 +135,6 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
        extack->cookie_len = sizeof(cookie);
 }
 
-static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack,
-                                           u32 cookie)
-{
-       if (!extack)
-               return;
-       memcpy(extack->cookie, &cookie, sizeof(cookie));
-       extack->cookie_len = sizeof(cookie);
-}
-
 void netlink_kernel_release(struct sock *sk);
 int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
 int netlink_change_ngroups(struct sock *sk, unsigned int groups);
index add077a..266eb26 100644 (file)
@@ -31,8 +31,7 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
                  phy_interface_t interface, int speed, int duplex);
 int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
                   unsigned int mode);
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
-                  struct phylink_link_state *state);
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
                    int enable);
 struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
index 8a636e6..a27bcc4 100644 (file)
@@ -557,6 +557,7 @@ struct skb_shared_info {
         * Warning : all fields before dataref are cleared in __alloc_skb()
         */
        atomic_t        dataref;
+       unsigned int    xdp_frags_size;
 
        /* Intermediate layers must ensure that destructor_arg
         * remains valid until skb destructor */
@@ -3898,11 +3899,6 @@ static inline ktime_t net_timedelta(ktime_t t)
        return ktime_sub(ktime_get_real(), t);
 }
 
-static inline ktime_t net_invalid_timestamp(void)
-{
-       return 0;
-}
-
 static inline u8 skb_metadata_len(const struct sk_buff *skb)
 {
        return skb_shinfo(skb)->meta_len;
index ae66dad..254a265 100644 (file)
@@ -23,11 +23,6 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
        return (struct udphdr *)skb_transport_header(skb);
 }
 
-static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
-{
-       return (struct udphdr *)skb_inner_transport_header(skb);
-}
-
 #define UDP_HTABLE_SIZE_MIN            (CONFIG_BASE_SMALL ? 128 : 256)
 
 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
index 526e495..cb628c5 100644 (file)
@@ -187,18 +187,12 @@ typedef struct {
 
 typedef struct ax25_route {
        struct ax25_route       *next;
-       refcount_t              refcount;
        ax25_address            callsign;
        struct net_device       *dev;
        ax25_digi               *digipeat;
        char                    ip_mode;
 } ax25_route;
 
-static inline void ax25_hold_route(ax25_route *ax25_rt)
-{
-       refcount_inc(&ax25_rt->refcount);
-}
-
 void __ax25_put_route(ax25_route *ax25_rt);
 
 extern rwlock_t ax25_route_lock;
@@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void)
        read_unlock(&ax25_route_lock);
 }
 
-static inline void ax25_put_route(ax25_route *ax25_rt)
-{
-       if (refcount_dec_and_test(&ax25_rt->refcount))
-               __ax25_put_route(ax25_rt);
-}
-
 typedef struct {
        char                    slave;                  /* slave_mode?   */
        struct timer_list       slave_timer;            /* timeout timer */
index 83cfd2d..7dead85 100644 (file)
@@ -699,20 +699,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
 }
 
 /* Caller must hold rcu_read_lock() for read */
-static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
-                                              const u8 *mac)
-{
-       struct list_head *iter;
-       struct slave *tmp;
-
-       bond_for_each_slave_rcu(bond, tmp, iter)
-               if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
-                       return tmp;
-
-       return NULL;
-}
-
-/* Caller must hold rcu_read_lock() for read */
 static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
 {
        struct list_head *iter;
index dfd919b..463ae5d 100644 (file)
@@ -65,13 +65,13 @@ struct inet_timewait_sock {
        /* these three are in inet_sock */
        __be16                  tw_sport;
        /* And these are ours. */
-       unsigned int            tw_kill         : 1,
-                               tw_transparent  : 1,
+       unsigned int            tw_transparent  : 1,
                                tw_flowlabel    : 20,
-                               tw_pad          : 2,    /* 2 bits hole */
+                               tw_pad          : 3,    /* 3 bits hole */
                                tw_tos          : 8;
        u32                     tw_txhash;
        u32                     tw_priority;
+       u32                     tw_bslot; /* bind bucket slot */
        struct timer_list       tw_timer;
        struct inet_bind_bucket *tw_tb;
 };
@@ -110,8 +110,6 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
 
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
-
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
 {
diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
new file mode 100644 (file)
index 0000000..a473b56
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_BPF_H
+#define _NF_CONNTRACK_BPF_H
+
+#include <linux/btf.h>
+#include <linux/kconfig.h>
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_conntrack_bpf(void);
+
+#else
+
+static inline int register_nf_conntrack_bpf(void)
+{
+       return 0;
+}
+
+#endif
+
+#endif /* _NF_CONNTRACK_BPF_H */
index 7855764..f068786 100644 (file)
@@ -31,18 +31,16 @@ struct ping_group_range {
 struct inet_hashinfo;
 
 struct inet_timewait_death_row {
-       atomic_t                tw_count;
-       char                    tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)];
+       refcount_t              tw_refcount;
 
-       struct inet_hashinfo    *hashinfo;
+       struct inet_hashinfo    *hashinfo ____cacheline_aligned_in_smp;
        int                     sysctl_max_tw_buckets;
 };
 
 struct tcp_fastopen_context;
 
 struct netns_ipv4 {
-       /* Please keep tcp_death_row at first field in netns_ipv4 */
-       struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp;
+       struct inet_timewait_death_row *tcp_death_row;
 
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *forw_hdr;
@@ -70,11 +68,9 @@ struct netns_ipv4 {
        struct hlist_head       *fib_table_hash;
        struct sock             *fibnl;
 
-       struct sock  * __percpu *icmp_sk;
        struct sock             *mc_autojoin_sk;
 
        struct inet_peer_base   *peers;
-       struct sock  * __percpu *tcp_sk;
        struct fqdir            *fqdir;
 
        u8 sysctl_icmp_echo_ignore_all;
@@ -87,6 +83,7 @@ struct netns_ipv4 {
 
        u32 ip_rt_min_pmtu;
        int ip_rt_mtu_expires;
+       int ip_rt_min_advmss;
 
        struct local_ports ip_local_ports;
 
index a4b5503..30cdfc4 100644 (file)
@@ -88,7 +88,6 @@ struct netns_ipv6 {
        struct fib6_table       *fib6_local_tbl;
        struct fib_rules_ops    *fib6_rules_ops;
 #endif
-       struct sock * __percpu  *icmp_sk;
        struct sock             *ndisc_sk;
        struct sock             *tcp_sk;
        struct sock             *igmp_sk;
index 9e7b21c..44a3553 100644 (file)
@@ -63,12 +63,6 @@ static inline psched_time_t psched_get_time(void)
        return PSCHED_NS2TICKS(ktime_get_ns());
 }
 
-static inline psched_tdiff_t
-psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
-{
-       return min(tv1 - tv2, bound);
-}
-
 struct qdisc_watchdog {
        u64             last_expires;
        struct hrtimer  timer;
index 472843e..9bab396 100644 (file)
@@ -518,11 +518,6 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
        BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }
 
-static inline int qdisc_qlen_cpu(const struct Qdisc *q)
-{
-       return this_cpu_ptr(q->cpu_qstats)->qlen;
-}
-
 static inline int qdisc_qlen(const struct Qdisc *q)
 {
        return q->q.qlen;
index 9185e45..a3c5311 100644 (file)
@@ -70,49 +70,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
        return 0;
 }
 
-/* Slow-path computation of checksum. Socket is locked. */
-static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
-{
-       const struct udp_sock *up = udp_sk(skb->sk);
-       int cscov = up->len;
-       __wsum csum = 0;
-
-       if (up->pcflag & UDPLITE_SEND_CC) {
-               /*
-                * Sender has set `partial coverage' option on UDP-Lite socket.
-                * The special case "up->pcslen == 0" signifies full coverage.
-                */
-               if (up->pcslen < up->len) {
-                       if (0 < up->pcslen)
-                               cscov = up->pcslen;
-                       udp_hdr(skb)->len = htons(up->pcslen);
-               }
-               /*
-                * NOTE: Causes for the error case  `up->pcslen > up->len':
-                *        (i)  Application error (will not be penalized).
-                *       (ii)  Payload too big for send buffer: data is split
-                *             into several packets, each with its own header.
-                *             In this case (e.g. last segment), coverage may
-                *             exceed packet length.
-                *       Since packets with coverage length > packet length are
-                *       illegal, we fall back to the defaults here.
-                */
-       }
-
-       skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */
-
-       skb_queue_walk(&sk->sk_write_queue, skb) {
-               const int off = skb_transport_offset(skb);
-               const int len = skb->len - off;
-
-               csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
-
-               if ((cscov -= len) <= 0)
-                       break;
-       }
-       return csum;
-}
-
 /* Fast-path computation of checksum. Socket may not be locked. */
 static inline __wsum udplite_csum(struct sk_buff *skb)
 {
index 8f0812e..b7721c3 100644 (file)
@@ -60,12 +60,20 @@ struct xdp_rxq_info {
        u32 reg_state;
        struct xdp_mem_info mem;
        unsigned int napi_id;
+       u32 frag_size;
 } ____cacheline_aligned; /* perf critical, avoid false-sharing */
 
 struct xdp_txq_info {
        struct net_device *dev;
 };
 
+enum xdp_buff_flags {
+       XDP_FLAGS_HAS_FRAGS             = BIT(0), /* non-linear xdp buff */
+       XDP_FLAGS_FRAGS_PF_MEMALLOC     = BIT(1), /* xdp paged memory is under
+                                                  * pressure
+                                                  */
+};
+
 struct xdp_buff {
        void *data;
        void *data_end;
@@ -74,13 +82,40 @@ struct xdp_buff {
        struct xdp_rxq_info *rxq;
        struct xdp_txq_info *txq;
        u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
+       u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+{
+       return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
+{
+       xdp->flags |= XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
+{
+       xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+       return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
+static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+       xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
 static __always_inline void
 xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
 {
        xdp->frame_sz = frame_sz;
        xdp->rxq = rxq;
+       xdp->flags = 0;
 }
 
 static __always_inline void
@@ -111,6 +146,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
        return (struct skb_shared_info *)xdp_data_hard_end(xdp);
 }
 
+static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+{
+       unsigned int len = xdp->data_end - xdp->data;
+       struct skb_shared_info *sinfo;
+
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       len += sinfo->xdp_frags_size;
+out:
+       return len;
+}
+
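xdp_get_buff_len() is the in-kernel counterpart of the bpf_xdp_get_buff_len() helper documented in the uapi hunk below: the linear length plus, for multi-buffer packets, the bytes accounted in the shared info. A sketch of a driver-side length check (hypothetical helper):

/* Hypothetical check: total packet length after an XDP program ran;
 * frags are counted only when XDP_FLAGS_HAS_FRAGS is set. */
static bool example_xdp_too_long(struct xdp_buff *xdp, unsigned int max_len)
{
	return xdp_get_buff_len(xdp) > max_len;
}
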
 struct xdp_frame {
        void *data;
        u16 len;
@@ -122,8 +171,19 @@ struct xdp_frame {
         */
        struct xdp_mem_info mem;
        struct net_device *dev_rx; /* used by cpumap */
+       u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+{
+       return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+{
+       return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
 #define XDP_BULK_QUEUE_SIZE    16
 struct xdp_frame_bulk {
        int count;
@@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
        frame->dev_rx = NULL;
 }
 
+static inline void
+xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
+                          unsigned int size, unsigned int truesize,
+                          bool pfmemalloc)
+{
+       skb_shinfo(skb)->nr_frags = nr_frags;
+
+       skb->len += size;
+       skb->data_len += size;
+       skb->truesize += truesize;
+       skb->pfmemalloc |= pfmemalloc;
+}
+
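xdp_update_skb_shared_info() centralizes the bookkeeping a driver does when turning a multi-buffer xdp_buff into an skb: the frag count plus the len/data_len/truesize/pfmemalloc updates. A sketch of the expected call pattern (hypothetical driver function; note that nr_frags must be read before build_skb(), which zeroes the head of the shared info area):

static struct sk_buff *example_build_skb(struct xdp_buff *xdp)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	unsigned int frags_size = sinfo->xdp_frags_size;
	u8 nr_frags = sinfo->nr_frags;
	struct sk_buff *skb;

	skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (!skb)
		return NULL;

	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	__skb_put(skb, xdp->data_end - xdp->data);

	if (xdp_buff_has_frags(xdp))
		xdp_update_skb_shared_info(skb, nr_frags, frags_size,
					   nr_frags * xdp->frame_sz,
					   xdp_buff_is_frag_pfmemalloc(xdp));
	return skb;
}
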
 /* Avoids inlining WARN macro in fast-path */
 void xdp_warn(const char *msg, const char *func, const int line);
 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
@@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
        xdp->data_end = frame->data + frame->len;
        xdp->data_meta = frame->data - frame->metasize;
        xdp->frame_sz = frame->frame_sz;
+       xdp->flags = frame->flags;
 }
 
 static inline
@@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp,
        xdp_frame->headroom = headroom - sizeof(*xdp_frame);
        xdp_frame->metasize = metasize;
        xdp_frame->frame_sz = xdp->frame_sz;
+       xdp_frame->flags = xdp->flags;
 
        return 0;
 }
@@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
        return xdp_frame;
 }
 
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                 struct xdp_buff *xdp);
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
@@ -246,14 +323,37 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
 static inline void xdp_release_frame(struct xdp_frame *xdpf)
 {
        struct xdp_mem_info *mem = &xdpf->mem;
+       struct skb_shared_info *sinfo;
+       int i;
 
        /* Curr only page_pool needs this */
-       if (mem->type == MEM_TYPE_PAGE_POOL)
-               __xdp_release_frame(xdpf->data, mem);
+       if (mem->type != MEM_TYPE_PAGE_POOL)
+               return;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_release_frame(page_address(page), mem);
+       }
+out:
+       __xdp_release_frame(xdpf->data, mem);
+}
+
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                      struct net_device *dev, u32 queue_index,
+                      unsigned int napi_id, u32 frag_size);
+static inline int
+xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                struct net_device *dev, u32 queue_index,
+                unsigned int napi_id)
+{
+       return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
 }
 
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
-                    struct net_device *dev, u32 queue_index, unsigned int napi_id);
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
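
xdp_rxq_info_reg() is now a wrapper passing frag_size == 0, so legacy drivers keep working unchanged, while multi-buffer capable drivers call __xdp_rxq_info_reg() with a non-zero fragment size. A sketch of the two paths, assuming a hypothetical struct example_rq with xdp_rxq/index/frags_capable members:

static int example_rxq_setup(struct example_rq *rq, struct net_device *dev,
			     unsigned int napi_id)
{
	/* Legacy path: no frags support, frag_size stays 0. */
	if (!rq->frags_capable)
		return xdp_rxq_info_reg(&rq->xdp_rxq, dev, rq->index, napi_id);

	/* Multi-buffer path: advertise the per-buffer size. */
	return __xdp_rxq_info_reg(&rq->xdp_rxq, dev, rq->index, napi_id,
				  PAGE_SIZE);
}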
index b0383d3..16a7574 100644 (file)
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *                     *ctx_out*, *data_in* and *data_out* must be NULL.
  *                     *repeat* must be zero.
  *
+ *             BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
  *             is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE                (1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in the BPF_PROG_LOAD command, the loaded
+ * program fully supports xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS    (1U << 5)
+
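A program opts into multi-buffer handling at load time via prog_flags; the flag ends up in prog->aux->xdp_has_frags (see the syscall.c hunk below). A raw-syscall sketch of a loader (hypothetical function; most loaders would go through libbpf instead):

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

static int load_xdp_frags_prog(const struct bpf_insn *insns, __u32 insn_cnt)
{
	union bpf_attr attr = {
		.prog_type  = BPF_PROG_TYPE_XDP,
		.insns      = (__u64)(unsigned long)insns,
		.insn_cnt   = insn_cnt,
		.license    = (__u64)(unsigned long)"GPL",
		.prog_flags = BPF_F_XDP_HAS_FRAGS,	/* opt in to xdp frags */
	};

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
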
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1775,6 +1782,8 @@ union bpf_attr {
  *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ *     Description
+ *             Get the current pid and tgid.
  *     Return
  *             A 64-bit integer containing the current tgid and pid, and
  *             created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
  *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ *     Description
+ *             Get the current uid and gid.
  *     Return
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
  *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ *     Description
+ *             Get the current task.
  *     Return
  *             A pointer to the current task struct.
  *
@@ -2369,6 +2382,8 @@ union bpf_attr {
  *             indicate that the hash is outdated and to trigger a
  *             recalculation the next time the kernel tries to access this
  *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *     Return
+ *             void.
  *
  * long bpf_get_numa_node_id(void)
  *     Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
  *             A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Description
+ *             Get the owner UID of the socket associated to *skb*.
  *     Return
  *             The owner UID of the socket associated to *skb*. If the socket
  *             is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
  *             The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ *     Description
+ *             Get the current cgroup id based on the cgroup within which
+ *             the current task is running.
  *     Return
  *             A 64-bit integer containing the current cgroup id based
  *             on the cgroup within which the current task is running.
@@ -5018,6 +5038,44 @@ union bpf_attr {
  *
  *     Return
  *             The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *     Description
+ *             Get the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *     Description
+ *             Set the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *     Description
+ *             Get the total size of a given xdp buff (linear and paged area).
+ *     Return
+ *             The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             This helper is provided as an easy way to load data from an
+ *             xdp buffer. It can be used to load *len* bytes at *offset* from
+ *             the frame associated to *xdp_md* into the buffer pointed to by
+ *             *buf*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             Store *len* bytes from buffer *buf* into the frame
+ *             associated to *xdp_md*, at *offset*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5206,6 +5264,11 @@ union bpf_attr {
        FN(get_func_arg),               \
        FN(get_func_ret),               \
        FN(get_func_arg_cnt),           \
+       FN(get_retval),                 \
+       FN(set_retval),                 \
+       FN(xdp_get_buff_len),           \
+       FN(xdp_load_bytes),             \
+       FN(xdp_store_bytes),            \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
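
Taken together, the new helpers let a frags-aware program read and write bytes that may span the linear and paged areas without knowing the frag geometry. A program-side sketch (hypothetical program; SEC() and the helper declarations are assumed to come from a libbpf new enough to know these helpers):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp.frags")
int xdp_scan(struct xdp_md *ctx)
{
	__u8 hdr[16];

	if (bpf_xdp_get_buff_len(ctx) < sizeof(hdr))
		return XDP_DROP;

	/* May copy across the linear/paged boundary. */
	if (bpf_xdp_load_bytes(ctx, 0, hdr, sizeof(hdr)))
		return XDP_DROP;

	hdr[0] ^= 1;	/* toy rewrite */
	if (bpf_xdp_store_bytes(ctx, 0, hdr, sizeof(hdr)))
		return XDP_DROP;
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";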
index c7a5be3..7f145ae 100644 (file)
@@ -837,13 +837,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
 static void *prog_fd_array_get_ptr(struct bpf_map *map,
                                   struct file *map_file, int fd)
 {
-       struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_prog *prog = bpf_prog_get(fd);
 
        if (IS_ERR(prog))
                return prog;
 
-       if (!bpf_prog_array_compatible(array, prog)) {
+       if (!bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return ERR_PTR(-EINVAL);
        }
@@ -1071,7 +1070,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
        INIT_WORK(&aux->work, prog_array_map_clear_deferred);
        INIT_LIST_HEAD(&aux->poke_progs);
        mutex_init(&aux->poke_mutex);
-       spin_lock_init(&aux->owner.lock);
 
        map = array_map_alloc(attr);
        if (IS_ERR(map)) {
index e16dafe..a1c44c1 100644 (file)
 DEFINE_IDR(btf_idr);
 DEFINE_SPINLOCK(btf_idr_lock);
 
+enum btf_kfunc_hook {
+       BTF_KFUNC_HOOK_XDP,
+       BTF_KFUNC_HOOK_TC,
+       BTF_KFUNC_HOOK_STRUCT_OPS,
+       BTF_KFUNC_HOOK_MAX,
+};
+
+enum {
+       BTF_KFUNC_SET_MAX_CNT = 32,
+};
+
+struct btf_kfunc_set_tab {
+       struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+};
+
 struct btf {
        void *data;
        struct btf_type **types;
@@ -212,6 +227,7 @@ struct btf {
        refcount_t refcnt;
        u32 id;
        struct rcu_head rcu;
+       struct btf_kfunc_set_tab *kfunc_set_tab;
 
        /* split BTF support */
        struct btf *base_btf;
@@ -1531,8 +1547,30 @@ static void btf_free_id(struct btf *btf)
        spin_unlock_irqrestore(&btf_idr_lock, flags);
 }
 
+static void btf_free_kfunc_set_tab(struct btf *btf)
+{
+       struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
+       int hook, type;
+
+       if (!tab)
+               return;
+       /* For module BTF, we directly assign the sets being registered, so
+        * there is nothing to free except kfunc_set_tab.
+        */
+       if (btf_is_module(btf))
+               goto free_tab;
+       for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
+               for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
+                       kfree(tab->sets[hook][type]);
+       }
+free_tab:
+       kfree(tab);
+       btf->kfunc_set_tab = NULL;
+}
+
 static void btf_free(struct btf *btf)
 {
+       btf_free_kfunc_set_tab(btf);
        kvfree(btf->types);
        kvfree(btf->resolved_sizes);
        kvfree(btf->resolved_ids);
@@ -5616,17 +5654,45 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
        return true;
 }
 
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+                                 const struct btf_param *arg,
+                                 const struct bpf_reg_state *reg)
+{
+       int len, sfx_len = sizeof("__sz") - 1;
+       const struct btf_type *t;
+       const char *param_name;
+
+       t = btf_type_skip_modifiers(btf, arg->type, NULL);
+       if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+               return false;
+
+       /* In the future, this can be ported to use BTF tagging */
+       param_name = btf_name_by_offset(btf, arg->name_off);
+       if (str_is_empty(param_name))
+               return false;
+       len = strlen(param_name);
+       if (len < sfx_len)
+               return false;
+       param_name += len - sfx_len;
+       if (strncmp(param_name, "__sz", sfx_len))
+               return false;
+
+       return true;
+}
+
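The mem, len pair is inferred purely from BTF parameter names: a scalar argument whose name ends in "__sz" sizes the pointer argument immediately before it, which may then be a plain void pointer. A sketch of a kfunc written against the convention (illustrative function; the selftests add similar bpf_kfunc_call_test_* variants):

/* 'mem__sz' names the size of 'mem', so the verifier checks the range
 * [mem, mem + mem__sz) instead of requiring a struct type. */
noinline void bpf_example_memzero(void *mem, int mem__sz)
{
	memset(mem, 0, mem__sz);
}
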
 static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                                    const struct btf *btf, u32 func_id,
                                    struct bpf_reg_state *regs,
                                    bool ptr_to_mem_ok)
 {
        struct bpf_verifier_log *log = &env->log;
+       u32 i, nargs, ref_id, ref_obj_id = 0;
        bool is_kfunc = btf_is_kernel(btf);
        const char *func_name, *ref_tname;
        const struct btf_type *t, *ref_t;
        const struct btf_param *args;
-       u32 i, nargs, ref_id;
+       int ref_regno = 0;
+       bool rel = false;
 
        t = btf_type_by_id(btf, func_id);
        if (!t || !btf_type_is_func(t)) {
@@ -5704,6 +5770,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                        if (reg->type == PTR_TO_BTF_ID) {
                                reg_btf = reg->btf;
                                reg_ref_id = reg->btf_id;
+                               /* Ensure only one argument is referenced PTR_TO_BTF_ID */
+                               if (reg->ref_obj_id) {
+                                       if (ref_obj_id) {
+                                               bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+                                                       regno, reg->ref_obj_id, ref_obj_id);
+                                               return -EFAULT;
+                                       }
+                                       ref_regno = regno;
+                                       ref_obj_id = reg->ref_obj_id;
+                               }
                        } else {
                                reg_btf = btf_vmlinux;
                                reg_ref_id = *reg2btf_ids[reg->type];
@@ -5727,17 +5803,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                        u32 type_size;
 
                        if (is_kfunc) {
+                               bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+
                                /* Permit pointer to mem, but only when argument
                                 * type is pointer to scalar, or struct composed
                                 * (recursively) of scalars.
+                                * When arg_mem_size is true, the pointer can be
+                                * void *.
                                 */
                                if (!btf_type_is_scalar(ref_t) &&
-                                   !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) {
+                                   !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
+                                   (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
                                        bpf_log(log,
-                                               "arg#%d pointer type %s %s must point to scalar or struct with scalar\n",
-                                               i, btf_type_str(ref_t), ref_tname);
+                                               "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+                                               i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
                                        return -EINVAL;
                                }
+
+                               /* Check for mem, len pair */
+                               if (arg_mem_size) {
+                                       if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
+                                               bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
+                                                       i, i + 1);
+                                               return -EINVAL;
+                                       }
+                                       i++;
+                                       continue;
+                               }
                        }
 
                        resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
@@ -5758,7 +5850,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                }
        }
 
-       return 0;
+       /* Either both are set, or neither */
+       WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
+       if (is_kfunc) {
+               rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
+                                               BTF_KFUNC_TYPE_RELEASE, func_id);
+               /* We already made sure ref_obj_id is set only for one argument */
+               if (rel && !ref_obj_id) {
+                       bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+                               func_name);
+                       return -EINVAL;
+               }
+               /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to
+                * other kfuncs works
+                */
+       }
+       /* returns argument register number > 0 in case of reference release kfunc */
+       return rel ? ref_regno : 0;
 }
 
 /* Compare BTF of a function with given bpf_reg_state.
@@ -6200,12 +6308,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
        return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
 }
 
+enum {
+       BTF_MODULE_F_LIVE = (1 << 0),
+};
+
 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 struct btf_module {
        struct list_head list;
        struct module *module;
        struct btf *btf;
        struct bin_attribute *sysfs_attr;
+       int flags;
 };
 
 static LIST_HEAD(btf_modules);
@@ -6233,7 +6346,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
        int err = 0;
 
        if (mod->btf_data_size == 0 ||
-           (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+           (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
+            op != MODULE_STATE_GOING))
                goto out;
 
        switch (op) {
@@ -6292,6 +6406,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
                }
 
                break;
+       case MODULE_STATE_LIVE:
+               mutex_lock(&btf_module_mutex);
+               list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+                       if (btf_mod->module != module)
+                               continue;
+
+                       btf_mod->flags |= BTF_MODULE_F_LIVE;
+                       break;
+               }
+               mutex_unlock(&btf_module_mutex);
+               break;
        case MODULE_STATE_GOING:
                mutex_lock(&btf_module_mutex);
                list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
@@ -6338,7 +6463,12 @@ struct module *btf_try_get_module(const struct btf *btf)
                if (btf_mod->btf != btf)
                        continue;
 
-               if (try_module_get(btf_mod->module))
+               /* We must only consider modules whose __init routine has
+                * finished, hence we check for the BTF_MODULE_F_LIVE flag,
+                * which is set from the notifier callback for
+                * MODULE_STATE_LIVE.
+                */
+               if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
                        res = btf_mod->module;
 
                break;
@@ -6349,6 +6479,36 @@ struct module *btf_try_get_module(const struct btf *btf)
        return res;
 }
 
+/* Returns the struct btf corresponding to the given struct module
+ *
+ * This function can return NULL or ERR_PTR. Note that the caller must
+ * release the reference on the struct btf iff btf_is_module() is true.
+ */
+static struct btf *btf_get_module_btf(const struct module *module)
+{
+       struct btf *btf = NULL;
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       struct btf_module *btf_mod, *tmp;
+#endif
+
+       if (!module)
+               return bpf_get_btf_vmlinux();
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       mutex_lock(&btf_module_mutex);
+       list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+               if (btf_mod->module != module)
+                       continue;
+
+               btf_get(btf_mod->btf);
+               btf = btf_mod->btf;
+               break;
+       }
+       mutex_unlock(&btf_module_mutex);
+#endif
+
+       return btf;
+}
+
 BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
 {
        struct btf *btf;
@@ -6416,53 +6576,181 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
 BTF_TRACING_TYPE_xxx
 #undef BTF_TRACING_TYPE
 
-/* BTF ID set registration API for modules */
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+/* Kernel Function (kfunc) BTF ID set registration API */
 
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                              struct kfunc_btf_id_set *s)
+static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+                                   enum btf_kfunc_type type,
+                                   struct btf_id_set *add_set, bool vmlinux_set)
 {
-       mutex_lock(&l->mutex);
-       list_add(&s->list, &l->list);
-       mutex_unlock(&l->mutex);
+       struct btf_kfunc_set_tab *tab;
+       struct btf_id_set *set;
+       u32 set_cnt;
+       int ret;
+
+       if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       if (!add_set->cnt)
+               return 0;
+
+       tab = btf->kfunc_set_tab;
+       if (!tab) {
+               tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
+               if (!tab)
+                       return -ENOMEM;
+               btf->kfunc_set_tab = tab;
+       }
+
+       set = tab->sets[hook][type];
+       /* Warn when register_btf_kfunc_id_set is called twice for the same hook
+        * for module sets.
+        */
+       if (WARN_ON_ONCE(set && !vmlinux_set)) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       /* We don't need to allocate, concatenate, and sort module sets, because
+        * only one is allowed per hook. Hence, we can directly assign the
+        * pointer and return.
+        */
+       if (!vmlinux_set) {
+               tab->sets[hook][type] = add_set;
+               return 0;
+       }
+
+       /* In case of vmlinux sets, there may be more than one set being
+        * registered per hook. To create a unified set, we allocate a new set
+        * and concatenate all individual sets being registered. While each set
+        * is individually sorted, they may become unsorted when concatenated,
+        * hence the final set must be sorted again for binary search via the
+        * btf_id_set_contains function to keep working.
+        */
+       set_cnt = set ? set->cnt : 0;
+
+       if (set_cnt > U32_MAX - add_set->cnt) {
+               ret = -EOVERFLOW;
+               goto end;
+       }
+
+       if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
+               ret = -E2BIG;
+               goto end;
+       }
+
+       /* Grow set */
+       set = krealloc(tab->sets[hook][type],
+                      offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+                      GFP_KERNEL | __GFP_NOWARN);
+       if (!set) {
+               ret = -ENOMEM;
+               goto end;
+       }
+
+       /* For newly allocated set, initialize set->cnt to 0 */
+       if (!tab->sets[hook][type])
+               set->cnt = 0;
+       tab->sets[hook][type] = set;
+
+       /* Concatenate the two sets */
+       memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+       set->cnt += add_set->cnt;
+
+       sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+
+       return 0;
+end:
+       btf_free_kfunc_set_tab(btf);
+       return ret;
 }
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
 
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                struct kfunc_btf_id_set *s)
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+                                 const struct btf_kfunc_id_set *kset)
 {
-       mutex_lock(&l->mutex);
-       list_del_init(&s->list);
-       mutex_unlock(&l->mutex);
+       bool vmlinux_set = !btf_is_module(btf);
+       int type, ret = 0;
+
+       for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
+               if (!kset->sets[type])
+                       continue;
+
+               ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
+               if (ret)
+                       break;
+       }
+       return ret;
 }
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
 
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-                             struct module *owner)
+static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+                                       enum btf_kfunc_hook hook,
+                                       enum btf_kfunc_type type,
+                                       u32 kfunc_btf_id)
 {
-       struct kfunc_btf_id_set *s;
+       struct btf_id_set *set;
 
-       mutex_lock(&klist->mutex);
-       list_for_each_entry(s, &klist->list, list) {
-               if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
-                       mutex_unlock(&klist->mutex);
-                       return true;
-               }
+       if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
+               return false;
+       if (!btf->kfunc_set_tab)
+               return false;
+       set = btf->kfunc_set_tab->sets[hook][type];
+       if (!set)
+               return false;
+       return btf_id_set_contains(set, kfunc_btf_id);
+}
+
+static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
+{
+       switch (prog_type) {
+       case BPF_PROG_TYPE_XDP:
+               return BTF_KFUNC_HOOK_XDP;
+       case BPF_PROG_TYPE_SCHED_CLS:
+               return BTF_KFUNC_HOOK_TC;
+       case BPF_PROG_TYPE_STRUCT_OPS:
+               return BTF_KFUNC_HOOK_STRUCT_OPS;
+       default:
+               return BTF_KFUNC_HOOK_MAX;
        }
-       mutex_unlock(&klist->mutex);
-       return false;
 }
 
-#define DEFINE_KFUNC_BTF_ID_LIST(name)                                         \
-       struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list),           \
-                                         __MUTEX_INITIALIZER(name.mutex) };   \
-       EXPORT_SYMBOL_GPL(name)
+/* Caution:
+ * Reference to the module (obtained using btf_try_get_module) corresponding to
+ * the struct btf *MUST* be held when calling this function from verifier
+ * context. This is usually true as we stash references in prog's kfunc_btf_tab;
+ * keeping the reference for the duration of the call provides the necessary
+ * protection for looking up a well-formed btf->kfunc_set_tab.
+ */
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+                              enum bpf_prog_type prog_type,
+                              enum btf_kfunc_type type, u32 kfunc_btf_id)
+{
+       enum btf_kfunc_hook hook;
 
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+       hook = bpf_prog_type_to_kfunc_hook(prog_type);
+       return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
+}
 
-#endif
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                             const struct btf_kfunc_id_set *kset)
+{
+       enum btf_kfunc_hook hook;
+       struct btf *btf;
+       int ret;
+
+       btf = btf_get_module_btf(kset->owner);
+       if (IS_ERR_OR_NULL(btf))
+               return btf ? PTR_ERR(btf) : -ENOENT;
+
+       hook = bpf_prog_type_to_kfunc_hook(prog_type);
+       ret = btf_populate_kfunc_set(btf, hook, kset);
+       /* reference is only taken for module BTF */
+       if (btf_is_module(btf))
+               btf_put(btf);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
 
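
With the list-based API gone, a subsystem describes its kfuncs as BTF ID sets keyed by kfunc type and registers them once from an initcall or module init. A sketch of the new flow (illustrative kfunc and set names; BTF_SET_START/BTF_ID/BTF_SET_END are the existing macros from linux/btf_ids.h):

BTF_SET_START(example_check_kfunc_ids)
BTF_ID(func, bpf_example_memzero)
BTF_SET_END(example_check_kfunc_ids)

static const struct btf_kfunc_id_set example_kfunc_set = {
	.owner     = THIS_MODULE,
	.check_set = &example_check_kfunc_ids,
};

static int __init example_kfunc_init(void)
{
	/* For a module this stashes the set in the module's BTF; for
	 * built-in code it is merged into the vmlinux-wide sets. */
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
					 &example_kfunc_set);
}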
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
                              const struct btf *targ_btf, __u32 targ_id)
index 514b468..279ebbe 100644 (file)
@@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  *   NET_XMIT_DROP       (1)   - drop packet and notify TCP to call cwr
  *   NET_XMIT_CN         (2)   - continue with packet output and notify TCP
  *                               to call cwr
- *   -EPERM                    - drop packet
+ *   -err                      - drop packet
  *
  * For ingress packets, this function will return -EPERM if any
  * attached program was found and if it returned != 1 during execution.
@@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
                        cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
        } else {
                ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-                                           __bpf_prog_run_save_cb);
-               ret = (ret == 1 ? 0 : -EPERM);
+                                           __bpf_prog_run_save_cb, 0);
+               if (ret && !IS_ERR_VALUE((long)ret))
+                       ret = -EFAULT;
        }
        bpf_restore_data_end(skb, saved_data_end);
        __skb_pull(skb, offset);
@@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
                               enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       int ret;
 
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        };
        struct sockaddr_storage unspec;
        struct cgroup *cgrp;
-       int ret;
 
        /* Check socket family since not all sockets represent network
         * endpoint (e.g. AF_UNIX).
@@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        }
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-                                         bpf_prog_run, flags);
-
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+                                          bpf_prog_run, 0, flags);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       int ret;
 
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-                                   bpf_prog_run);
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
                .major = major,
                .minor = minor,
        };
-       int allow;
+       int ret;
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-                                     bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
-       return !allow;
+       return ret;
 }
 
+BPF_CALL_0(bpf_get_retval)
+{
+       struct bpf_cg_run_ctx *ctx =
+               container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+       return ctx->retval;
+}
+
+static const struct bpf_func_proto bpf_get_retval_proto = {
+       .func           = bpf_get_retval,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+};
+
+BPF_CALL_1(bpf_set_retval, int, retval)
+{
+       struct bpf_cg_run_ctx *ctx =
+               container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+       ctx->retval = retval;
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_set_retval_proto = {
+       .func           = bpf_set_retval,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_current_cgroup_id_proto;
        case BPF_FUNC_perf_event_output:
                return &bpf_event_output_data_proto;
+       case BPF_FUNC_get_retval:
+               return &bpf_get_retval_proto;
+       case BPF_FUNC_set_retval:
+               return &bpf_set_retval_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
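
From the program side, the conversion replaces the old "return 1 to allow" contract with an errno-style value that can now be set explicitly. A sketch of a cgroup program using the new helpers (hypothetical program and option level; declarations assumed from libbpf's bpf_helpers.h):

#include <linux/bpf.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>

#define SOL_CUSTOM 280	/* hypothetical level, for illustration only */

SEC("cgroup/setsockopt")
int setsockopt_guard(struct bpf_sockopt *ctx)
{
	if (ctx->level == SOL_CUSTOM) {
		/* Reject with a specific errno rather than the old
		 * blanket -EPERM. */
		bpf_set_retval(-ENOPROTOOPT);
		return 0;
	}
	return 1;	/* allow; the kernel handles the option */
}

char _license[] SEC("license") = "GPL";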
@@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
        kfree(ctx.cur_val);
@@ -1350,7 +1380,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
                kfree(ctx.new_val);
        }
 
-       return ret == 1 ? 0 : -EPERM;
+       return ret;
 }
 
 #ifdef CONFIG_NET
@@ -1452,13 +1482,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, 0);
        release_sock(sk);
 
-       if (!ret) {
-               ret = -EPERM;
+       if (ret)
                goto out;
-       }
 
        if (ctx.optlen == -1) {
                /* optlen set to -1, bypass kernel */
@@ -1518,7 +1546,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
+               .current_task = current,
        };
        int ret;
 
@@ -1562,27 +1590,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, retval);
        release_sock(sk);
 
-       if (!ret) {
-               ret = -EPERM;
+       if (ret < 0)
                goto out;
-       }
 
        if (ctx.optlen > max_optlen || ctx.optlen < 0) {
                ret = -EFAULT;
                goto out;
        }
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval) {
-               ret = -EFAULT;
-               goto out;
-       }
-
        if (ctx.optlen != 0) {
                if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
                    put_user(ctx.optlen, optlen)) {
@@ -1591,8 +1609,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                }
        }
 
-       ret = ctx.retval;
-
 out:
        sockopt_free_buf(&ctx, &buf);
        return ret;
@@ -1607,10 +1623,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
                .optlen = *optlen,
                .optval = optval,
                .optval_end = optval + *optlen,
+               .current_task = current,
        };
        int ret;
 
@@ -1623,25 +1639,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
         */
 
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
-       if (!ret)
-               return -EPERM;
+                                   &ctx, bpf_prog_run, retval);
+       if (ret < 0)
+               return ret;
 
        if (ctx.optlen > *optlen)
                return -EFAULT;
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval)
-               return -EFAULT;
-
        /* BPF programs can shrink the buffer, export the modifications.
         */
        if (ctx.optlen != 0)
                *optlen = ctx.optlen;
 
-       return ctx.retval;
+       return ret;
 }
 #endif
 
@@ -2057,10 +2067,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
                        *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
                break;
        case offsetof(struct bpf_sockopt, retval):
-               if (type == BPF_WRITE)
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
-               else
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+               BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+               if (type == BPF_WRITE) {
+                       int treg = BPF_REG_9;
+
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             treg, treg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             treg, si->src_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+                       *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+               } else {
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             si->dst_reg, si->src_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+               }
                break;
        case offsetof(struct bpf_sockopt, optval):
                *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
index de3e5bc..0a1cfd8 100644 (file)
@@ -1829,28 +1829,30 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
 }
 #endif
 
-bool bpf_prog_array_compatible(struct bpf_array *array,
-                              const struct bpf_prog *fp)
+bool bpf_prog_map_compatible(struct bpf_map *map,
+                            const struct bpf_prog *fp)
 {
        bool ret;
 
        if (fp->kprobe_override)
                return false;
 
-       spin_lock(&array->aux->owner.lock);
-
-       if (!array->aux->owner.type) {
+       spin_lock(&map->owner.lock);
+       if (!map->owner.type) {
                /* There's no owner yet where we could check for
                 * compatibility.
                 */
-               array->aux->owner.type  = fp->type;
-               array->aux->owner.jited = fp->jited;
+               map->owner.type  = fp->type;
+               map->owner.jited = fp->jited;
+               map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
                ret = true;
        } else {
-               ret = array->aux->owner.type  == fp->type &&
-                     array->aux->owner.jited == fp->jited;
+               ret = map->owner.type  == fp->type &&
+                     map->owner.jited == fp->jited &&
+                     map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
        }
-       spin_unlock(&array->aux->owner.lock);
+       spin_unlock(&map->owner.lock);
+
        return ret;
 }
 
@@ -1862,13 +1864,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
        mutex_lock(&aux->used_maps_mutex);
        for (i = 0; i < aux->used_map_cnt; i++) {
                struct bpf_map *map = aux->used_maps[i];
-               struct bpf_array *array;
 
-               if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+               if (!map_type_contains_progs(map))
                        continue;
 
-               array = container_of(map, struct bpf_array, map);
-               if (!bpf_prog_array_compatible(array, fp)) {
+               if (!bpf_prog_map_compatible(map, fp)) {
                        ret = -EINVAL;
                        goto out;
                }
index b3e6b94..650e5d2 100644 (file)
@@ -397,7 +397,8 @@ static int cpu_map_kthread_run(void *data)
        return 0;
 }
 
-static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu,
+                                     struct bpf_map *map, int fd)
 {
        struct bpf_prog *prog;
 
@@ -405,7 +406,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
        if (IS_ERR(prog))
                return PTR_ERR(prog);
 
-       if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+       if (prog->expected_attach_type != BPF_XDP_CPUMAP ||
+           !bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return -EINVAL;
        }
@@ -457,7 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
        rcpu->map_id = map->id;
        rcpu->value.qsize  = value->qsize;
 
-       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
                goto free_ptr_ring;
 
        /* Setup kthread */
index fe019db..038f6d7 100644 (file)
@@ -858,7 +858,8 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
                                             BPF_PROG_TYPE_XDP, false);
                if (IS_ERR(prog))
                        goto err_put_dev;
-               if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+               if (prog->expected_attach_type != BPF_XDP_DEVMAP ||
+                   !bpf_prog_map_compatible(&dtab->map, prog))
                        goto err_put_prog;
        }
 
index fa4505f..72ce1ed 100644 (file)
@@ -556,16 +556,14 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
 
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
-       const struct bpf_map *map = filp->private_data;
-       const struct bpf_array *array;
+       struct bpf_map *map = filp->private_data;
        u32 type = 0, jited = 0;
 
-       if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
-               array = container_of(map, struct bpf_array, map);
-               spin_lock(&array->aux->owner.lock);
-               type  = array->aux->owner.type;
-               jited = array->aux->owner.jited;
-               spin_unlock(&array->aux->owner.lock);
+       if (map_type_contains_progs(map)) {
+               spin_lock(&map->owner.lock);
+               type  = map->owner.type;
+               jited = map->owner.jited;
+               spin_unlock(&map->owner.lock);
        }
 
        seq_printf(m,
@@ -874,6 +872,7 @@ static int map_create(union bpf_attr *attr)
        atomic64_set(&map->refcnt, 1);
        atomic64_set(&map->usercnt, 1);
        mutex_init(&map->freeze_mutex);
+       spin_lock_init(&map->owner.lock);
 
        map->spin_lock_off = -EINVAL;
        map->timer_off = -EINVAL;
@@ -2217,7 +2216,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
                                 BPF_F_ANY_ALIGNMENT |
                                 BPF_F_TEST_STATE_FREQ |
                                 BPF_F_SLEEPABLE |
-                                BPF_F_TEST_RND_HI32))
+                                BPF_F_TEST_RND_HI32 |
+                                BPF_F_XDP_HAS_FRAGS))
                return -EINVAL;
 
        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -2303,6 +2303,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
        prog->aux->dst_prog = dst_prog;
        prog->aux->offload_requested = !!attr->prog_ifindex;
        prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
+       prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
 
        err = security_bpf_prog_alloc(prog->aux);
        if (err)
@@ -3318,6 +3319,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
        case BPF_FLOW_DISSECTOR:
        case BPF_SK_LOOKUP:
                return netns_bpf_prog_query(attr, uattr);
+       case BPF_SK_SKB_STREAM_PARSER:
+       case BPF_SK_SKB_STREAM_VERDICT:
+       case BPF_SK_MSG_VERDICT:
+       case BPF_SK_SKB_VERDICT:
+               return sock_map_bpf_prog_query(attr, uattr);
        default:
                return -EINVAL;
        }
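
The new cases route BPF_PROG_QUERY for the sockmap attach types to sock_map_bpf_prog_query(), where the query target is the map fd rather than a cgroup or netns fd. A syscall-level sketch (hypothetical fds and sizes):

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

static int query_sockmap_progs(int map_fd)
{
	__u32 prog_ids[4];
	union bpf_attr attr = {};
	int err;

	attr.query.target_fd   = map_fd;	/* sockmap/sockhash fd */
	attr.query.attach_type = BPF_SK_SKB_STREAM_VERDICT;
	attr.query.prog_ids    = (__u64)(unsigned long)prog_ids;
	attr.query.prog_cnt    = 4;

	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr));
	/* On success, attr.query.prog_cnt holds the number of ids filled. */
	return err;
}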
index a39eede..dcf065e 100644 (file)
@@ -452,7 +452,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 {
        return base_type(type) == PTR_TO_SOCKET ||
                base_type(type) == PTR_TO_TCP_SOCK ||
-               base_type(type) == PTR_TO_MEM;
+               base_type(type) == PTR_TO_MEM ||
+               base_type(type) == PTR_TO_BTF_ID;
 }
 
 static bool type_is_rdonly_mem(u32 type)
@@ -1743,7 +1744,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 }
 
 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
-                                        s16 offset, struct module **btf_modp)
+                                        s16 offset)
 {
        struct bpf_kfunc_btf kf_btf = { .offset = offset };
        struct bpf_kfunc_btf_tab *tab;
@@ -1797,8 +1798,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
                sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
                     kfunc_btf_cmp_by_off, NULL);
        }
-       if (btf_modp)
-               *btf_modp = b->module;
        return b->btf;
 }
 
@@ -1815,8 +1814,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
 }
 
 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
-                                      u32 func_id, s16 offset,
-                                      struct module **btf_modp)
+                                      u32 func_id, s16 offset)
 {
        if (offset) {
                if (offset < 0) {
@@ -1827,7 +1825,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
                        return ERR_PTR(-EINVAL);
                }
 
-               return __find_kfunc_desc_btf(env, offset, btf_modp);
+               return __find_kfunc_desc_btf(env, offset);
        }
        return btf_vmlinux ?: ERR_PTR(-ENOENT);
 }
@@ -1890,7 +1888,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
                prog_aux->kfunc_btf_tab = btf_tab;
        }
 
-       desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+       desc_btf = find_kfunc_desc_btf(env, func_id, offset);
        if (IS_ERR(desc_btf)) {
                verbose(env, "failed to find BTF for kernel function\n");
                return PTR_ERR(desc_btf);
@@ -2351,7 +2349,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
        if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
                return NULL;
 
-       desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+       desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
        if (IS_ERR(desc_btf))
                return "<error>";
 
@@ -3498,11 +3496,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 
 #define MAX_PACKET_OFF 0xffff
 
-static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
-{
-       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
-}
-
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
                                       const struct bpf_call_arg_meta *meta,
                                       enum bpf_access_type t)
@@ -4877,6 +4870,62 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
        }
 }
 
+static int check_mem_size_reg(struct bpf_verifier_env *env,
+                             struct bpf_reg_state *reg, u32 regno,
+                             bool zero_size_allowed,
+                             struct bpf_call_arg_meta *meta)
+{
+       int err;
+
+       /* This is used to refine r0 return value bounds for helpers
+        * that enforce this value as an upper bound on return values.
+        * See do_refine_retval_range() for helpers that can refine
+        * the return value. C type of helper is u32 so we pull register
+        * bound from umax_value however, if negative verifier errors
+        * out. Only upper bounds can be learned because retval is an
+        * int type and negative retvals are allowed.
+        */
+       if (meta)
+               meta->msize_max_value = reg->umax_value;
+
+       /* The register is SCALAR_VALUE; the access check
+        * happens using its boundaries.
+        */
+       if (!tnum_is_const(reg->var_off))
+               /* For unprivileged variable accesses, disable raw
+                * mode so that the program is required to
+                * initialize all the memory that the helper could
+                * just partially fill up.
+                */
+               meta = NULL;
+
+       if (reg->smin_value < 0) {
+               verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
+                       regno);
+               return -EACCES;
+       }
+
+       if (reg->umin_value == 0) {
+               err = check_helper_mem_access(env, regno - 1, 0,
+                                             zero_size_allowed,
+                                             meta);
+               if (err)
+                       return err;
+       }
+
+       if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+               verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+                       regno);
+               return -EACCES;
+       }
+       err = check_helper_mem_access(env, regno - 1,
+                                     reg->umax_value,
+                                     zero_size_allowed, meta);
+       if (!err)
+               err = mark_chain_precision(env, regno);
+       return err;
+}
+
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size)
 {
@@ -4900,6 +4949,28 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
        return check_helper_mem_access(env, regno, mem_size, true, NULL);
 }
 
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                            u32 regno)
+{
+       struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
+       bool may_be_null = type_may_be_null(mem_reg->type);
+       struct bpf_reg_state saved_reg;
+       int err;
+
+       WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+
+       if (may_be_null) {
+               saved_reg = *mem_reg;
+               mark_ptr_not_null_reg(mem_reg);
+       }
+
+       err = check_mem_size_reg(env, reg, regno, true, NULL);
+
+       if (may_be_null)
+               *mem_reg = saved_reg;
+       return err;
+}
+
 /* Implementation details:
  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
  * Two bpf_map_lookups (even with the same key) will have different reg->id.
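
The new check_kfunc_mem_size_reg() path lets kfuncs take a (pointer, size)
argument pair; per the test kfuncs later in this series, the size argument is
matched by the "__sz" suffix on its name. A minimal BPF-side sketch, assuming
the test kfunc below is built into the kernel and a libbpf toolchain; section
name and buffer size are illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* kfunc exported by net/bpf/test_run.c in this series */
    extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) __ksym;

    char LICENSE[] SEC("license") = "GPL";

    SEC("tc")
    int mem_len_ok(struct __sk_buff *skb)
    {
            __u8 buf[16] = {};

            /* verifier validates buf/sizeof(buf) as a mem+len pair */
            bpf_kfunc_call_test_mem_len_pass1(buf, sizeof(buf));
            return 0;
    }
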
@@ -5439,51 +5510,7 @@ skip_type_check:
        } else if (arg_type_is_mem_size(arg_type)) {
                bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
-               /* This is used to refine r0 return value bounds for helpers
-                * that enforce this value as an upper bound on return values.
-                * See do_refine_retval_range() for helpers that can refine
-                * the return value. C type of helper is u32 so we pull register
-                * bound from umax_value however, if negative verifier errors
-                * out. Only upper bounds can be learned because retval is an
-                * int type and negative retvals are allowed.
-                */
-               meta->msize_max_value = reg->umax_value;
-
-               /* The register is SCALAR_VALUE; the access check
-                * happens using its boundaries.
-                */
-               if (!tnum_is_const(reg->var_off))
-                       /* For unprivileged variable accesses, disable raw
-                        * mode so that the program is required to
-                        * initialize all the memory that the helper could
-                        * just partially fill up.
-                        */
-                       meta = NULL;
-
-               if (reg->smin_value < 0) {
-                       verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
-                               regno);
-                       return -EACCES;
-               }
-
-               if (reg->umin_value == 0) {
-                       err = check_helper_mem_access(env, regno - 1, 0,
-                                                     zero_size_allowed,
-                                                     meta);
-                       if (err)
-                               return err;
-               }
-
-               if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
-                       verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
-                               regno);
-                       return -EACCES;
-               }
-               err = check_helper_mem_access(env, regno - 1,
-                                             reg->umax_value,
-                                             zero_size_allowed, meta);
-               if (!err)
-                       err = mark_chain_precision(env, regno);
+               err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
        } else if (arg_type_is_alloc_size(arg_type)) {
                if (!tnum_is_const(reg->var_off)) {
                        verbose(env, "R%d is not a known constant'\n",
@@ -6842,22 +6869,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
        }
 }
 
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                           int *insn_idx_p)
 {
        const struct btf_type *t, *func, *func_proto, *ptr_type;
        struct bpf_reg_state *regs = cur_regs(env);
        const char *func_name, *ptr_type_name;
        u32 i, nargs, func_id, ptr_type_id;
-       struct module *btf_mod = NULL;
+       int err, insn_idx = *insn_idx_p;
        const struct btf_param *args;
        struct btf *desc_btf;
-       int err;
+       bool acq;
 
        /* skip for now, but return error when we find this in fixup_kfunc_call */
        if (!insn->imm)
                return 0;
 
-       desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+       desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
        if (IS_ERR(desc_btf))
                return PTR_ERR(desc_btf);
 
@@ -6866,23 +6894,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
        func_name = btf_name_by_offset(desc_btf, func->name_off);
        func_proto = btf_type_by_id(desc_btf, func->type);
 
-       if (!env->ops->check_kfunc_call ||
-           !env->ops->check_kfunc_call(func_id, btf_mod)) {
+       if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                     BTF_KFUNC_TYPE_CHECK, func_id)) {
                verbose(env, "calling kernel function %s is not allowed\n",
                        func_name);
                return -EACCES;
        }
 
+       acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                       BTF_KFUNC_TYPE_ACQUIRE, func_id);
+
        /* Check the arguments */
        err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
-       if (err)
+       if (err < 0)
                return err;
+       /* In case of a release function, we get the register number of the
+        * refcounted PTR_TO_BTF_ID back from btf_check_kfunc_arg_match();
+        * do the release now.
+        */
+       if (err) {
+               err = release_reference(env, regs[err].ref_obj_id);
+               if (err) {
+                       verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+                               func_name, func_id);
+                       return err;
+               }
+       }
 
        for (i = 0; i < CALLER_SAVED_REGS; i++)
                mark_reg_not_init(env, regs, caller_saved[i]);
 
        /* Check return type */
        t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+
+       if (acq && !btf_type_is_ptr(t)) {
+               verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+               return -EINVAL;
+       }
+
        if (btf_type_is_scalar(t)) {
                mark_reg_unknown(env, regs, BPF_REG_0);
                mark_btf_func_reg_size(env, BPF_REG_0, t->size);
@@ -6901,7 +6949,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
                regs[BPF_REG_0].btf = desc_btf;
                regs[BPF_REG_0].type = PTR_TO_BTF_ID;
                regs[BPF_REG_0].btf_id = ptr_type_id;
+               if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                             BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+                       regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
+                       /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
+                       regs[BPF_REG_0].id = ++env->id_gen;
+               }
                mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+               if (acq) {
+                       int id = acquire_reference_state(env, insn_idx);
+
+                       if (id < 0)
+                               return id;
+                       regs[BPF_REG_0].id = id;
+                       regs[BPF_REG_0].ref_obj_id = id;
+               }
        } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
 
        nargs = btf_type_vlen(func_proto);
@@ -11549,7 +11611,7 @@ static int do_check(struct bpf_verifier_env *env)
                                if (insn->src_reg == BPF_PSEUDO_CALL)
                                        err = check_func_call(env, insn, &env->insn_idx);
                                else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
-                                       err = check_kfunc_call(env, insn);
+                                       err = check_kfunc_call(env, insn, &env->insn_idx);
                                else
                                        err = check_helper_call(env, insn, &env->insn_idx);
                                if (err)
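
Taken together, the check_kfunc_call() changes mean an "acquire" kfunc returns
a reference-tracked, possibly-NULL pointer that the program must release on
every path. A minimal sketch against the test kfuncs from net/bpf/test_run.c,
assuming a libbpf toolchain; the program body is illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct prog_test_ref_kfunc;

    extern struct prog_test_ref_kfunc *
    bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
    extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;

    char LICENSE[] SEC("license") = "GPL";

    SEC("tc")
    int ref_roundtrip(struct __sk_buff *skb)
    {
            unsigned long sp = 0;
            struct prog_test_ref_kfunc *p;

            p = bpf_kfunc_call_test_acquire(&sp);
            if (!p)         /* BTF_KFUNC_TYPE_RET_NULL makes this check mandatory */
                    return 0;
            bpf_kfunc_call_test_release(p); /* omitting this fails verification */
            return 0;
    }
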
index 21aa306..06a9e22 100644 (file)
@@ -1562,6 +1562,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 
 extern const struct bpf_func_proto bpf_skb_output_proto;
 extern const struct bpf_func_proto bpf_xdp_output_proto;
+extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
           struct bpf_map *, map, u64, flags)
@@ -1661,6 +1662,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sock_from_file_proto;
        case BPF_FUNC_get_socket_cookie:
                return &bpf_get_socket_ptr_cookie_proto;
+       case BPF_FUNC_xdp_get_buff_len:
+               return &bpf_xdp_get_buff_len_trace_proto;
 #endif
        case BPF_FUNC_seq_printf:
                return prog->expected_attach_type == BPF_TRACE_ITER ?
index d0b2e09..be97dc6 100644 (file)
@@ -111,7 +111,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
                return -ENOMEM;
        }
 
-       refcount_set(&ax25_rt->refcount, 1);
        ax25_rt->callsign     = route->dest_addr;
        ax25_rt->dev          = ax25_dev->dev;
        ax25_rt->digipeat     = NULL;
@@ -160,12 +159,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
                    ax25cmp(&route->dest_addr, &s->callsign) == 0) {
                        if (ax25_route_list == s) {
                                ax25_route_list = s->next;
-                               ax25_put_route(s);
+                               __ax25_put_route(s);
                        } else {
                                for (t = ax25_route_list; t != NULL; t = t->next) {
                                        if (t->next == s) {
                                                t->next = s->next;
-                                               ax25_put_route(s);
+                                               __ax25_put_route(s);
                                                break;
                                        }
                                }
index 46dd957..65b52b4 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/slab.h>
+#include <linux/init.h>
 #include <linux/vmalloc.h>
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
@@ -130,7 +131,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
 static int bpf_test_finish(const union bpf_attr *kattr,
                           union bpf_attr __user *uattr, const void *data,
-                          u32 size, u32 retval, u32 duration)
+                          struct skb_shared_info *sinfo, u32 size,
+                          u32 retval, u32 duration)
 {
        void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
        int err = -EFAULT;
@@ -145,8 +147,36 @@ static int bpf_test_finish(const union bpf_attr *kattr,
                err = -ENOSPC;
        }
 
-       if (data_out && copy_to_user(data_out, data, copy_size))
-               goto out;
+       if (data_out) {
+               int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+
+               if (copy_to_user(data_out, data, len))
+                       goto out;
+
+               if (sinfo) {
+                       int i, offset = len, data_len;
+
+                       for (i = 0; i < sinfo->nr_frags; i++) {
+                               skb_frag_t *frag = &sinfo->frags[i];
+
+                               if (offset >= copy_size) {
+                                       err = -ENOSPC;
+                                       break;
+                               }
+
+                               data_len = min_t(int, copy_size - offset,
+                                                skb_frag_size(frag));
+
+                               if (copy_to_user(data_out + offset,
+                                                skb_frag_address(frag),
+                                                data_len))
+                                       goto out;
+
+                               offset += data_len;
+                       }
+               }
+       }
+
        if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
                goto out;
        if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
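
With bpf_test_finish() now walking the shared info, BPF_PROG_TEST_RUN copies
the linear area and every fragment back to user space. A hedged user-space
sketch, assuming a libbpf version that provides bpf_prog_test_run_opts(); the
9000-byte input is illustrative and forces the kernel to build frags:

    #include <bpf/bpf.h>

    static int run_mb_test(int prog_fd, void *in, void *out, __u32 out_sz)
    {
            DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
                    .data_in       = in,
                    .data_size_in  = 9000,  /* > one page: tail lands in frags */
                    .data_out      = out,
                    .data_size_out = out_sz,
                    .repeat        = 1,
            );

            return bpf_prog_test_run_opts(prog_fd, &opts);
    }
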
@@ -171,6 +201,8 @@ int noinline bpf_fentry_test1(int a)
 {
        return a + 1;
 }
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
 
 int noinline bpf_fentry_test2(int a, u64 b)
 {
@@ -232,28 +264,142 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
        return sk;
 }
 
+struct prog_test_ref_kfunc {
+       int a;
+       int b;
+       struct prog_test_ref_kfunc *next;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+       .a = 42,
+       .b = 108,
+       .next = &prog_test_struct,
+};
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+       /* randomly return NULL */
+       if (get_jiffies_64() % 2)
+               return NULL;
+       return &prog_test_struct;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+}
+
+struct prog_test_pass1 {
+       int x0;
+       struct {
+               int x1;
+               struct {
+                       int x2;
+                       struct {
+                               int x3;
+                       };
+               };
+       };
+};
+
+struct prog_test_pass2 {
+       int len;
+       short arr1[4];
+       struct {
+               char arr2[4];
+               unsigned long arr3[8];
+       } x;
+};
+
+struct prog_test_fail1 {
+       void *p;
+       int x;
+};
+
+struct prog_test_fail2 {
+       int x8;
+       struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+       int len;
+       char arr1[2];
+       char arr2[];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
 __diag_pop();
 
 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
 
-BTF_SET_START(test_sk_kfunc_ids)
+BTF_SET_START(test_sk_check_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test1)
 BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
-
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
-{
-       if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
-               return true;
-       return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
-}
-
-static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
-                          u32 headroom, u32 tailroom)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID(func, bpf_kfunc_call_test_pass1)
+BTF_ID(func, bpf_kfunc_call_test_pass2)
+BTF_ID(func, bpf_kfunc_call_test_fail1)
+BTF_ID(func, bpf_kfunc_call_test_fail2)
+BTF_ID(func, bpf_kfunc_call_test_fail3)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_SET_END(test_sk_check_kfunc_ids)
+
+BTF_SET_START(test_sk_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_acquire_kfunc_ids)
+
+BTF_SET_START(test_sk_release_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_SET_END(test_sk_release_kfunc_ids)
+
+BTF_SET_START(test_sk_ret_null_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_ret_null_kfunc_ids)
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+                          u32 size, u32 headroom, u32 tailroom)
 {
        void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
-       u32 user_size = kattr->test.data_size_in;
        void *data;
 
        if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
@@ -581,7 +727,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (kattr->test.flags || kattr->test.cpu)
                return -EINVAL;
 
-       data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+       data = bpf_test_init(kattr, kattr->test.data_size_in,
+                            size, NET_SKB_PAD + NET_IP_ALIGN,
                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
        if (IS_ERR(data))
                return PTR_ERR(data);
@@ -683,7 +830,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        /* bpf program can never convert linear skb to non-linear */
        if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
                size = skb_headlen(skb);
-       ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+       ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
+                             duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct __sk_buff));
@@ -758,16 +906,16 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                          union bpf_attr __user *uattr)
 {
        u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-       u32 headroom = XDP_PACKET_HEADROOM;
        u32 size = kattr->test.data_size_in;
+       u32 headroom = XDP_PACKET_HEADROOM;
+       u32 retval, duration, max_data_sz;
        u32 repeat = kattr->test.repeat;
        struct netdev_rx_queue *rxqueue;
+       struct skb_shared_info *sinfo;
        struct xdp_buff xdp = {};
-       u32 retval, duration;
+       int i, ret = -EINVAL;
        struct xdp_md *ctx;
-       u32 max_data_sz;
        void *data;
-       int ret = -EINVAL;
 
        if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
            prog->expected_attach_type == BPF_XDP_CPUMAP)
@@ -787,26 +935,60 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                headroom -= ctx->data;
        }
 
-       /* XDP have extra tailroom as (most) drivers use full page */
        max_data_sz = 4096 - headroom - tailroom;
+       size = min_t(u32, size, max_data_sz);
 
-       data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
+       data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
        if (IS_ERR(data)) {
                ret = PTR_ERR(data);
                goto free_ctx;
        }
 
        rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
-       xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
-                     &rxqueue->xdp_rxq);
+       rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+       xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
        xdp_prepare_buff(&xdp, data, headroom, size, true);
+       sinfo = xdp_get_shared_info_from_buff(&xdp);
 
        ret = xdp_convert_md_to_buff(ctx, &xdp);
        if (ret)
                goto free_data;
 
+       if (unlikely(kattr->test.data_size_in > size)) {
+               void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+
+               while (size < kattr->test.data_size_in) {
+                       struct page *page;
+                       skb_frag_t *frag;
+                       int data_len;
+
+                       page = alloc_page(GFP_KERNEL);
+                       if (!page) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+
+                       frag = &sinfo->frags[sinfo->nr_frags++];
+                       __skb_frag_set_page(frag, page);
+
+                       data_len = min_t(int, kattr->test.data_size_in - size,
+                                        PAGE_SIZE);
+                       skb_frag_size_set(frag, data_len);
+
+                       if (copy_from_user(page_address(page), data_in + size,
+                                          data_len)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
+                       sinfo->xdp_frags_size += data_len;
+                       size += data_len;
+               }
+               xdp_buff_set_frags_flag(&xdp);
+       }
+
        if (repeat > 1)
                bpf_prog_change_xdp(NULL, prog);
+
        ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
        /* We convert the xdp_buff back to an xdp_md before checking the return
         * code so the reference count of any held netdevice will be decremented
@@ -816,12 +998,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (ret)
                goto out;
 
-       if (xdp.data_meta != data + headroom ||
-           xdp.data_end != xdp.data_meta + size)
-               size = xdp.data_end - xdp.data_meta;
-
-       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
-                             duration);
+       size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
+       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+                             retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct xdp_md));
@@ -830,6 +1009,8 @@ out:
        if (repeat > 1)
                bpf_prog_change_xdp(prog, NULL);
 free_data:
+       for (i = 0; i < sinfo->nr_frags; i++)
+               __free_page(skb_frag_page(&sinfo->frags[i]));
        kfree(data);
 free_ctx:
        kfree(ctx);
@@ -876,7 +1057,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (size < ETH_HLEN)
                return -EINVAL;
 
-       data = bpf_test_init(kattr, size, 0, 0);
+       data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
        if (IS_ERR(data))
                return PTR_ERR(data);
 
@@ -911,8 +1092,8 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (ret < 0)
                goto out;
 
-       ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
-                             retval, duration);
+       ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
+                             sizeof(flow_keys), retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, user_ctx,
                                     sizeof(struct bpf_flow_keys));
@@ -1016,7 +1197,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
                user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
        }
 
-       ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+       ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
 
@@ -1067,3 +1248,17 @@ out:
        kfree(ctx);
        return err;
 }
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &test_sk_check_kfunc_ids,
+       .acquire_set  = &test_sk_acquire_kfunc_ids,
+       .release_set  = &test_sk_release_kfunc_ids,
+       .ret_null_set = &test_sk_ret_null_kfunc_ids,
+};
+
+static int __init bpf_prog_test_run_init(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+}
+late_initcall(bpf_prog_test_run_init);
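
The same registration interface replaces the per-verifier check_kfunc_call
hooks removed elsewhere in this merge. A hedged sketch of how a module would
expose its own kfunc set; "demo_kfunc" and the module itself are hypothetical:

    #include <linux/btf.h>
    #include <linux/btf_ids.h>
    #include <linux/module.h>

    /* hypothetical kfunc this module exports to BPF */
    noinline int demo_kfunc(int x)
    {
            return x + 1;
    }

    BTF_SET_START(demo_check_kfunc_ids)
    BTF_ID(func, demo_kfunc)
    BTF_SET_END(demo_check_kfunc_ids)

    static const struct btf_kfunc_id_set demo_kfunc_set = {
            .owner     = THIS_MODULE,
            .check_set = &demo_check_kfunc_ids,
    };

    static int __init demo_init(void)
    {
            return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
                                             &demo_kfunc_set);
    }
    module_init(demo_init);

    MODULE_LICENSE("GPL");
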
index 4603b7c..a06931c 100644 (file)
@@ -3783,6 +3783,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_ANYTHING,
 };
+
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff *, xdp)
+{
+       return xdp_get_buff_len(xdp);
+}
+
+static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+       .func           = bpf_xdp_get_buff_len,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+       .func           = bpf_xdp_get_buff_len,
+       .gpl_only       = false,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg1_btf_id    = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
        return xdp_data_meta_unsupported(xdp) ? 0 :
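
From the program side, ctx->data_end - ctx->data only covers the linear area
of a frags ("multi-buffer") packet, so the new helper is the way to learn the
full frame length. A minimal sketch, assuming UAPI headers that already carry
bpf_xdp_get_buff_len(); the 1500-byte threshold is illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    char LICENSE[] SEC("license") = "GPL";

    SEC("xdp")
    int drop_jumbo(struct xdp_md *ctx)
    {
            /* linear bytes plus all fragments */
            if (bpf_xdp_get_buff_len(ctx) > 1500)
                    return XDP_DROP;
            return XDP_PASS;
    }
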
@@ -3817,11 +3839,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+                            void *buf, unsigned long len, bool flush)
+{
+       unsigned long ptr_len, ptr_off = 0;
+       skb_frag_t *next_frag, *end_frag;
+       struct skb_shared_info *sinfo;
+       void *src, *dst;
+       u8 *ptr_buf;
+
+       if (likely(xdp->data_end - xdp->data >= off + len)) {
+               src = flush ? buf : xdp->data + off;
+               dst = flush ? xdp->data + off : buf;
+               memcpy(dst, src, len);
+               return;
+       }
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       end_frag = &sinfo->frags[sinfo->nr_frags];
+       next_frag = &sinfo->frags[0];
+
+       ptr_len = xdp->data_end - xdp->data;
+       ptr_buf = xdp->data;
+
+       while (true) {
+               if (off < ptr_off + ptr_len) {
+                       unsigned long copy_off = off - ptr_off;
+                       unsigned long copy_len = min(len, ptr_len - copy_off);
+
+                       src = flush ? buf : ptr_buf + copy_off;
+                       dst = flush ? ptr_buf + copy_off : buf;
+                       memcpy(dst, src, copy_len);
+
+                       off += copy_len;
+                       len -= copy_len;
+                       buf += copy_len;
+               }
+
+               if (!len || next_frag == end_frag)
+                       break;
+
+               ptr_off += ptr_len;
+               ptr_buf = skb_frag_address(next_frag);
+               ptr_len = skb_frag_size(next_frag);
+               next_frag++;
+       }
+}
+
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       u32 size = xdp->data_end - xdp->data;
+       void *addr = xdp->data;
+       int i;
+
+       if (unlikely(offset > 0xffff || len > 0xffff))
+               return ERR_PTR(-EFAULT);
+
+       if (offset + len > xdp_get_buff_len(xdp))
+               return ERR_PTR(-EINVAL);
+
+       if (offset < size) /* linear area */
+               goto out;
+
+       offset -= size;
+       for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+               u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+               if (offset < frag_size) {
+                       addr = skb_frag_address(&sinfo->frags[i]);
+                       size = frag_size;
+                       break;
+               }
+               offset -= frag_size;
+       }
+out:
+       return offset + len <= size ? addr + offset : NULL;
+}
+
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+          void *, buf, u32, len)
+{
+       void *ptr;
+
+       ptr = bpf_xdp_pointer(xdp, offset, len);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       if (!ptr)
+               bpf_xdp_copy_buf(xdp, offset, buf, len, false);
+       else
+               memcpy(buf, ptr, len);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+       .func           = bpf_xdp_load_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+          void *, buf, u32, len)
+{
+       void *ptr;
+
+       ptr = bpf_xdp_pointer(xdp, offset, len);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       if (!ptr)
+               bpf_xdp_copy_buf(xdp, offset, buf, len, true);
+       else
+               memcpy(ptr, buf, len);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+       .func           = bpf_xdp_store_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+};
+
+static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+       struct xdp_rxq_info *rxq = xdp->rxq;
+       unsigned int tailroom;
+
+       if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+               return -EOPNOTSUPP;
+
+       tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+       if (unlikely(offset > tailroom))
+               return -EINVAL;
+
+       memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+       skb_frag_size_add(frag, offset);
+       sinfo->xdp_frags_size += offset;
+
+       return 0;
+}
+
+static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       int i, n_frags_free = 0, len_free = 0;
+
+       if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
+               return -EINVAL;
+
+       for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+               skb_frag_t *frag = &sinfo->frags[i];
+               int shrink = min_t(int, offset, skb_frag_size(frag));
+
+               len_free += shrink;
+               offset -= shrink;
+
+               if (skb_frag_size(frag) == shrink) {
+                       struct page *page = skb_frag_page(frag);
+
+                       __xdp_return(page_address(page), &xdp->rxq->mem,
+                                    false, NULL);
+                       n_frags_free++;
+               } else {
+                       skb_frag_size_sub(frag, shrink);
+                       break;
+               }
+       }
+       sinfo->nr_frags -= n_frags_free;
+       sinfo->xdp_frags_size -= len_free;
+
+       if (unlikely(!sinfo->nr_frags)) {
+               xdp_buff_clear_frags_flag(xdp);
+               xdp->data_end -= offset;
+       }
+
+       return 0;
+}
+
 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
 {
        void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
        void *data_end = xdp->data_end + offset;
 
+       if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
+               if (offset < 0)
+                       return bpf_xdp_frags_shrink_tail(xdp, -offset);
+
+               return bpf_xdp_frags_increase_tail(xdp, offset);
+       }
+
        /* Notice that xdp_data_hard_end have reserved some tailroom */
        if (unlikely(data_end > data_hard_end))
                return -EINVAL;
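
bpf_xdp_pointer() gives the two new helpers a fast path when the requested
range sits in one buffer and falls back to bpf_xdp_copy_buf() when it spans
fragments, so programs can read and write at offsets beyond xdp->data_end.
A minimal sketch, assuming UAPI headers with the new helpers; the offset is
illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    char LICENSE[] SEC("license") = "GPL";

    SEC("xdp")
    int flip_byte(struct xdp_md *ctx)
    {
            __u8 buf[4];

            /* offset 1500 may land inside a fragment; that is fine */
            if (bpf_xdp_load_bytes(ctx, 1500, buf, sizeof(buf)))
                    return XDP_PASS;
            buf[0] ^= 0xff;
            bpf_xdp_store_bytes(ctx, 1500, buf, sizeof(buf));
            return XDP_PASS;
    }
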
@@ -4047,6 +4266,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
        enum bpf_map_type map_type = ri->map_type;
 
+       /* XDP_REDIRECT is not fully supported yet for xdp frags since
+        * not all XDP capable drivers can map non-linear xdp_frame in
+        * ndo_xdp_xmit.
+        */
+       if (unlikely(xdp_buff_has_frags(xdp) &&
+                    map_type != BPF_MAP_TYPE_CPUMAP))
+               return -EOPNOTSUPP;
+
        if (map_type == BPF_MAP_TYPE_XSKMAP)
                return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
 
@@ -4590,10 +4817,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
 };
 #endif
 
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
                                  unsigned long off, unsigned long len)
 {
-       memcpy(dst_buff, src_buff + off, len);
+       struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+
+       bpf_xdp_copy_buf(xdp, off, dst, len, false);
        return 0;
 }
 
@@ -4604,11 +4833,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
 
        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
                return -EINVAL;
-       if (unlikely(!xdp ||
-                    xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+       if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
                return -EFAULT;
 
-       return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+       return bpf_event_output(map, flags, meta, meta_size, xdp,
                                xdp_size, bpf_xdp_copy);
 }
 
@@ -7533,6 +7762,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_xdp_redirect_map_proto;
        case BPF_FUNC_xdp_adjust_tail:
                return &bpf_xdp_adjust_tail_proto;
+       case BPF_FUNC_xdp_get_buff_len:
+               return &bpf_xdp_get_buff_len_proto;
+       case BPF_FUNC_xdp_load_bytes:
+               return &bpf_xdp_load_bytes_proto;
+       case BPF_FUNC_xdp_store_bytes:
+               return &bpf_xdp_store_bytes_proto;
        case BPF_FUNC_fib_lookup:
                return &bpf_xdp_fib_lookup_proto;
        case BPF_FUNC_check_mtu:
@@ -10062,7 +10297,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
        .convert_ctx_access     = tc_cls_act_convert_ctx_access,
        .gen_prologue           = tc_cls_act_prologue,
        .gen_ld_abs             = bpf_gen_ld_abs,
-       .check_kfunc_call       = bpf_prog_test_check_kfunc_call,
 };
 
 const struct bpf_prog_ops tc_cls_act_prog_ops = {
index a5b5bb9..c53d9aa 100644 (file)
@@ -301,6 +301,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
 
        return peer;
 }
+EXPORT_SYMBOL_GPL(get_net_ns_by_id);
 
 /*
  * setup_net runs the initializers for the network namespace object.
index 4ff806d..cccf21f 100644 (file)
@@ -2266,6 +2266,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
                        /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
                        sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+                       sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
                        /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
                        max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
                }
index 1827669..2d213c4 100644 (file)
@@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
        return NULL;
 }
 
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-                               struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+                               u32 which)
 {
        struct sk_psock_progs *progs = sock_map_progs(map);
-       struct bpf_prog **pprog;
 
        if (!progs)
                return -EOPNOTSUPP;
 
        switch (which) {
        case BPF_SK_MSG_VERDICT:
-               pprog = &progs->msg_parser;
+               *pprog = &progs->msg_parser;
                break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
        case BPF_SK_SKB_STREAM_PARSER:
-               pprog = &progs->stream_parser;
+               *pprog = &progs->stream_parser;
                break;
 #endif
        case BPF_SK_SKB_STREAM_VERDICT:
                if (progs->skb_verdict)
                        return -EBUSY;
-               pprog = &progs->stream_verdict;
+               *pprog = &progs->stream_verdict;
                break;
        case BPF_SK_SKB_VERDICT:
                if (progs->stream_verdict)
                        return -EBUSY;
-               pprog = &progs->skb_verdict;
+               *pprog = &progs->skb_verdict;
                break;
        default:
                return -EOPNOTSUPP;
        }
 
+       return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+                               struct bpf_prog *old, u32 which)
+{
+       struct bpf_prog **pprog;
+       int ret;
+
+       ret = sock_map_prog_lookup(map, &pprog, which);
+       if (ret)
+               return ret;
+
        if (old)
                return psock_replace_prog(pprog, prog, old);
 
@@ -1455,6 +1467,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
        return 0;
 }
 
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr)
+{
+       __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+       u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+       struct bpf_prog **pprog;
+       struct bpf_prog *prog;
+       struct bpf_map *map;
+       struct fd f;
+       u32 id = 0;
+       int ret;
+
+       if (attr->query.query_flags)
+               return -EINVAL;
+
+       f = fdget(ufd);
+       map = __bpf_map_get(f);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       rcu_read_lock();
+
+       ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+       if (ret)
+               goto end;
+
+       prog = *pprog;
+       prog_cnt = !prog ? 0 : 1;
+
+       if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+               goto end;
+
+       /* We do not hold the refcnt; the bpf prog may be released
+        * asynchronously, in which case its id reads as 0.
+        */
+       id = data_race(prog->aux->id);
+       if (id == 0)
+               prog_cnt = 0;
+
+end:
+       rcu_read_unlock();
+
+       if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+           (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+           copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+               ret = -EFAULT;
+
+       fdput(f);
+       return ret;
+}
+
 static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
        switch (link->map->map_type) {
index 7aba355..361df31 100644 (file)
@@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
 }
 
 /* Returns 0 on success, negative on failure */
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
-                    struct net_device *dev, u32 queue_index, unsigned int napi_id)
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                      struct net_device *dev, u32 queue_index,
+                      unsigned int napi_id, u32 frag_size)
 {
        if (!dev) {
                WARN(1, "Missing net_device from driver");
@@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
        xdp_rxq->dev = dev;
        xdp_rxq->queue_index = queue_index;
        xdp_rxq->napi_id = napi_id;
+       xdp_rxq->frag_size = frag_size;
 
        xdp_rxq->reg_state = REG_STATE_REGISTERED;
        return 0;
 }
-EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
 
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
 {
@@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  * is used for those calls sites.  Thus, allowing for faster recycling
  * of xdp_frames/pages in those cases.
  */
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
-                        struct xdp_buff *xdp)
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                 struct xdp_buff *xdp)
 {
        struct xdp_mem_allocator *xa;
        struct page *page;
@@ -406,12 +408,38 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdpf->mem, false, NULL);
+       }
+out:
        __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdpf->mem, true, NULL);
+       }
+out:
        __xdp_return(xdpf->data, &xdpf->mem, true, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
@@ -447,7 +475,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
        struct xdp_mem_allocator *xa;
 
        if (mem->type != MEM_TYPE_PAGE_POOL) {
-               __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+               xdp_return_frame(xdpf);
                return;
        }
 
@@ -466,12 +494,38 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
                bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
        }
 
+       if (unlikely(xdp_frame_has_frags(xdpf))) {
+               struct skb_shared_info *sinfo;
+               int i;
+
+               sinfo = xdp_get_shared_info_from_frame(xdpf);
+               for (i = 0; i < sinfo->nr_frags; i++) {
+                       skb_frag_t *frag = &sinfo->frags[i];
+
+                       bq->q[bq->count++] = skb_frag_address(frag);
+                       if (bq->count == XDP_BULK_QUEUE_SIZE)
+                               xdp_flush_frame_bulk(bq);
+               }
+       }
        bq->q[bq->count++] = xdpf->data;
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
+       }
+out:
        __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 }
 
@@ -561,8 +615,14 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
                                           struct sk_buff *skb,
                                           struct net_device *dev)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
        unsigned int headroom, frame_size;
        void *hard_start;
+       u8 nr_frags;
+
+       /* xdp frags frame */
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               nr_frags = sinfo->nr_frags;
 
        /* Part of headroom was reserved to xdpf */
        headroom = sizeof(*xdpf) + xdpf->headroom;
@@ -582,6 +642,12 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
        if (xdpf->metasize)
                skb_metadata_set(skb, xdpf->metasize);
 
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               xdp_update_skb_shared_info(skb, nr_frags,
+                                          sinfo->xdp_frags_size,
+                                          nr_frags * xdpf->frame_sz,
+                                          xdp_frame_is_frag_pfmemalloc(xdpf));
+
        /* Essential SKB info: protocol and skb->dev */
        skb->protocol = eth_type_trans(skb, dev);
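
Drivers opt into frags-aware tail growth by registering a per-fragment buffer
size; existing callers of xdp_rxq_info_reg() keep the old behavior, since it
now wraps __xdp_rxq_info_reg() (in include/net/xdp.h, not shown in this
digest) with frag_size == 0, which makes bpf_xdp_frags_increase_tail() return
-EOPNOTSUPP. A hedged driver-side sketch; the ring structure and its fields
are illustrative:

    #include <net/xdp.h>

    struct demo_ring {                      /* illustrative driver state */
            struct xdp_rxq_info xdp_rxq;
            struct net_device *netdev;
            u32 q_index;
            unsigned int napi_id;
    };

    static int demo_ring_reg_xdp(struct demo_ring *ring)
    {
            /* frag_size = PAGE_SIZE: each fragment is a full page, so the
             * core knows how much tailroom bpf_xdp_adjust_tail() may use
             */
            return __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
                                      ring->q_index, ring->napi_id,
                                      PAGE_SIZE);
    }
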
 
index 5183e62..671c377 100644 (file)
@@ -136,11 +136,6 @@ static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
        return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
 }
 
-static inline u64 max48(const u64 seq1, const u64 seq2)
-{
-       return after48(seq1, seq2) ? seq1 : seq2;
-}
-
 /**
  * dccp_loss_count - Approximate the number of lost data packets in a burst loss
  * @s1:  last known sequence number before the loss ('hole')
index 0ea2927..ae66256 100644 (file)
@@ -1030,15 +1030,9 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v4_ctl_sk);
 }
 
-static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&dccp_hashinfo, AF_INET);
-}
-
 static struct pernet_operations dccp_v4_ops = {
        .init   = dccp_v4_init_net,
        .exit   = dccp_v4_exit_net,
-       .exit_batch = dccp_v4_exit_batch,
        .id     = &dccp_v4_pernet_id,
        .size   = sizeof(struct dccp_v4_pernet),
 };
index fa66351..eab3bd1 100644 (file)
@@ -1115,15 +1115,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v6_ctl_sk);
 }
 
-static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&dccp_hashinfo, AF_INET6);
-}
-
 static struct pernet_operations dccp_v6_ops = {
        .init   = dccp_v6_init_net,
        .exit   = dccp_v6_exit_net,
-       .exit_batch = dccp_v6_exit_batch,
        .id     = &dccp_v6_pernet_id,
        .size   = sizeof(struct dccp_v6_pernet),
 };
index 91e7a22..64d805b 100644 (file)
@@ -22,6 +22,7 @@
 #include "feat.h"
 
 struct inet_timewait_death_row dccp_death_row = {
+       .tw_refcount = REFCOUNT_INIT(1),
        .sysctl_max_tw_buckets = NR_FILE * 2,
        .hashinfo       = &dccp_hashinfo,
 };
index e3c7d26..517cc83 100644 (file)
@@ -113,26 +113,15 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
        return dsa_tag_8021q_bridge_join(ds, info);
 }
 
-static int dsa_switch_bridge_leave(struct dsa_switch *ds,
-                                  struct dsa_notifier_bridge_info *info)
+static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds,
+                                         struct dsa_notifier_bridge_info *info)
 {
-       struct dsa_switch_tree *dst = ds->dst;
        struct netlink_ext_ack extack = {0};
        bool change_vlan_filtering = false;
        bool vlan_filtering;
        struct dsa_port *dp;
        int err;
 
-       if (dst->index == info->tree_index && ds->index == info->sw_index &&
-           ds->ops->port_bridge_leave)
-               ds->ops->port_bridge_leave(ds, info->port, info->bridge);
-
-       if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
-           ds->ops->crosschip_bridge_leave)
-               ds->ops->crosschip_bridge_leave(ds, info->tree_index,
-                                               info->sw_index, info->port,
-                                               info->bridge);
-
        if (ds->needs_standalone_vlan_filtering &&
            !br_vlan_enabled(info->bridge.dev)) {
                change_vlan_filtering = true;
@@ -172,6 +161,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
                        return err;
        }
 
+       return 0;
+}
+
+static int dsa_switch_bridge_leave(struct dsa_switch *ds,
+                                  struct dsa_notifier_bridge_info *info)
+{
+       struct dsa_switch_tree *dst = ds->dst;
+       int err;
+
+       if (dst->index == info->tree_index && ds->index == info->sw_index &&
+           ds->ops->port_bridge_leave)
+               ds->ops->port_bridge_leave(ds, info->port, info->bridge);
+
+       if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
+           ds->ops->crosschip_bridge_leave)
+               ds->ops->crosschip_bridge_leave(ds, info->tree_index,
+                                               info->sw_index, info->port,
+                                               info->bridge);
+
+       if (ds->dst->index == info->tree_index && ds->index == info->sw_index) {
+               err = dsa_switch_sync_vlan_filtering(ds, info);
+               if (err)
+                       return err;
+       }
+
        return dsa_tag_8021q_bridge_leave(ds, info);
 }
 
index 043e4e9..ff9ec76 100644 (file)
@@ -259,11 +259,6 @@ static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct)
        return ntohs(rct->sequence_nr);
 }
 
-static inline u16 get_prp_lan_id(struct prp_rct *rct)
-{
-       return ntohs(rct->lan_id_and_LSDU_size) >> 12;
-}
-
 /* assume there is a valid rct */
 static inline bool prp_check_lsdu_size(struct sk_buff *skb,
                                       struct prp_rct *rct,
index de610cb..b60c9fd 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook  */
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bpf.h>
@@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
        }
 }
 
-BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
 BTF_ID(func, tcp_reno_ssthresh)
 BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
 
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
-{
-       if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
-               return true;
-       return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
-}
+static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &bpf_tcp_ca_check_kfunc_ids,
+};
 
 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
        .get_func_proto         = bpf_tcp_ca_get_func_proto,
        .is_valid_access        = bpf_tcp_ca_is_valid_access,
        .btf_struct_access      = bpf_tcp_ca_btf_struct_access,
-       .check_kfunc_call       = bpf_tcp_ca_check_kfunc_call,
 };
 
 static int bpf_tcp_ca_init_member(const struct btf_type *t,
@@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
        .init = bpf_tcp_ca_init,
        .name = "tcp_congestion_ops",
 };
+
+static int __init bpf_tcp_ca_kfunc_init(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+}
+late_initcall(bpf_tcp_ca_kfunc_init);
index b458986..4c53994 100644 (file)
@@ -1257,34 +1257,13 @@ fib_info_laddrhash_bucket(const struct net *net, __be32 val)
        return &fib_info_laddrhash[slot];
 }
 
-static struct hlist_head *fib_info_hash_alloc(int bytes)
-{
-       if (bytes <= PAGE_SIZE)
-               return kzalloc(bytes, GFP_KERNEL);
-       else
-               return (struct hlist_head *)
-                       __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                        get_order(bytes));
-}
-
-static void fib_info_hash_free(struct hlist_head *hash, int bytes)
-{
-       if (!hash)
-               return;
-
-       if (bytes <= PAGE_SIZE)
-               kfree(hash);
-       else
-               free_pages((unsigned long) hash, get_order(bytes));
-}
-
 static void fib_info_hash_move(struct hlist_head *new_info_hash,
                               struct hlist_head *new_laddrhash,
                               unsigned int new_size)
 {
        struct hlist_head *old_info_hash, *old_laddrhash;
        unsigned int old_size = fib_info_hash_size;
-       unsigned int i, bytes;
+       unsigned int i;
 
        spin_lock_bh(&fib_info_lock);
        old_info_hash = fib_info_hash;
@@ -1325,9 +1304,8 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 
        spin_unlock_bh(&fib_info_lock);
 
-       bytes = old_size * sizeof(struct hlist_head *);
-       fib_info_hash_free(old_info_hash, bytes);
-       fib_info_hash_free(old_laddrhash, bytes);
+       kvfree(old_info_hash);
+       kvfree(old_laddrhash);
 }
 
 __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
@@ -1444,19 +1422,19 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                unsigned int new_size = fib_info_hash_size << 1;
                struct hlist_head *new_info_hash;
                struct hlist_head *new_laddrhash;
-               unsigned int bytes;
+               size_t bytes;
 
                if (!new_size)
                        new_size = 16;
-               bytes = new_size * sizeof(struct hlist_head *);
-               new_info_hash = fib_info_hash_alloc(bytes);
-               new_laddrhash = fib_info_hash_alloc(bytes);
+               bytes = (size_t)new_size * sizeof(struct hlist_head *);
+               new_info_hash = kvzalloc(bytes, GFP_KERNEL);
+               new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
                if (!new_info_hash || !new_laddrhash) {
-                       fib_info_hash_free(new_info_hash, bytes);
-                       fib_info_hash_free(new_laddrhash, bytes);
-               } else
+                       kvfree(new_info_hash);
+                       kvfree(new_laddrhash);
+               } else {
                        fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
-
+               }
                if (!fib_info_hash_size)
                        goto failure;
        }
index b7e277d..72a375c 100644 (file)
@@ -192,24 +192,14 @@ struct icmp_control {
 
 static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
-/*
- *     The ICMP socket(s). This is the most convenient way to flow control
- *     our ICMP output as well as maintain a clean interface throughout
- *     all layers. All Socketless IP sends will soon be gone.
- *
- *     On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmp_sk(struct net *net)
-{
-       return this_cpu_read(*net->ipv4.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
 
 /* Called with BH disabled */
 static inline struct sock *icmp_xmit_lock(struct net *net)
 {
        struct sock *sk;
 
-       sk = icmp_sk(net);
+       sk = this_cpu_read(ipv4_icmp_sk);
 
        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
                /* This can happen if the output path signals a
@@ -217,11 +207,13 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
                 */
                return NULL;
        }
+       sock_net_set(sk, net);
        return sk;
 }
 
 static inline void icmp_xmit_unlock(struct sock *sk)
 {
+       sock_net_set(sk, &init_net);
        spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -363,14 +355,13 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
        return 0;
 }
 
-static void icmp_push_reply(struct icmp_bxm *icmp_param,
+static void icmp_push_reply(struct sock *sk,
+                           struct icmp_bxm *icmp_param,
                            struct flowi4 *fl4,
                            struct ipcm_cookie *ipc, struct rtable **rt)
 {
-       struct sock *sk;
        struct sk_buff *skb;
 
-       sk = icmp_sk(dev_net((*rt)->dst.dev));
        if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
                           icmp_param->data_len+icmp_param->head_len,
                           icmp_param->head_len,
@@ -452,7 +443,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
        if (IS_ERR(rt))
                goto out_unlock;
        if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
-               icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
+               icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
        ip_rt_put(rt);
 out_unlock:
        icmp_xmit_unlock(sk);
@@ -766,7 +757,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
        if (!fl4.saddr)
                fl4.saddr = htonl(INADDR_DUMMY);
 
-       icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+       icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
 ende:
        ip_rt_put(rt);
 out_unlock:
@@ -1434,46 +1425,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
        },
 };
 
-static void __net_exit icmp_sk_exit(struct net *net)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
-       free_percpu(net->ipv4.icmp_sk);
-       net->ipv4.icmp_sk = NULL;
-}
-
 static int __net_init icmp_sk_init(struct net *net)
 {
-       int i, err;
-
-       net->ipv4.icmp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv4.icmp_sk)
-               return -ENOMEM;
-
-       for_each_possible_cpu(i) {
-               struct sock *sk;
-
-               err = inet_ctl_sock_create(&sk, PF_INET,
-                                          SOCK_RAW, IPPROTO_ICMP, net);
-               if (err < 0)
-                       goto fail;
-
-               *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
-
-               /* Enough space for 2 64K ICMP packets, including
-                * sk_buff/skb_shared_info struct overhead.
-                */
-               sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
-
-               /*
-                * Speedup sock_wfree()
-                */
-               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
-       }
-
        /* Control parameters for ECHO replies. */
        net->ipv4.sysctl_icmp_echo_ignore_all = 0;
        net->ipv4.sysctl_icmp_echo_enable_probe = 0;
@@ -1499,18 +1452,36 @@ static int __net_init icmp_sk_init(struct net *net)
        net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
 
        return 0;
-
-fail:
-       icmp_sk_exit(net);
-       return err;
 }
 
 static struct pernet_operations __net_initdata icmp_sk_ops = {
        .init = icmp_sk_init,
-       .exit = icmp_sk_exit,
 };
 
 int __init icmp_init(void)
 {
+       int err, i;
+
+       for_each_possible_cpu(i) {
+               struct sock *sk;
+
+               err = inet_ctl_sock_create(&sk, PF_INET,
+                                          SOCK_RAW, IPPROTO_ICMP, &init_net);
+               if (err < 0)
+                       return err;
+
+               per_cpu(ipv4_icmp_sk, i) = sk;
+
+               /* Enough space for 2 64K ICMP packets, including
+                * sk_buff/skb_shared_info struct overhead.
+                */
+               sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+
+               /*
+                * Speedup sock_wfree()
+                */
+               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
+       }
        return register_pernet_subsys(&icmp_sk_ops);
 }
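
The conversion above replaces the per-netns array of ICMP control sockets with a single boot-time per-CPU socket that is temporarily re-tagged with the caller's netns while locked; the TCP control sockets later in this series adopt the same pattern. A compilable userspace sketch of that borrow-and-tag idea, with all names illustrative and a pthread spinlock standing in for the socket lock:

#include <pthread.h>
#include <stddef.h>

struct ctx;                            /* stand-in for struct net */

struct ctl_sock {
	pthread_spinlock_t lock;       /* stand-in for sk->sk_lock.slock */
	struct ctx *owner;             /* stand-in for sock_net(sk) */
};

/* One instance per CPU in the kernel; pthread_spin_init() at startup
 * is omitted for brevity. */
static struct ctl_sock shared_sk;

/* Borrow the shared socket and tag it with the caller's context,
 * like icmp_xmit_lock() followed by sock_net_set(sk, net). */
static struct ctl_sock *ctl_lock(struct ctx *net)
{
	if (pthread_spin_trylock(&shared_sk.lock))
		return NULL;           /* output path re-entered us */
	shared_sk.owner = net;
	return &shared_sk;
}

/* Return it to the neutral owner, like sock_net_set(sk, &init_net). */
static void ctl_unlock(struct ctl_sock *sk)
{
	sk->owner = NULL;
	pthread_spin_unlock(&sk->lock);
}
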
index 437afe3..9e0bbd0 100644 (file)
@@ -52,14 +52,15 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
        spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
-       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
-                       hashinfo->bhash_size)];
+       bhead = &hashinfo->bhash[tw->tw_bslot];
 
        spin_lock(&bhead->lock);
        inet_twsk_bind_unhash(tw, hashinfo);
        spin_unlock(&bhead->lock);
 
-       atomic_dec(&tw->tw_dr->tw_count);
+       if (refcount_dec_and_test(&tw->tw_dr->tw_refcount))
+               kfree(tw->tw_dr);
+
        inet_twsk_put(tw);
 }
 
@@ -110,8 +111,12 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
           Note, that any socket with inet->num != 0 MUST be bound in
           binding cache, even if it is closed.
         */
-       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
-                       hashinfo->bhash_size)];
+       /* Cache inet_bhashfn(), because 'struct net' might no longer be
+        * available later in inet_twsk_kill().
+        */
+       tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
+                                   hashinfo->bhash_size);
+       bhead = &hashinfo->bhash[tw->tw_bslot];
        spin_lock(&bhead->lock);
        tw->tw_tb = icsk->icsk_bind_hash;
        WARN_ON(!icsk->icsk_bind_hash);
@@ -145,10 +150,6 @@ static void tw_timer_handler(struct timer_list *t)
 {
        struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
 
-       if (tw->tw_kill)
-               __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
-       else
-               __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
        inet_twsk_kill(tw);
 }
 
@@ -158,7 +159,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 {
        struct inet_timewait_sock *tw;
 
-       if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+       if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets)
                return NULL;
 
        tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
@@ -244,59 +245,15 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
         * of PAWS.
         */
 
-       tw->tw_kill = timeo <= 4*HZ;
        if (!rearm) {
+               bool kill = timeo <= 4*HZ;
+
+               __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
+                                                    LINUX_MIB_TIMEWAITED);
                BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
-               atomic_inc(&tw->tw_dr->tw_count);
+               refcount_inc(&tw->tw_dr->tw_refcount);
        } else {
                mod_timer_pending(&tw->tw_timer, jiffies + timeo);
        }
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
-
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
-{
-       struct inet_timewait_sock *tw;
-       struct sock *sk;
-       struct hlist_nulls_node *node;
-       unsigned int slot;
-
-       for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
-               struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
-restart_rcu:
-               cond_resched();
-               rcu_read_lock();
-restart:
-               sk_nulls_for_each_rcu(sk, node, &head->chain) {
-                       if (sk->sk_state != TCP_TIME_WAIT)
-                               continue;
-                       tw = inet_twsk(sk);
-                       if ((tw->tw_family != family) ||
-                               refcount_read(&twsk_net(tw)->ns.count))
-                               continue;
-
-                       if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
-                               continue;
-
-                       if (unlikely((tw->tw_family != family) ||
-                                    refcount_read(&twsk_net(tw)->ns.count))) {
-                               inet_twsk_put(tw);
-                               goto restart;
-                       }
-
-                       rcu_read_unlock();
-                       local_bh_disable();
-                       inet_twsk_deschedule_put(tw);
-                       local_bh_enable();
-                       goto restart_rcu;
-               }
-               /* If the nulls value we got at the end of this lookup is
-                * not the expected one, we must restart lookup.
-                * We probably met an item that was moved to another chain.
-                */
-               if (get_nulls_value(node) != slot)
-                       goto restart;
-               rcu_read_unlock();
-       }
-}
-EXPORT_SYMBOL_GPL(inet_twsk_purge);
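
tw_count is replaced above by a biased refcount: the netns holds one reference for as long as it lives, so the number of in-flight timewait sockets is tw_refcount - 1, and the death row is freed by whichever side (netns exit or last timewait teardown) drops the final reference. A standalone sketch of the bias, with C11 atomics standing in for refcount_t and illustrative names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct death_row {
	atomic_int tw_refcount;        /* starts at 1: the netns reference */
	int sysctl_max_tw_buckets;
};

/* Live timewait sockets = refcount minus the netns bias of 1. */
static bool tw_alloc_allowed(struct death_row *dr)
{
	return atomic_load(&dr->tw_refcount) - 1 < dr->sysctl_max_tw_buckets;
}

/* Called by both timewait teardown and netns exit; last one frees. */
static void tw_refcount_put(struct death_row *dr)
{
	if (atomic_fetch_sub(&dr->tw_refcount, 1) == 1)
		free(dr);
}

This is also why inet_twsk_purge() can be deleted from the TCP exit paths later in the series: a timewait socket no longer pins or dereferences a dying netns.
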
index f30273a..2883607 100644 (file)
@@ -59,8 +59,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
        socket_seq_show(seq);
        seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
                   sock_prot_inuse_get(net, &tcp_prot), orphans,
-                  atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
-                  proto_memory_allocated(&tcp_prot));
+                  refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1,
+                  sockets, proto_memory_allocated(&tcp_prot));
        seq_printf(seq, "UDP: inuse %d mem %ld\n",
                   sock_prot_inuse_get(net, &udp_prot),
                   proto_memory_allocated(&udp_prot));
index ff6f91c..e42e283 100644 (file)
 
 #define DEFAULT_MIN_PMTU (512 + 20 + 20)
 #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
-
+#define DEFAULT_MIN_ADVMSS 256
 static int ip_rt_max_size;
 static int ip_rt_redirect_number __read_mostly = 9;
 static int ip_rt_redirect_load __read_mostly   = HZ / 50;
 static int ip_rt_redirect_silence __read_mostly        = ((HZ / 50) << (9 + 1));
 static int ip_rt_error_cost __read_mostly      = HZ;
 static int ip_rt_error_burst __read_mostly     = 5 * HZ;
-static int ip_rt_min_advmss __read_mostly      = 256;
 
 static int ip_rt_gc_timeout __read_mostly      = RT_GC_TIMEOUT;
 
@@ -1298,9 +1297,10 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 
 static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 {
+       struct net *net = dev_net(dst->dev);
        unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
        unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
-                                   ip_rt_min_advmss);
+                                   net->ipv4.ip_rt_min_advmss);
 
        return min(advmss, IPV4_MAX_PMTU - header_size);
 }
@@ -3535,13 +3535,6 @@ static struct ctl_table ipv4_route_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
-       {
-               .procname       = "min_adv_mss",
-               .data           = &ip_rt_min_advmss,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
        { }
 };
 
@@ -3569,6 +3562,13 @@ static struct ctl_table ipv4_route_netns_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
+       {
+               .procname       = "min_adv_mss",
+               .data           = &init_net.ipv4.ip_rt_min_advmss,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        { },
 };
 
@@ -3631,6 +3631,7 @@ static __net_init int netns_ip_rt_init(struct net *net)
        /* Set default value for namespaceified sysctls */
        net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
        net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
+       net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS;
        return 0;
 }
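
With min_adv_mss now per netns, the advertised MSS floor can differ between namespaces. A sketch of the resulting clamp in ipv4_default_advmss(), with the constants spelled out (IPV4_MAX_PMTU is 65535):

/* advmss = clamp(mtu - headers, netns min_advmss, IPV4_MAX_PMTU - headers) */
static unsigned int default_advmss(unsigned int mtu, unsigned int min_advmss)
{
	const unsigned int hdrs = 20 + 20;    /* sizeof(iphdr) + sizeof(tcphdr) */
	unsigned int advmss = mtu > hdrs ? mtu - hdrs : 0;

	if (advmss < min_advmss)
		advmss = min_advmss;
	if (advmss > 65535 - hdrs)            /* IPV4_MAX_PMTU cap */
		advmss = 65535 - hdrs;
	return advmss;
}
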
 
index 97eb547..1cae27b 100644 (file)
@@ -589,6 +589,14 @@ static struct ctl_table ipv4_table[] = {
 };
 
 static struct ctl_table ipv4_net_table[] = {
+       /* tcp_max_tw_buckets must be first in this table. */
+       {
+               .procname       = "tcp_max_tw_buckets",
+/*             .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
        {
                .procname       = "icmp_echo_ignore_all",
                .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
@@ -1001,13 +1009,6 @@ static struct ctl_table ipv4_net_table[] = {
                .extra2         = &two,
        },
        {
-               .procname       = "tcp_max_tw_buckets",
-               .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec
-       },
-       {
                .procname       = "tcp_max_syn_backlog",
                .data           = &init_net.ipv4.sysctl_max_syn_backlog,
                .maxlen         = sizeof(int),
@@ -1400,7 +1401,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
                if (!table)
                        goto err_alloc;
 
-               for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+               /* skip first entry (sysctl_max_tw_buckets) */
+               for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
                        if (table[i].data) {
                                /* Update the variables to point into
                                 * the current struct net
@@ -1415,6 +1417,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
                }
        }
 
+       table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets;
+
        net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
        if (!net->ipv4.ipv4_hdr)
                goto err_reg;
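
Because tcp_death_row is now a separately allocated object rather than an embedded struct member, its sysctl entry cannot be rebased by the generic pointer-offset loop; the patch pins it to slot 0, skips it in the loop, and patches table[0].data by hand. A sketch of that "first entry is special" pattern, with illustrative structure names:

#include <stddef.h>

struct ctl_entry {
	const char *procname;
	void *data;
};

/* Per-netns clone: rebase every .data pointer except slot 0, which
 * points into a separately allocated object and is patched directly. */
static void clone_table(struct ctl_entry *table, size_t n,
                        char *old_base, char *new_base, void *tw_buckets)
{
	size_t i;

	for (i = 1; i < n; i++)                /* skip the special slot 0 */
		if (table[i].data)
			table[i].data = new_base +
					((char *)table[i].data - old_base);

	table[0].data = tw_buckets;    /* &death_row->sysctl_max_tw_buckets */
}
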
index 78e8146..cf1ce81 100644 (file)
@@ -894,8 +894,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
                return mss_now;
 
        /* Note : tcp_tso_autosize() will eventually split this later */
-       new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
-       new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+       new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size);
 
        /* We try hard to avoid divides here */
        size_goal = tp->gso_segs * mss_now;
index ec55500..02e8626 100644 (file)
@@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
        .set_state      = bbr_set_state,
 };
 
-BTF_SET_START(tcp_bbr_kfunc_ids)
+BTF_SET_START(tcp_bbr_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, bbr_init)
@@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
 #endif
 #endif
-BTF_SET_END(tcp_bbr_kfunc_ids)
+BTF_SET_END(tcp_bbr_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_bbr_check_kfunc_ids,
+};
 
 static int __init bbr_register(void)
 {
        int ret;
 
        BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
-       ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
-       if (ret)
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&tcp_bbr_cong_ops);
 }
 
 static void __exit bbr_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
        tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
 }
 
index e07837e..24d562d 100644 (file)
@@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
        .name           = "cubic",
 };
 
-BTF_SET_START(tcp_cubic_kfunc_ids)
+BTF_SET_START(tcp_cubic_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, cubictcp_init)
@@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event)
 BTF_ID(func, cubictcp_acked)
 #endif
 #endif
-BTF_SET_END(tcp_cubic_kfunc_ids)
+BTF_SET_END(tcp_cubic_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_cubic_check_kfunc_ids,
+};
 
 static int __init cubictcp_register(void)
 {
@@ -534,16 +537,14 @@ static int __init cubictcp_register(void)
        /* divide by bic_scale and by constant Srtt (100ms) */
        do_div(cube_factor, bic_scale * 10);
 
-       ret = tcp_register_congestion_control(&cubictcp);
-       if (ret)
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&cubictcp);
 }
 
 static void __exit cubictcp_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
        tcp_unregister_congestion_control(&cubictcp);
 }
 
index 0d7ab3c..1943a66 100644 (file)
@@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
        .name           = "dctcp-reno",
 };
 
-BTF_SET_START(tcp_dctcp_kfunc_ids)
+BTF_SET_START(tcp_dctcp_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, dctcp_init)
@@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo)
 BTF_ID(func, dctcp_state)
 #endif
 #endif
-BTF_SET_END(tcp_dctcp_kfunc_ids)
+BTF_SET_END(tcp_dctcp_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_dctcp_check_kfunc_ids,
+};
 
 static int __init dctcp_register(void)
 {
        int ret;
 
        BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
-       ret = tcp_register_congestion_control(&dctcp);
-       if (ret)
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&dctcp);
 }
 
 static void __exit dctcp_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
        tcp_unregister_congestion_control(&dctcp);
 }
 
index fec656f..6873f46 100644 (file)
@@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
+static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+
 static u32 tcp_v4_init_seq(const struct sk_buff *skb)
 {
        return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -206,7 +208,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+       struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;
@@ -810,7 +812,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        arg.tos = ip_hdr(skb)->tos;
        arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
        if (sk) {
                ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                                   inet_twsk(sk)->tw_mark : sk->sk_mark;
@@ -825,6 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                              transmit_time);
 
        ctl_sk->sk_mark = 0;
+       sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
        local_bh_enable();
@@ -908,7 +912,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
        arg.tos = tos;
        arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
        ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                           inet_twsk(sk)->tw_mark : sk->sk_mark;
        ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -921,6 +926,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
                              transmit_time);
 
        ctl_sk->sk_mark = 0;
+       sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        local_bh_enable();
 }
@@ -3111,41 +3117,18 @@ EXPORT_SYMBOL(tcp_prot);
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-       int cpu;
+       struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row;
 
        if (net->ipv4.tcp_congestion_control)
                bpf_module_put(net->ipv4.tcp_congestion_control,
                               net->ipv4.tcp_congestion_control->owner);
-
-       for_each_possible_cpu(cpu)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
-       free_percpu(net->ipv4.tcp_sk);
+       if (refcount_dec_and_test(&tcp_death_row->tw_refcount))
+               kfree(tcp_death_row);
 }
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-       int res, cpu, cnt;
-
-       net->ipv4.tcp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv4.tcp_sk)
-               return -ENOMEM;
-
-       for_each_possible_cpu(cpu) {
-               struct sock *sk;
-
-               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
-                                          IPPROTO_TCP, net);
-               if (res)
-                       goto fail;
-               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-
-               /* Please enforce IP_DF and IPID==0 for RST and
-                * ACK sent in SYN-RECV and TIME-WAIT state.
-                */
-               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
-
-               *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
-       }
+       int cnt;
 
        net->ipv4.sysctl_tcp_ecn = 2;
        net->ipv4.sysctl_tcp_ecn_fallback = 1;
@@ -3172,9 +3155,13 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.sysctl_tcp_tw_reuse = 2;
        net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
 
+       net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL);
+       if (!net->ipv4.tcp_death_row)
+               return -ENOMEM;
+       refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1);
        cnt = tcp_hashinfo.ehash_mask + 1;
-       net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
-       net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
+       net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2;
+       net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo;
 
        net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
        net->ipv4.sysctl_tcp_sack = 1;
@@ -3229,18 +3216,12 @@ static int __net_init tcp_sk_init(struct net *net)
                net->ipv4.tcp_congestion_control = &tcp_reno;
 
        return 0;
-fail:
-       tcp_sk_exit(net);
-
-       return res;
 }
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
        struct net *net;
 
-       inet_twsk_purge(&tcp_hashinfo, AF_INET);
-
        list_for_each_entry(net, net_exit_list, exit_list)
                tcp_fastopen_ctx_destroy(net);
 }
@@ -3326,6 +3307,24 @@ static void __init bpf_iter_register(void)
 
 void __init tcp_v4_init(void)
 {
+       int cpu, res;
+
+       for_each_possible_cpu(cpu) {
+               struct sock *sk;
+
+               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+                                          IPPROTO_TCP, &init_net);
+               if (res)
+                       panic("Failed to create the TCP control socket.\n");
+               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+               /* Please enforce IP_DF and IPID==0 for RST and
+                * ACK sent in SYN-RECV and TIME-WAIT state.
+                */
+               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
+               per_cpu(ipv4_tcp_sk, cpu) = sk;
+       }
        if (register_pernet_subsys(&tcp_sk_ops))
                panic("Failed to create the TCP control socket.\n");
 
index 7c2d3ac..3977257 100644 (file)
@@ -248,7 +248,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
        const struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_sock *tp = tcp_sk(sk);
        struct inet_timewait_sock *tw;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+       struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
        tw = inet_twsk_alloc(sk, tcp_death_row, state);
 
index 5079832..11c06b9 100644 (file)
@@ -1960,7 +1960,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 
        bytes = min_t(unsigned long,
                      sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
-                     sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+                     sk->sk_gso_max_size);
 
        /* Goal is to send at least one packet per ms,
         * not one big TSO packet every 100 ms.
index 96c5cc0..e6b978e 100644 (file)
 
 #include <linux/uaccess.h>
 
-/*
- *     The ICMP socket(s). This is the most convenient way to flow control
- *     our ICMP output as well as maintain a clean interface throughout
- *     all layers. All Socketless IP sends will soon be gone.
- *
- *     On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
-       return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
 
 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                       u8 type, u8 code, int offset, __be32 info)
@@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = {
 };
 
 /* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
 {
        struct sock *sk;
 
-       sk = icmpv6_sk(net);
+       sk = this_cpu_read(ipv6_icmp_sk);
        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
                /* This can happen if the output path (f.e. SIT or
                 * ip6ip6 tunnel) signals dst_link_failure() for an
@@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
                 */
                return NULL;
        }
+       sock_net_set(sk, net);
        return sk;
 }
 
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
 {
+       sock_net_set(sk, &init_net);
        spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
        security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
 }
 
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
-       free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
 {
        struct sock *sk;
        int err, i;
 
-       net->ipv6.icmp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv6.icmp_sk)
-               return -ENOMEM;
-
        for_each_possible_cpu(i) {
                err = inet_ctl_sock_create(&sk, PF_INET6,
-                                          SOCK_RAW, IPPROTO_ICMPV6, net);
+                                          SOCK_RAW, IPPROTO_ICMPV6, &init_net);
                if (err < 0) {
                        pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
                               err);
-                       goto fail;
+                       return err;
                }
 
-               *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+               per_cpu(ipv6_icmp_sk, i) = sk;
 
                /* Enough space for 2 64K ICMP packets, including
                 * sk_buff struct overhead.
                 */
                sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
        }
-       return 0;
-
- fail:
-       icmpv6_sk_exit(net);
-       return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
-       .init = icmpv6_sk_init,
-       .exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
-       int err;
-
-       err = register_pernet_subsys(&icmpv6_sk_ops);
-       if (err < 0)
-               return err;
 
        err = -EAGAIN;
        if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
@@ -1101,14 +1061,12 @@ sender_reg_err:
        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 fail:
        pr_err("Failed to register ICMP6 protocol\n");
-       unregister_pernet_subsys(&icmpv6_sk_ops);
        return err;
 }
 
 void icmpv6_cleanup(void)
 {
        inet6_unregister_icmp_sender(icmp6_send);
-       unregister_pernet_subsys(&icmpv6_sk_ops);
        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
 
index b29e9ba..d37a79a 100644 (file)
@@ -249,7 +249,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
                 if ((first_word & htonl(0xF00FFFFF)) ||
                     !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
                     !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
-                    *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+                    iph->nexthdr != iph2->nexthdr) {
 not_same_flow:
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
@@ -260,7 +260,8 @@ not_same_flow:
                                goto not_same_flow;
                }
                /* flush if Traffic Class fields are different */
-               NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+               NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
+                       (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
                NAPI_GRO_CB(p)->flush |= flush;
 
                /* If the previous IP ID value was based on an atomic
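
The GRO flush condition now also fires when hop limits differ, folded into the existing branch by OR-ing the hop-limit XOR into the Traffic Class test (first_word is the XOR of the two packets' leading IPv6 words). Spelled out as a host-order sketch with illustrative parameter handling:

#include <stdbool.h>
#include <stdint.h>

/* Flush when the Traffic Class bits of the XOR-ed first word are set
 * or when the hop limits differ. */
static bool gro_flush_tc_or_hl(uint32_t first_word_xor,
                               uint8_t hop_limit_a, uint8_t hop_limit_b)
{
	return ((first_word_xor & 0x0FF00000u) |
		(uint32_t)(hop_limit_a ^ hop_limit_b)) != 0;
}
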
index 97ade83..b47ffc8 100644 (file)
@@ -1121,6 +1121,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 
                        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
                        neigh_release(neigh);
+               } else if (skb->protocol == htons(ETH_P_IP)) {
+                       struct rtable *rt = skb_rtable(skb);
+
+                       if (rt->rt_gw_family == AF_INET6)
+                               memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr));
                }
        } else if (t->parms.proto != 0 && !(t->parms.flags &
                                            (IP6_TNL_F_USE_ORIG_TCLASS |
index 075ee8a..0c648bf 100644 (file)
@@ -148,6 +148,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
@@ -156,7 +157,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        struct dst_entry *dst;
        int addr_type;
        int err;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;
@@ -308,6 +308,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        inet->inet_dport = usin->sin6_port;
 
        tcp_set_state(sk, TCP_SYN_SENT);
+       tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;
@@ -2237,15 +2238,9 @@ static void __net_exit tcpv6_net_exit(struct net *net)
        inet_ctl_sock_destroy(net->ipv6.tcp_sk);
 }
 
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&tcp_hashinfo, AF_INET6);
-}
-
 static struct pernet_operations tcpv6_net_ops = {
        .init       = tcpv6_net_init,
        .exit       = tcpv6_net_exit,
-       .exit_batch = tcpv6_net_exit_batch,
 };
 
 int __init tcpv6_init(void)
index a135b1a..238b6a6 100644 (file)
@@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+ifeq ($(CONFIG_NF_CONNTRACK),m)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
+else ifeq ($(CONFIG_NF_CONNTRACK),y)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
+endif
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
new file mode 100644 (file)
index 0000000..8ad3f52
--- /dev/null
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Conntrack Helpers for the XDP and TC-BPF hooks
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+#include <linux/btf_ids.h>
+#include <linux/net_namespace.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @netns_id   - Specify the network namespace for lookup
+ *              Values:
+ *                BPF_F_CURRENT_NETNS (-1)
+ *                  Use namespace associated with ctx (xdp_md, __sk_buff)
+ *                [0, S32_MAX]
+ *                  Network Namespace ID
+ * @error      - Out parameter, set for any errors encountered
+ *              Values:
+ *                -EINVAL - Passed NULL for bpf_tuple pointer
+ *                -EINVAL - opts->reserved is not 0
+ *                -EINVAL - netns_id is less than -1
+ *                -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ *                -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ *                -ENONET - No network namespace found for netns_id
+ *                -ENOENT - Conntrack lookup could not find entry for tuple
+ *                -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ *                                or sizeof(tuple->ipv6)
+ * @l4proto    - Layer 4 protocol
+ *              Values:
+ *                IPPROTO_TCP, IPPROTO_UDP
+ * @reserved   - Reserved member, will be reused for more options in the future
+ *              Values:
+ *                0
+ */
+struct bpf_ct_opts {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 reserved[3];
+};
+
+enum {
+       NF_BPF_CT_OPTS_SZ = 12,
+};
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+                                         struct bpf_sock_tuple *bpf_tuple,
+                                         u32 tuple_len, u8 protonum,
+                                         s32 netns_id)
+{
+       struct nf_conntrack_tuple_hash *hash;
+       struct nf_conntrack_tuple tuple;
+
+       if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+               return ERR_PTR(-EPROTO);
+       if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
+               return ERR_PTR(-EINVAL);
+
+       memset(&tuple, 0, sizeof(tuple));
+       switch (tuple_len) {
+       case sizeof(bpf_tuple->ipv4):
+               tuple.src.l3num = AF_INET;
+               tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
+               tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
+               tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
+               tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
+               break;
+       case sizeof(bpf_tuple->ipv6):
+               tuple.src.l3num = AF_INET6;
+               memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+               tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
+               memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+               tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
+               break;
+       default:
+               return ERR_PTR(-EAFNOSUPPORT);
+       }
+
+       tuple.dst.protonum = protonum;
+
+       if (netns_id >= 0) {
+               net = get_net_ns_by_id(net, netns_id);
+               if (unlikely(!net))
+                       return ERR_PTR(-ENONET);
+       }
+
+       hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+       if (netns_id >= 0)
+               put_net(net);
+       if (!hash)
+               return ERR_PTR(-ENOENT);
+       return nf_ct_tuplehash_to_ctrack(hash);
+}
+
+__diag_push();
+__diag_ignore(GCC, 8, "-Wmissing-prototypes",
+             "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ *                    reference to it
+ *
+ * Parameters:
+ * @xdp_ctx    - Pointer to ctx (xdp_md) in XDP program
+ *                 Cannot be NULL
+ * @bpf_tuple  - Pointer to memory representing the tuple to look up
+ *                 Cannot be NULL
+ * @tuple__sz  - Length of the tuple structure
+ *                 Must be one of sizeof(bpf_tuple->ipv4) or
+ *                 sizeof(bpf_tuple->ipv6)
+ * @opts       - Additional options for lookup (documented above)
+ *                 Cannot be NULL
+ * @opts__sz   - Length of the bpf_ct_opts structure
+ *                 Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+       struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+       struct net *caller_net;
+       struct nf_conn *nfct;
+
+       BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+       if (!opts)
+               return NULL;
+       if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+           opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+               opts->error = -EINVAL;
+               return NULL;
+       }
+       caller_net = dev_net(ctx->rxq->dev);
+       nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+                                 opts->netns_id);
+       if (IS_ERR(nfct)) {
+               opts->error = PTR_ERR(nfct);
+               return NULL;
+       }
+       return nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ *                    reference to it
+ *
+ * Parameters:
+ * @skb_ctx    - Pointer to ctx (__sk_buff) in TC program
+ *                 Cannot be NULL
+ * @bpf_tuple  - Pointer to memory representing the tuple to look up
+ *                 Cannot be NULL
+ * @tuple__sz  - Length of the tuple structure
+ *                 Must be one of sizeof(bpf_tuple->ipv4) or
+ *                 sizeof(bpf_tuple->ipv6)
+ * @opts       - Additional options for lookup (documented above)
+ *                 Cannot be NULL
+ * @opts__sz   - Length of the bpf_ct_opts structure
+ *                 Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+       struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+       struct net *caller_net;
+       struct nf_conn *nfct;
+
+       BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+       if (!opts)
+               return NULL;
+       if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+           opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+               opts->error = -EINVAL;
+               return NULL;
+       }
+       caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+       nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+                                 opts->netns_id);
+       if (IS_ERR(nfct)) {
+               opts->error = PTR_ERR(nfct);
+               return NULL;
+       }
+       return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @nfct        - Pointer to referenced nf_conn object, obtained using
+ *                bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+       if (!nfct)
+               return;
+       nf_ct_put(nfct);
+}
+
+__diag_pop()
+
+BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_tc_check_kfunc_ids)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_tc_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_acquire_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_SET_END(nf_ct_acquire_kfunc_ids)
+
+BTF_SET_START(nf_ct_release_kfunc_ids)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_release_kfunc_ids)
+
+/* Both sets are identical */
+#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
+
+static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &nf_ct_xdp_check_kfunc_ids,
+       .acquire_set  = &nf_ct_acquire_kfunc_ids,
+       .release_set  = &nf_ct_release_kfunc_ids,
+       .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &nf_ct_tc_check_kfunc_ids,
+       .acquire_set  = &nf_ct_acquire_kfunc_ids,
+       .release_set  = &nf_ct_release_kfunc_ids,
+       .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+int register_nf_conntrack_bpf(void)
+{
+       int ret;
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
+       return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
+}
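
A hedged caller-side sketch of the new kfuncs from an XDP program: the extern declarations mirror the kernel signatures above, the struct definitions are assumed to come from vmlinux.h BTF, and the zeroed tuple is a placeholder that a real program would fill from the parsed packet.

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

extern struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
		  __u32 tuple__sz, struct bpf_ct_opts *opts,
		  __u32 opts__sz) __ksym;
extern void bpf_ct_release(struct nf_conn *ct) __ksym;

SEC("xdp")
int ct_lookup_probe(struct xdp_md *ctx)
{
	struct bpf_ct_opts opts = {
		.netns_id = -1,                /* BPF_F_CURRENT_NETNS */
		.l4proto  = IPPROTO_TCP,
	};
	struct bpf_sock_tuple tup = {};        /* fill from the packet in real use */
	struct nf_conn *ct;

	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4),
			       &opts, sizeof(opts));
	if (ct)
		bpf_ct_release(ct);    /* verifier-enforced for acquired ptrs */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
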
index d6aa5b4..d38d689 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -2750,8 +2751,15 @@ int nf_conntrack_init_start(void)
        conntrack_gc_work_init(&conntrack_gc_work);
        queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
 
+       ret = register_nf_conntrack_bpf();
+       if (ret < 0)
+               goto err_kfunc;
+
        return 0;
 
+err_kfunc:
+       cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+       nf_conntrack_proto_fini();
 err_proto:
        nf_conntrack_seqadj_fini();
 err_seqadj:
index 64ae4c4..c5eec16 100644 (file)
@@ -226,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
        m->hdr[w] |= htonl(val);
 }
 
-static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
-{
-       u32 temp = msg->hdr[a];
-
-       msg->hdr[a] = msg->hdr[b];
-       msg->hdr[b] = temp;
-}
-
 /*
  * Word 0
  */
@@ -480,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m)
        msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
 }
 
-static inline void msg_reset_reroute_cnt(struct tipc_msg *m)
-{
-       msg_set_bits(m, 1, 21, 0xf, 0);
-}
-
 static inline u32 msg_lookup_scope(struct tipc_msg *m)
 {
        return msg_bits(m, 1, 19, 0x3);
@@ -800,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n)
        msg_set_word(m, 2, n);
 }
 
-static inline u32 msg_bcgap_after(struct tipc_msg *m)
-{
-       return msg_bits(m, 2, 16, 0xffff);
-}
-
 static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
 {
        msg_set_bits(m, 2, 16, 0xffff, n);
@@ -868,11 +850,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u16 n)
        msg_set_bits(m, 4, 0, 0xffff, n);
 }
 
-static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
-{
-       msg_set_bits(m, 4, 0, 0xffff, n);
-}
-
 static inline u32 msg_bc_netid(struct tipc_msg *m)
 {
        return msg_word(m, 4);
index c195698..3e0d628 100644 (file)
@@ -3240,49 +3240,58 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
        return sk;
 }
 
-static struct sock *unix_next_socket(struct seq_file *seq,
-                                    struct sock *sk,
-                                    loff_t *pos)
+static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
 {
        unsigned long bucket = get_bucket(*pos);
+       struct sock *sk;
 
-       while (sk > (struct sock *)SEQ_START_TOKEN) {
-               sk = sk_next(sk);
-               if (!sk)
-                       goto next_bucket;
-               if (sock_net(sk) == seq_file_net(seq))
-                       return sk;
-       }
-
-       do {
+       while (bucket < ARRAY_SIZE(unix_socket_table)) {
                spin_lock(&unix_table_locks[bucket]);
+
                sk = unix_from_bucket(seq, pos);
                if (sk)
                        return sk;
 
-next_bucket:
-               spin_unlock(&unix_table_locks[bucket++]);
-               *pos = set_bucket_offset(bucket, 1);
-       } while (bucket < ARRAY_SIZE(unix_socket_table));
+               spin_unlock(&unix_table_locks[bucket]);
+
+               *pos = set_bucket_offset(++bucket, 1);
+       }
 
        return NULL;
 }
 
+static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
+                                 loff_t *pos)
+{
+       unsigned long bucket = get_bucket(*pos);
+
+       for (sk = sk_next(sk); sk; sk = sk_next(sk))
+               if (sock_net(sk) == seq_file_net(seq))
+                       return sk;
+
+       spin_unlock(&unix_table_locks[bucket]);
+
+       *pos = set_bucket_offset(++bucket, 1);
+
+       return unix_get_first(seq, pos);
+}
+
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
 {
        if (!*pos)
                return SEQ_START_TOKEN;
 
-       if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
-               return NULL;
-
-       return unix_next_socket(seq, NULL, pos);
+       return unix_get_first(seq, pos);
 }
 
 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        ++*pos;
-       return unix_next_socket(seq, v, pos);
+
+       if (v == SEQ_START_TOKEN)
+               return unix_get_first(seq, pos);
+
+       return unix_get_next(seq, v, pos);
 }
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
@@ -3347,6 +3356,15 @@ static const struct seq_operations unix_seq_ops = {
 };
 
 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_unix_iter_state {
+       struct seq_net_private p;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       struct sock **batch;
+       bool st_bucket_done;
+};
+
 struct bpf_iter__unix {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct unix_sock *, unix_sk);
@@ -3365,24 +3383,156 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
        return bpf_iter_run_prog(prog, &ctx);
 }
 
+static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
+               if (sock_net(sk) != seq_file_net(seq))
+                       continue;
+
+               if (iter->end_sk < iter->max_sk) {
+                       sock_hold(sk);
+                       iter->batch[iter->end_sk++] = sk;
+               }
+
+               expected++;
+       }
+
+       spin_unlock(&unix_table_locks[start_sk->sk_hash]);
+
+       return expected;
+}
+
+static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
+{
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
+                                      unsigned int new_batch_sz)
+{
+       struct sock **new_batch;
+
+       new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+                            GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
+               return -ENOMEM;
+
+       bpf_iter_unix_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+
+       return 0;
+}
+
+static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
+                                       loff_t *pos)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       unsigned int expected;
+       bool resized = false;
+       struct sock *sk;
+
+       if (iter->st_bucket_done)
+               *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
+
+again:
+       /* Get a new batch */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+
+       sk = unix_get_first(seq, pos);
+       if (!sk)
+               return NULL; /* Done */
+
+       expected = bpf_iter_unix_hold_batch(seq, sk);
+
+       if (iter->end_sk == expected) {
+               iter->st_bucket_done = true;
+               return sk;
+       }
+
+       if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
+               resized = true;
+               goto again;
+       }
+
+       return sk;
+}
+
+static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       if (!*pos)
+               return SEQ_START_TOKEN;
+
+       /* bpf iter does not support lseek, so it always
+        * continues from where it was stop()-ped.
+        */
+       return bpf_iter_unix_batch(seq, pos);
+}
+
+static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       struct sock *sk;
+
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so advance to the next sk in
+        * the batch.
+        */
+       if (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+
+       ++*pos;
+
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               sk = bpf_iter_unix_batch(seq, pos);
+
+       return sk;
+}
+
 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
 {
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
        uid_t uid;
+       bool slow;
+       int ret;
 
        if (v == SEQ_START_TOKEN)
                return 0;
 
+       slow = lock_sock_fast(sk);
+
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
+
        uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return unix_prog_seq_show(prog, &meta, v, uid);
+       ret = unix_prog_seq_show(prog, &meta, v, uid);
+unlock:
+       unlock_sock_fast(sk, slow);
+       return ret;
 }
 
 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
 {
+       struct bpf_unix_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
 
@@ -3393,12 +3543,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
                        (void)unix_prog_seq_show(prog, &meta, v, 0);
        }
 
-       unix_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk)
+               bpf_iter_unix_put_batch(iter);
 }
 
 static const struct seq_operations bpf_iter_unix_seq_ops = {
-       .start  = unix_seq_start,
-       .next   = unix_seq_next,
+       .start  = bpf_iter_unix_seq_start,
+       .next   = bpf_iter_unix_seq_next,
        .stop   = bpf_iter_unix_seq_stop,
        .show   = bpf_iter_unix_seq_show,
 };
@@ -3447,13 +3598,55 @@ static struct pernet_operations unix_net_ops = {
 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
                     struct unix_sock *unix_sk, uid_t uid)
 
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+       struct bpf_unix_iter_state *iter = priv_data;
+       int err;
+
+       err = bpf_iter_init_seq_net(priv_data, aux);
+       if (err)
+               return err;
+
+       err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
+       if (err) {
+               bpf_iter_fini_seq_net(priv_data);
+               return err;
+       }
+
+       return 0;
+}
+
+static void bpf_iter_fini_unix(void *priv_data)
+{
+       struct bpf_unix_iter_state *iter = priv_data;
+
+       bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
+}
+
 static const struct bpf_iter_seq_info unix_seq_info = {
        .seq_ops                = &bpf_iter_unix_seq_ops,
-       .init_seq_private       = bpf_iter_init_seq_net,
-       .fini_seq_private       = bpf_iter_fini_seq_net,
-       .seq_priv_size          = sizeof(struct seq_net_private),
+       .init_seq_private       = bpf_iter_init_unix,
+       .fini_seq_private       = bpf_iter_fini_unix,
+       .seq_priv_size          = sizeof(struct bpf_unix_iter_state),
 };
 
+static const struct bpf_func_proto *
+bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
+                            const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_setsockopt:
+               return &bpf_sk_setsockopt_proto;
+       case BPF_FUNC_getsockopt:
+               return &bpf_sk_getsockopt_proto;
+       default:
+               return NULL;
+       }
+}
+
 static struct bpf_iter_reg unix_reg_info = {
        .target                 = "unix",
        .ctx_arg_info_size      = 1,
@@ -3461,6 +3654,7 @@ static struct bpf_iter_reg unix_reg_info = {
                { offsetof(struct bpf_iter__unix, unix_sk),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .get_func_proto         = bpf_iter_unix_get_func_proto,
        .seq_info               = &unix_seq_info,
 };
 
index 8675fa5..3ec8ad9 100644 (file)
@@ -26,12 +26,12 @@ static void int_exit(int sig)
 {
        __u32 curr_prog_id = 0;
 
-       if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
+       if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+               printf("bpf_xdp_query_id failed\n");
                exit(1);
        }
        if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+               bpf_xdp_detach(ifindex, xdp_flags, NULL);
        else if (!curr_prog_id)
                printf("couldn't find a prog id on a given interface\n");
        else
@@ -143,7 +143,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
index a70b094..6c61d5f 100644 (file)
@@ -34,12 +34,12 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(1);
                }
                if (prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                else if (!curr_prog_id)
                        printf("couldn't find a prog id on a given iface\n");
                else
@@ -173,7 +173,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
index 4ad8967..79ccd98 100644 (file)
@@ -33,7 +33,7 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
 {
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
+       err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
        if (err < 0) {
                printf("ERROR: failed to attach program to %s\n", name);
                return err;
@@ -51,7 +51,7 @@ static int do_detach(int idx, const char *name)
 {
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+       err = bpf_xdp_detach(idx, xdp_flags, NULL);
        if (err < 0)
                printf("ERROR: failed to detach program from %s\n", name);
 
index cfaf7e5..2d565ba 100644 (file)
@@ -43,13 +43,13 @@ static void int_exit(int sig)
        int i = 0;
 
        for (i = 0; i < total_ifindex; i++) {
-               if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
-                       printf("bpf_get_link_xdp_id on iface %d failed\n",
+               if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) {
+                       printf("bpf_xdp_query_id on iface %d failed\n",
                               ifindex_list[i]);
                        exit(1);
                }
                if (prog_id_list[i] == prog_id)
-                       bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+                       bpf_xdp_detach(ifindex_list[i], flags, NULL);
                else if (!prog_id)
                        printf("couldn't find a prog id on iface %d\n",
                               ifindex_list[i]);
@@ -716,12 +716,12 @@ int main(int ac, char **argv)
        }
        prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
        for (i = 0; i < total_ifindex; i++) {
-               if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
+               if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) {
                        printf("link set xdp fd failed\n");
                        int recovery_index = i;
 
                        for (i = 0; i < recovery_index; i++)
-                               bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+                               bpf_xdp_detach(ifindex_list[i], flags, NULL);
 
                        return 1;
                }
index 74a2926..fb2532d 100644 (file)
@@ -62,15 +62,15 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(EXIT_FAIL);
                }
                if (prog_id == curr_prog_id) {
                        fprintf(stderr,
                                "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
                                ifindex, ifname);
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                } else if (!curr_prog_id) {
                        printf("couldn't find a prog id on a given iface\n");
                } else {
@@ -209,7 +209,7 @@ static struct datarec *alloc_record_per_cpu(void)
 
 static struct record *alloc_record_per_rxq(void)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        struct record *array;
 
        array = calloc(nr_rxqs, sizeof(struct record));
@@ -222,7 +222,7 @@ static struct record *alloc_record_per_rxq(void)
 
 static struct stats_record *alloc_stats_record(void)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        struct stats_record *rec;
        int i;
 
@@ -241,7 +241,7 @@ static struct stats_record *alloc_stats_record(void)
 
 static void free_stats_record(struct stats_record *r)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        int i;
 
        for (i = 0; i < nr_rxqs; i++)
@@ -289,7 +289,7 @@ static void stats_collect(struct stats_record *rec)
        map_collect_percpu(fd, 0, &rec->stats);
 
        fd = bpf_map__fd(rx_queue_index_map);
-       max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       max_rxqs = bpf_map__max_entries(rx_queue_index_map);
        for (i = 0; i < max_rxqs; i++)
                map_collect_percpu(fd, i, &rec->rxq[i]);
 }
@@ -335,7 +335,7 @@ static void stats_print(struct stats_record *stats_rec,
                        struct stats_record *stats_prev,
                        int action, __u32 cfg_opt)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        unsigned int nr_cpus = bpf_num_possible_cpus();
        double pps = 0, err = 0;
        struct record *rec, *prev;
@@ -582,7 +582,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                fprintf(stderr, "link set xdp fd failed\n");
                return EXIT_FAIL_XDP;
        }
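
bpf_map__def() is deprecated later in this merge, so field pokes into struct bpf_map_def give way to per-field getters such as bpf_map__max_entries(). The sizing pattern used above, in isolation (struct record is illustrative):

    #include <stdlib.h>
    #include <bpf/libbpf.h>

    struct record { unsigned long long processed; };    /* illustrative payload */

    static struct record *alloc_record_per_rxq(struct bpf_map *rx_queue_index_map)
    {
            /* Getter replaces the deprecated bpf_map__def(map)->max_entries. */
            unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);

            return calloc(nr_rxqs, sizeof(struct record));
    }
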
index 587eacb..0a2b3e9 100644 (file)
@@ -30,7 +30,7 @@ static int do_attach(int idx, int fd, const char *name)
        __u32 info_len = sizeof(info);
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
+       err = bpf_xdp_attach(idx, fd, xdp_flags, NULL);
        if (err < 0) {
                printf("ERROR: failed to attach program to %s\n", name);
                return err;
@@ -51,13 +51,13 @@ static int do_detach(int idx, const char *name)
        __u32 curr_prog_id = 0;
        int err = 0;
 
-       err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
+       err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id);
        if (err) {
-               printf("bpf_get_link_xdp_id failed\n");
+               printf("bpf_xdp_query_id failed\n");
                return err;
        }
        if (prog_id == curr_prog_id) {
-               err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+               err = bpf_xdp_detach(idx, xdp_flags, NULL);
                if (err < 0)
                        printf("ERROR: failed to detach prog from %s\n", name);
        } else if (!curr_prog_id) {
index 8740838..ae70a79 100644 (file)
@@ -1265,7 +1265,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
        int ret;
 
        if (prog_id) {
-               ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags);
+               ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id);
                if (ret < 0)
                        return -errno;
 
@@ -1278,7 +1278,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
                }
        }
 
-       return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+       return bpf_xdp_detach(ifindex, xdp_flags, NULL);
 }
 
 int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
@@ -1295,8 +1295,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
 
        xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
        xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
-       ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
-                                 xdp_flags);
+       ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL);
        if (ret < 0) {
                ret = -errno;
                fprintf(stderr,
@@ -1308,7 +1307,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
                return ret;
        }
 
-       ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+       ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
        if (ret < 0) {
                ret = -errno;
                fprintf(stderr,
index 1d4f305..7370c03 100644 (file)
@@ -32,12 +32,12 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(1);
                }
                if (prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                else if (!curr_prog_id)
                        printf("couldn't find a prog id on a given iface\n");
                else
@@ -288,7 +288,7 @@ int main(int argc, char **argv)
                }
        }
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
@@ -302,7 +302,7 @@ int main(int argc, char **argv)
 
        poll_stats(kill_after_s);
 
-       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+       bpf_xdp_detach(ifindex, xdp_flags, NULL);
 
        return 0;
 }
index cc44087..28b5f2a 100644 (file)
@@ -173,7 +173,7 @@ main(int argc, char **argv)
        unlink(SOCKET_NAME);
 
        /* Unset fd for given ifindex */
-       err = bpf_set_link_xdp_fd(ifindex, -1, 0);
+       err = bpf_xdp_detach(ifindex, 0, NULL);
        if (err) {
                fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
                return err;
index aa50864..19288a2 100644 (file)
@@ -571,13 +571,13 @@ static void remove_xdp_program(void)
 {
        u32 curr_prog_id = 0;
 
-       if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
+       if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) {
+               printf("bpf_xdp_query_id failed\n");
                exit(EXIT_FAILURE);
        }
 
        if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
+               bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL);
        else if (!curr_prog_id)
                printf("couldn't find a prog id on a given interface\n");
        else
@@ -1027,7 +1027,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
        if (ret)
                exit_with_error(-ret);
 
-       ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
+       ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id);
        if (ret)
                exit_with_error(-ret);
 
@@ -1760,7 +1760,7 @@ static void load_xdp_program(char **argv, struct bpf_object **obj)
                exit(EXIT_FAILURE);
        }
 
-       if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
+       if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) {
                fprintf(stderr, "ERROR: link set xdp fd failed\n");
                exit(EXIT_FAILURE);
        }
index 52e7c4f..2220509 100644 (file)
@@ -974,8 +974,8 @@ static void remove_xdp_program(void)
        int i;
 
        for (i = 0 ; i < n_ports; i++)
-               bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1,
-                                   port_params[i].xsk_cfg.xdp_flags);
+               bpf_xdp_detach(if_nametoindex(port_params[i].iface),
+                              port_params[i].xsk_cfg.xdp_flags, NULL);
 }
 
 int main(int argc, char **argv)
index a6403dd..0966252 100755 (executable)
@@ -87,21 +87,25 @@ class HeaderParser(object):
         self.line = ''
         self.helpers = []
         self.commands = []
+        self.desc_unique_helpers = set()
+        self.define_unique_helpers = []
+        self.desc_syscalls = []
+        self.enum_syscalls = []
 
     def parse_element(self):
         proto    = self.parse_symbol()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
+        desc     = self.parse_desc(proto)
+        ret      = self.parse_ret(proto)
         return APIElement(proto=proto, desc=desc, ret=ret)
 
     def parse_helper(self):
         proto    = self.parse_proto()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
+        desc     = self.parse_desc(proto)
+        ret      = self.parse_ret(proto)
         return Helper(proto=proto, desc=desc, ret=ret)
 
     def parse_symbol(self):
-        p = re.compile(' \* ?(.+)$')
+        p = re.compile(' \* ?(BPF\w+)$')
         capture = p.match(self.line)
         if not capture:
             raise NoSyscallCommandFound
@@ -127,16 +131,15 @@ class HeaderParser(object):
         self.line = self.reader.readline()
         return capture.group(1)
 
-    def parse_desc(self):
+    def parse_desc(self, proto):
         p = re.compile(' \* ?(?:\t| {5,8})Description$')
         capture = p.match(self.line)
         if not capture:
-            # Helper can have empty description and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
+            raise Exception("No description section found for " + proto)
         # Description can be several lines, some of them possibly empty, and it
         # stops when another subsection title is met.
         desc = ''
+        desc_present = False
         while True:
             self.line = self.reader.readline()
             if self.line == ' *\n':
@@ -145,21 +148,24 @@ class HeaderParser(object):
                 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
+                    desc_present = True
                     desc += capture.group(1) + '\n'
                 else:
                     break
+
+        if not desc_present:
+            raise Exception("No description found for " + proto)
         return desc
 
-    def parse_ret(self):
+    def parse_ret(self, proto):
         p = re.compile(' \* ?(?:\t| {5,8})Return$')
         capture = p.match(self.line)
         if not capture:
-            # Helper can have empty retval and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
+            raise Exception("No return section found for " + proto)
         # Return value description can be several lines, some of them possibly
         # empty, and it stops when another subsection title is met.
         ret = ''
+        ret_present = False
         while True:
             self.line = self.reader.readline()
             if self.line == ' *\n':
@@ -168,44 +174,101 @@ class HeaderParser(object):
                 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
+                    ret_present = True
                     ret += capture.group(1) + '\n'
                 else:
                     break
+
+        if not ret_present:
+            raise Exception("No return found for " + proto)
         return ret
 
-    def seek_to(self, target, help_message):
+    def seek_to(self, target, help_message, discard_lines = 1):
         self.reader.seek(0)
         offset = self.reader.read().find(target)
         if offset == -1:
             raise Exception(help_message)
         self.reader.seek(offset)
         self.reader.readline()
-        self.reader.readline()
+        for _ in range(discard_lines):
+            self.reader.readline()
         self.line = self.reader.readline()
 
-    def parse_syscall(self):
+    def parse_desc_syscall(self):
         self.seek_to('* DOC: eBPF Syscall Commands',
                      'Could not find start of eBPF syscall descriptions list')
         while True:
             try:
                 command = self.parse_element()
                 self.commands.append(command)
+                self.desc_syscalls.append(command.proto)
+
             except NoSyscallCommandFound:
                 break
 
-    def parse_helpers(self):
+    def parse_enum_syscall(self):
+        self.seek_to('enum bpf_cmd {',
+                     'Could not find start of bpf_cmd enum', 0)
+        # Searches for one or more BPF\w+ enum entries
+        bpf_p = re.compile('\s*(BPF\w+)+')
+        # Searches for an enum entry assigned to another entry,
+        # e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN. Such aliases are not
+        # documented, so they are skipped when checking that the right
+        # number of syscalls is documented
+        assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+        bpf_cmd_str = ''
+        while True:
+            capture = assign_p.match(self.line)
+            if capture:
+                # Skip line if an enum entry is assigned to another entry
+                self.line = self.reader.readline()
+                continue
+            capture = bpf_p.match(self.line)
+            if capture:
+                bpf_cmd_str += self.line
+            else:
+                break
+            self.line = self.reader.readline()
+        # Find the number of occurrences of BPF\w+
+        self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+
+    def parse_desc_helpers(self):
         self.seek_to('* Start of BPF helper function descriptions:',
                      'Could not find start of eBPF helper descriptions list')
         while True:
             try:
                 helper = self.parse_helper()
                 self.helpers.append(helper)
+                proto = helper.proto_break_down()
+                self.desc_unique_helpers.add(proto['name'])
             except NoHelperFound:
                 break
 
+    def parse_define_helpers(self):
+        # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare
+        # later with the number of unique function names present in description.
+        # Note: seek_to(..) discards the first line below the target search text,
+        # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers.
+        self.seek_to('#define __BPF_FUNC_MAPPER(FN)',
+                     'Could not find start of eBPF helper definition list')
+        # Searches for either one or more FN(\w+) defines or a backslash for newline
+        p = re.compile('\s*(FN\(\w+\))+|\\\\')
+        fn_defines_str = ''
+        while True:
+            capture = p.match(self.line)
+            if capture:
+                fn_defines_str += self.line
+            else:
+                break
+            self.line = self.reader.readline()
+        # Find the number of occurrences of FN(\w+)
+        self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str)
+
     def run(self):
-        self.parse_syscall()
-        self.parse_helpers()
+        self.parse_desc_syscall()
+        self.parse_enum_syscall()
+        self.parse_desc_helpers()
+        self.parse_define_helpers()
         self.reader.close()
 
 ###############################################################################
@@ -235,6 +298,25 @@ class Printer(object):
             self.print_one(elem)
         self.print_footer()
 
+    def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance):
+        """
+        Checks the number of helpers/syscalls documented within the header file
+        description with those defined as part of enum/macro and raise an
+        Exception if they don't match.
+        """
+        nr_desc_unique_elem = len(desc_unique_elem)
+        nr_define_unique_elem = len(define_unique_elem)
+        if nr_desc_unique_elem != nr_define_unique_elem:
+            exception_msg = '''
+The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d)
+''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem)
+            if nr_desc_unique_elem < nr_define_unique_elem:
+                # Function description is parsed until no helper is found (which can be due to
+                # misformatting). Hence, only print the first missing/misformatted helper/enum.
+                exception_msg += '''
+The description for %s is not present or formatted correctly.
+''' % (define_unique_elem[nr_desc_unique_elem])
+            raise Exception(exception_msg)
 
 class PrinterRST(Printer):
     """
@@ -295,7 +377,6 @@ class PrinterRST(Printer):
 
         print('')
 
-
 class PrinterHelpersRST(PrinterRST):
     """
     A printer for dumping collected information about helpers as a ReStructured
@@ -305,6 +386,7 @@ class PrinterHelpersRST(PrinterRST):
     """
     def __init__(self, parser):
         self.elements = parser.helpers
+        self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
 
     def print_header(self):
         header = '''\
@@ -478,6 +560,7 @@ class PrinterSyscallRST(PrinterRST):
     """
     def __init__(self, parser):
         self.elements = parser.commands
+        self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd')
 
     def print_header(self):
         header = '''\
@@ -509,6 +592,7 @@ class PrinterHelpers(Printer):
     """
     def __init__(self, parser):
         self.elements = parser.helpers
+        self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
 
     type_fwds = [
             'struct bpf_fib_lookup',
index 842889f..a9f8c63 100644 (file)
@@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access)
        int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
 
        if (rc)
-               return -EPERM;
+               return rc;
 
        #ifdef CONFIG_CGROUP_DEVICE
        return devcgroup_legacy_check_permission(type, major, minor, access);
index 5983312..a2c665b 100644 (file)
@@ -902,7 +902,7 @@ static int do_show(int argc, char **argv)
                                      equal_fn_for_key_as_id, NULL);
        btf_map_table = hashmap__new(hash_fn_for_key_as_id,
                                     equal_fn_for_key_as_id, NULL);
-       if (!btf_prog_table || !btf_map_table) {
+       if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) {
                hashmap__free(btf_prog_table);
                hashmap__free(btf_map_table);
                if (fd >= 0)
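
The IS_ERR() conversions in these bpftool hunks reflect that libbpf's hashmap__new() reports failure as an ERR_PTR-encoded errno rather than NULL, so the old NULL tests could never fire. The corrected pattern, assuming bpftool's hash_fn_for_key_as_id/equal_fn_for_key_as_id callbacks are in scope:

    #include <errno.h>
    #include <linux/err.h>
    #include <bpf/hashmap.h>

    static int build_table(void)
    {
            struct hashmap *table = hashmap__new(hash_fn_for_key_as_id,
                                                 equal_fn_for_key_as_id, NULL);

            if (IS_ERR(table))      /* a NULL check would never trigger */
                    return -ENOMEM;
            /* ... populate and query the table ... */
            hashmap__free(table);   /* now tolerates ERR_PTR as well as NULL */
            return 0;
    }
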
index 3571a28..effe136 100644 (file)
@@ -50,6 +50,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                         const char *attach_flags_str,
                         int level)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
        int prog_fd;
@@ -63,6 +64,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                return -1;
        }
 
+       get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
        if (json_output) {
                jsonw_start_object(json_wtr);
                jsonw_uint_field(json_wtr, "id", info.id);
@@ -73,7 +75,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        jsonw_uint_field(json_wtr, "attach_type", attach_type);
                jsonw_string_field(json_wtr, "attach_flags",
                                   attach_flags_str);
-               jsonw_string_field(json_wtr, "name", info.name);
+               jsonw_string_field(json_wtr, "name", prog_name);
                jsonw_end_object(json_wtr);
        } else {
                printf("%s%-8u ", level ? "    " : "", info.id);
@@ -81,7 +83,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        printf("%-15s", attach_type_name[attach_type]);
                else
                        printf("type %-10u", attach_type);
-               printf(" %-15s %-15s\n", attach_flags_str, info.name);
+               printf(" %-15s %-15s\n", attach_flags_str, prog_name);
        }
 
        close(prog_fd);
index fa8eb81..111dff8 100644 (file)
@@ -24,6 +24,7 @@
 #include <bpf/bpf.h>
 #include <bpf/hashmap.h>
 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/btf.h>
 
 #include "main.h"
 
@@ -304,6 +305,49 @@ const char *get_fd_type_name(enum bpf_obj_type type)
        return names[type];
 }
 
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+                       char *name_buff, size_t buff_len)
+{
+       const char *prog_name = prog_info->name;
+       const struct btf_type *func_type;
+       const struct bpf_func_info finfo = {};
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       struct btf *prog_btf = NULL;
+
+       if (buff_len <= BPF_OBJ_NAME_LEN ||
+           strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1)
+               goto copy_name;
+
+       if (!prog_info->btf_id || prog_info->nr_func_info == 0)
+               goto copy_name;
+
+       info.nr_func_info = 1;
+       info.func_info_rec_size = prog_info->func_info_rec_size;
+       if (info.func_info_rec_size > sizeof(finfo))
+               info.func_info_rec_size = sizeof(finfo);
+       info.func_info = ptr_to_u64(&finfo);
+
+       if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
+               goto copy_name;
+
+       prog_btf = btf__load_from_kernel_by_id(info.btf_id);
+       if (!prog_btf)
+               goto copy_name;
+
+       func_type = btf__type_by_id(prog_btf, finfo.type_id);
+       if (!func_type || !btf_is_func(func_type))
+               goto copy_name;
+
+       prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+copy_name:
+       snprintf(name_buff, buff_len, "%s", prog_name);
+
+       if (prog_btf)
+               btf__free(prog_btf);
+}
+
 int get_fd_type(int fd)
 {
        char path[PATH_MAX];
index b4695df..43e3f87 100644 (file)
@@ -227,7 +227,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
                /* only generate definitions for memory-mapped internal maps */
                if (!bpf_map__is_internal(map))
                        continue;
-               if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+               if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
                if (!get_map_ident(map, map_ident, sizeof(map_ident)))
@@ -468,7 +468,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
                if (!get_map_ident(map, ident, sizeof(ident)))
                        continue;
                if (bpf_map__is_internal(map) &&
-                   (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   (bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        printf("\tmunmap(skel->%1$s, %2$zd);\n",
                               ident, bpf_map_mmap_sz(map));
                codegen("\
@@ -536,7 +536,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
                        continue;
 
                if (!bpf_map__is_internal(map) ||
-                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
                codegen("\
@@ -600,10 +600,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
                        continue;
 
                if (!bpf_map__is_internal(map) ||
-                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
-               if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+               if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
                        mmap_flags = "PROT_READ";
                else
                        mmap_flags = "PROT_READ | PROT_WRITE";
@@ -927,7 +927,6 @@ static int do_skeleton(int argc, char **argv)
                        s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
                        if (!s)                                             \n\
                                goto err;                                   \n\
-                       obj->skeleton = s;                                  \n\
                                                                            \n\
                        s->sz = sizeof(*s);                                 \n\
                        s->name = \"%1$s\";                                 \n\
@@ -962,7 +961,7 @@ static int do_skeleton(int argc, char **argv)
                                i, bpf_map__name(map), i, ident);
                        /* memory-mapped internal maps */
                        if (bpf_map__is_internal(map) &&
-                           (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) {
+                           (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
                                printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
                                       i, ident);
                        }
@@ -1000,6 +999,7 @@ static int do_skeleton(int argc, char **argv)
                                                                            \n\
                        s->data = (void *)%2$s__elf_bytes(&s->data_sz);     \n\
                                                                            \n\
+                       obj->skeleton = s;                                  \n\
                        return 0;                                           \n\
                err:                                                        \n\
                        bpf_object__destroy_skeleton(s);                    \n\
index 2c258db..97dec81 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2020 Facebook */
 
 #include <errno.h>
+#include <linux/err.h>
 #include <net/if.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -306,7 +307,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                link_table = hashmap__new(hash_fn_for_key_as_id,
                                          equal_fn_for_key_as_id, NULL);
-               if (!link_table) {
+               if (IS_ERR(link_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
index 020e91a..9d01fa9 100644 (file)
@@ -478,7 +478,14 @@ int main(int argc, char **argv)
        }
 
        if (!legacy_libbpf) {
-               ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+               enum libbpf_strict_mode mode;
+
+               /* Allow legacy map definitions for skeleton generation.
+                * They will still be rejected if the generated skeleton is
+                * loaded with LIBBPF_STRICT_ALL mode.
+                */
+               mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS;
+               ret = libbpf_set_strict_mode(mode);
                if (ret)
                        p_err("failed to enable libbpf strict mode: %d", ret);
        }
index 8d76d93..0c38405 100644 (file)
@@ -140,6 +140,10 @@ struct cmd {
 int cmd_select(const struct cmd *cmds, int argc, char **argv,
               int (*help)(int argc, char **argv));
 
+#define MAX_PROG_FULL_NAME 128
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+                       char *name_buff, size_t buff_len);
+
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
index cc530a2..c66a3c9 100644 (file)
@@ -699,7 +699,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                map_table = hashmap__new(hash_fn_for_key_as_id,
                                         equal_fn_for_key_as_id, NULL);
-               if (!map_table) {
+               if (IS_ERR(map_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
index 6490537..526a332 100644 (file)
@@ -551,7 +551,7 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type,
        if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD)
                flags |= XDP_FLAGS_HW_MODE;
 
-       return bpf_set_link_xdp_fd(ifindex, progfd, flags);
+       return bpf_xdp_attach(ifindex, progfd, flags, NULL);
 }
 
 static int do_attach(int argc, char **argv)
index 56b598e..7c384d1 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (C) 2020 Facebook */
 #include <errno.h>
+#include <linux/err.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
        libbpf_print_fn_t default_print;
 
        *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL);
-       if (!*map) {
+       if (IS_ERR(*map)) {
                p_err("failed to create hashmap for PID references");
                return -1;
        }
index 2a21d50..cf935c6 100644 (file)
@@ -424,8 +424,10 @@ out_free:
        free(value);
 }
 
-static void print_prog_header_json(struct bpf_prog_info *info)
+static void print_prog_header_json(struct bpf_prog_info *info, int fd)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
+
        jsonw_uint_field(json_wtr, "id", info->id);
        if (info->type < ARRAY_SIZE(prog_type_name))
                jsonw_string_field(json_wtr, "type",
@@ -433,8 +435,10 @@ static void print_prog_header_json(struct bpf_prog_info *info)
        else
                jsonw_uint_field(json_wtr, "type", info->type);
 
-       if (*info->name)
-               jsonw_string_field(json_wtr, "name", info->name);
+       if (*info->name) {
+               get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+               jsonw_string_field(json_wtr, "name", prog_name);
+       }
 
        jsonw_name(json_wtr, "tag");
        jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"",
@@ -455,7 +459,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
        char *memlock;
 
        jsonw_start_object(json_wtr);
-       print_prog_header_json(info);
+       print_prog_header_json(info, fd);
        print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
 
        if (info->load_time) {
@@ -507,16 +511,20 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
        jsonw_end_object(json_wtr);
 }
 
-static void print_prog_header_plain(struct bpf_prog_info *info)
+static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
+
        printf("%u: ", info->id);
        if (info->type < ARRAY_SIZE(prog_type_name))
                printf("%s  ", prog_type_name[info->type]);
        else
                printf("type %u  ", info->type);
 
-       if (*info->name)
-               printf("name %s  ", info->name);
+       if (*info->name) {
+               get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+               printf("name %s  ", prog_name);
+       }
 
        printf("tag ");
        fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
@@ -534,7 +542,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 {
        char *memlock;
 
-       print_prog_header_plain(info);
+       print_prog_header_plain(info, fd);
 
        if (info->load_time) {
                char buf[32];
@@ -641,7 +649,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                prog_table = hashmap__new(hash_fn_for_key_as_id,
                                          equal_fn_for_key_as_id, NULL);
-               if (!prog_table) {
+               if (IS_ERR(prog_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
@@ -972,10 +980,10 @@ static int do_dump(int argc, char **argv)
 
                if (json_output && nb_fds > 1) {
                        jsonw_start_object(json_wtr);   /* prog object */
-                       print_prog_header_json(&info);
+                       print_prog_header_json(&info, fds[i]);
                        jsonw_name(json_wtr, "insns");
                } else if (nb_fds > 1) {
-                       print_prog_header_plain(&info);
+                       print_prog_header_plain(&info, fds[i]);
                }
 
                err = prog_dump(&info, mode, filepath, opcodes, visual, linum);
index 2f693b0..e08a6ff 100644 (file)
@@ -480,7 +480,6 @@ static int do_unregister(int argc, char **argv)
 static int do_register(int argc, char **argv)
 {
        LIBBPF_OPTS(bpf_object_open_opts, open_opts);
-       const struct bpf_map_def *def;
        struct bpf_map_info info = {};
        __u32 info_len = sizeof(info);
        int nr_errs = 0, nr_maps = 0;
@@ -510,8 +509,7 @@ static int do_register(int argc, char **argv)
        }
 
        bpf_object__for_each_map(map, obj) {
-               def = bpf_map__def(map);
-               if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+               if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
                        continue;
 
                link = bpf_map__attach_struct_ops(map);
index 9ddeca9..a7f87cd 100644 (file)
@@ -20,6 +20,8 @@ LD       = $(HOSTLD)
 ARCH     = $(HOSTARCH)
 RM      ?= rm
 CROSS_COMPILE =
+CFLAGS  := $(KBUILD_HOSTCFLAGS)
+LDFLAGS := $(KBUILD_HOSTLDFLAGS)
 
 OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
 
@@ -47,10 +49,10 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT)
        $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT)    \
-                   DESTDIR=$(LIBBPF_DESTDIR) prefix=                          \
+                   DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \
                    $(abspath $@) install_headers
 
-CFLAGS := -g \
+CFLAGS += -g \
           -I$(srctree)/tools/include \
           -I$(srctree)/tools/include/uapi \
           -I$(LIBBPF_INCLUDE) \
index b0383d3..16a7574 100644 (file)
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *                     *ctx_out*, *data_in* and *data_out* must be NULL.
  *                     *repeat* must be zero.
  *
+ *             BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
  *             is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE                (1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in the BPF_PROG_LOAD command, the loaded
+ * program fully supports xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS    (1U << 5)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1775,6 +1782,8 @@ union bpf_attr {
  *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ *     Description
+ *             Get the current pid and tgid.
  *     Return
  *             A 64-bit integer containing the current tgid and pid, and
  *             created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
  *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ *     Description
+ *             Get the current uid and gid.
  *     Return
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
  *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ *     Description
+ *             Get the current task.
  *     Return
  *             A pointer to the current task struct.
  *
@@ -2369,6 +2382,8 @@ union bpf_attr {
  *             indicate that the hash is outdated and to trigger a
  *             recalculation the next time the kernel tries to access this
  *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *     Return
+ *             void.
  *
  * long bpf_get_numa_node_id(void)
  *     Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
  *             A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Description
+ *             Get the owner UID of the socket associated with *skb*.
  *     Return
  *             The owner UID of the socket associated to *skb*. If the socket
  *             is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
  *             The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ *     Description
+ *             Get the current cgroup id based on the cgroup within which
+ *             the current task is running.
  *     Return
  *             A 64-bit integer containing the current cgroup id based
  *             on the cgroup within which the current task is running.
@@ -5018,6 +5038,44 @@ union bpf_attr {
  *
  *     Return
  *             The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *     Description
+ *             Get the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *     Description
+ *             Set the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *     Description
+ *             Get the total size of a given xdp buff (linear and paged area).
+ *     Return
+ *             The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             This helper is provided as an easy way to load data from an
+ *             xdp buffer. It can be used to load *len* bytes from *offset* of
+ *             the frame associated to *xdp_md* into the buffer pointed to by
+ *             *buf*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             Store *len* bytes from buffer *buf* into the frame
+ *             associated to *xdp_md*, at *offset*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5206,6 +5264,11 @@ union bpf_attr {
        FN(get_func_arg),               \
        FN(get_func_ret),               \
        FN(get_func_arg_cnt),           \
+       FN(get_retval),                 \
+       FN(set_retval),                 \
+       FN(xdp_get_buff_len),           \
+       FN(xdp_load_bytes),             \
+       FN(xdp_store_bytes),            \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
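
These two helpers pair with the devcgroup change earlier in this merge (returning rc instead of a flat -EPERM), letting a cgroup program choose the errno that userspace sees. A hypothetical device-cgroup sketch, assuming a libbpf recent enough to declare bpf_set_retval():

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    SEC("cgroup/dev")
    int deny_with_enxio(struct bpf_cgroup_dev_ctx *ctx)
    {
            /* Surface -ENXIO to the caller instead of the default -EPERM. */
            bpf_set_retval(-6 /* -ENXIO */);
            return 0;       /* 0 == deny the device access */
    }
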
index 550b4cb..418b259 100644 (file)
@@ -754,10 +754,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
                .flags = flags,
        );
 
-       return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+       return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
 }
 
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+int bpf_prog_attach_opts(int prog_fd, int target_fd,
                          enum bpf_attach_type type,
                          const struct bpf_prog_attach_opts *opts)
 {
@@ -778,6 +778,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
        return libbpf_err_errno(ret);
 }
 
+__attribute__((alias("bpf_prog_attach_opts")))
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+                         enum bpf_attach_type type,
+                         const struct bpf_prog_attach_opts *opts);
+
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
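
bpf_prog_attach_xattr() survives only as an ABI alias; new code calls the _opts variant, for instance to atomically replace one attached cgroup program with another (a sketch; all fds assumed valid):

    #include <bpf/bpf.h>

    static int swap_cgroup_prog(int cgroup_fd, int old_fd, int new_fd)
    {
            LIBBPF_OPTS(bpf_prog_attach_opts, opts,
                    .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE,
                    .replace_prog_fd = old_fd,
            );

            return bpf_prog_attach_opts(new_fd, cgroup_fd,
                                        BPF_CGROUP_INET_EGRESS, &opts);
    }
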
index 14e0d97..c2e8327 100644 (file)
@@ -391,6 +391,10 @@ struct bpf_prog_attach_opts {
 
 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
                               enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
+                                    enum bpf_attach_type type,
+                                    const struct bpf_prog_attach_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
 LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
                                     enum bpf_attach_type type,
                                     const struct bpf_prog_attach_opts *opts);
index 963b106..44df982 100644 (file)
@@ -133,7 +133,7 @@ struct bpf_map_def {
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));
 
 enum libbpf_pin_type {
        LIBBPF_PIN_NONE,
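
For anyone migrating off the now-deprecated struct bpf_map_def, the BTF-defined form in SEC(".maps") encodes the same four fields through type information; a sketch of the replacement for a legacy array definition:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    /* BTF-defined equivalent of a legacy 64-entry __u32 -> __u64 array map. */
    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 64);
            __type(key, __u32);
            __type(value, __u64);
    } counters SEC(".maps");
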
index 9aa19c8..1383e26 100644 (file)
@@ -1620,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 struct btf_pipe {
        const struct btf *src;
        struct btf *dst;
+       struct hashmap *str_off_map; /* map string offsets from src to dst */
 };
 
 static int btf_rewrite_str(__u32 *str_off, void *ctx)
 {
        struct btf_pipe *p = ctx;
-       int off;
+       void *mapped_off;
+       int off, err;
 
        if (!*str_off) /* nothing to do for empty strings */
                return 0;
 
+       if (p->str_off_map &&
+           hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
+               *str_off = (__u32)(long)mapped_off;
+               return 0;
+       }
+
        off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
        if (off < 0)
                return off;
 
+       /* Remember string mapping from src to dst.  It avoids
+        * performing expensive string comparisons.
+        */
+       if (p->str_off_map) {
+               err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+               if (err)
+                       return err;
+       }
+
        *str_off = off;
        return 0;
 }
@@ -1680,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
        return 0;
 }
 
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+
 int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 {
        struct btf_pipe p = { .src = src_btf, .dst = btf };
@@ -1713,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
        if (!off)
                return libbpf_err(-ENOMEM);
 
+       /* Map the string offsets from src_btf to the offsets from btf to improve performance */
+       p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+       if (IS_ERR(p.str_off_map))
+               return libbpf_err(-ENOMEM);
+
        /* bulk copy types data for all types from src_btf */
        memcpy(t, src_btf->types_data, data_sz);
 
@@ -1754,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
        btf->hdr->str_off += data_sz;
        btf->nr_types += cnt;
 
+       hashmap__free(p.str_off_map);
+
        /* return type ID of the first added BTF type */
        return btf->start_id + btf->nr_types - cnt;
 err_out:
@@ -1767,6 +1794,8 @@ err_out:
         * wasn't modified, so doesn't need restoring, see big comment above */
        btf->hdr->str_len = old_strs_len;
 
+       hashmap__free(p.str_off_map);
+
        return libbpf_err(err);
 }
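
The cache stores integer string offsets as pointer-sized keys and values, hence the (void *)(long) casts. The find-or-append pattern in isolation (a sketch against libbpf's internal hashmap; an identity hash suffices because offsets are already unique):

    #include <errno.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <linux/err.h>
    #include "hashmap.h"    /* libbpf-internal header */

    static size_t ident_hash(const void *key, void *ctx) { return (size_t)key; }
    static bool ident_eq(const void *k1, const void *k2, void *ctx) { return k1 == k2; }

    static int offset_cache_demo(void)
    {
            struct hashmap *m = hashmap__new(ident_hash, ident_eq, NULL);
            void *dst;

            if (IS_ERR(m))
                    return -ENOMEM;
            /* src offset 42 was copied to dst offset 7: remember it. */
            hashmap__append(m, (void *)(long)42, (void *)(long)7);
            if (hashmap__find(m, (void *)(long)42, &dst)) {
                    /* cache hit: (long)dst == 7, no btf__add_str() needed */
            }
            hashmap__free(m);
            return 0;
    }
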
 
index 061839f..51862fd 100644 (file)
@@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
                         const struct btf_dump_type_data_opts *opts);
 
 /*
- * A set of helpers for easier BTF types handling
+ * A set of helpers for easier BTF types handling.
+ *
+ * The inline functions below rely on constants from the kernel headers which
+ * may not be available for applications including this header file. To avoid
+ * compilation errors, we define all the constants here that were added after
+ * the initial introduction of the BTF_KIND* constants.
  */
+#ifndef BTF_KIND_FUNC
+#define BTF_KIND_FUNC          12      /* Function     */
+#define BTF_KIND_FUNC_PROTO    13      /* Function Proto       */
+#endif
+#ifndef BTF_KIND_VAR
+#define BTF_KIND_VAR           14      /* Variable     */
+#define BTF_KIND_DATASEC       15      /* Section      */
+#endif
+#ifndef BTF_KIND_FLOAT
+#define BTF_KIND_FLOAT         16      /* Floating point       */
+#endif
+/* The kernel header switched to enums, so these two were never #defined */
+#define BTF_KIND_DECL_TAG      17      /* Decl Tag */
+#define BTF_KIND_TYPE_TAG      18      /* Type Tag */
+
 static inline __u16 btf_kind(const struct btf_type *t)
 {
        return BTF_INFO_KIND(t->info);
index 3c20b12..aeb09c2 100644 (file)
@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)
 
 void hashmap__free(struct hashmap *map)
 {
-       if (!map)
+       if (IS_ERR_OR_NULL(map))
                return;
 
        hashmap__clear(map);
@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,
 
        return true;
 }
-
index 7f10dd5..a8c7503 100644 (file)
@@ -235,6 +235,8 @@ enum sec_def_flags {
        SEC_SLEEPABLE = 8,
        /* allow non-strict prefix matching */
        SEC_SLOPPY_PFX = 16,
+       /* BPF program supports non-linear XDP buffers */
+       SEC_XDP_FRAGS = 32,
 };
 
 struct bpf_sec_def {
@@ -1937,6 +1939,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
        if (obj->efile.maps_shndx < 0)
                return 0;
 
+       if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
+               pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
+               return -EOPNOTSUPP;
+       }
+
        if (!symbols)
                return -EINVAL;
 
@@ -1999,6 +2006,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
+               pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
+
                if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
                        pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
                        return -ENOTSUP;
@@ -4190,6 +4199,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
                return 0;
 
        if (!bpf_map__is_internal(map)) {
+               pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
                ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
                                           def->value_size, &key_type_id,
                                           &value_type_id);
@@ -6562,6 +6572,9 @@ static int libbpf_preload_prog(struct bpf_program *prog,
        if (def & SEC_SLEEPABLE)
                opts->prog_flags |= BPF_F_SLEEPABLE;
 
+       if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
+               opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+
        if ((prog->type == BPF_PROG_TYPE_TRACING ||
             prog->type == BPF_PROG_TYPE_LSM ||
             prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -8600,8 +8613,11 @@ static const struct bpf_sec_def section_defs[] = {
        SEC_DEF("lsm.s/",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
        SEC_DEF("iter/",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
        SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
+       SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp_devmap/",          XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
+       SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp_cpumap/",          XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
+       SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
        SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
        SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
        SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
@@ -11795,6 +11811,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
 
 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
 {
+       if (!s)
+               return;
+
        if (s->progs)
                bpf_object__detach_skeleton(s);
        if (s->obj)
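
Tying the new section names to the frags-aware helpers documented earlier in this merge, a hypothetical multi-buffer XDP program could look like:

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    SEC("xdp.frags")        /* loaded with BPF_F_XDP_HAS_FRAGS set */
    int xdp_frags_pass(struct xdp_md *ctx)
    {
            __u8 eth[14];

            /* Safe across frag boundaries, unlike direct packet access. */
            if (bpf_xdp_load_bytes(ctx, 0, eth, sizeof(eth)))
                    return XDP_DROP;
            return XDP_PASS;
    }
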
index 8b9bc5e..9467006 100644 (file)
@@ -706,7 +706,8 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
 LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 /* get map definition */
-LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
 /* get map name */
 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
 /* get/set map type */
@@ -832,13 +833,42 @@ struct bpf_xdp_set_link_opts {
 };
 #define bpf_xdp_set_link_opts__last_field old_fd
 
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
 LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                                        const struct bpf_xdp_set_link_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
 LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
                                     size_t info_size, __u32 flags);
 
+struct bpf_xdp_attach_opts {
+       size_t sz;
+       int old_prog_fd;
+       size_t :0;
+};
+#define bpf_xdp_attach_opts__last_field old_prog_fd
+
+struct bpf_xdp_query_opts {
+       size_t sz;
+       __u32 prog_id;          /* output */
+       __u32 drv_prog_id;      /* output */
+       __u32 hw_prog_id;       /* output */
+       __u32 skb_prog_id;      /* output */
+       __u8 attach_mode;       /* output */
+       size_t :0;
+};
+#define bpf_xdp_query_opts__last_field attach_mode
+
+LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
+                             const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags,
+                             const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts);
+LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id);
+
 /* TC related API */
 enum bpf_tc_attach_point {
        BPF_TC_INGRESS = 1 << 0,
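
bpf_xdp_query() reports every attached program id plus the attach mode through one opts struct; a usage sketch:

    #include <stdio.h>
    #include <bpf/libbpf.h>

    static void dump_xdp_state(int ifindex)
    {
            LIBBPF_OPTS(bpf_xdp_query_opts, opts);

            if (bpf_xdp_query(ifindex, 0, &opts))
                    return;
            printf("prog:%u drv:%u skb:%u hw:%u mode:%u\n",
                   opts.prog_id, opts.drv_prog_id, opts.skb_prog_id,
                   opts.hw_prog_id, (unsigned int)opts.attach_mode);
    }
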
index 5297839..e10f082 100644 (file)
@@ -247,6 +247,7 @@ LIBBPF_0.0.8 {
                bpf_link_create;
                bpf_link_update;
                bpf_map__set_initial_value;
+               bpf_prog_attach_opts;
                bpf_program__attach_cgroup;
                bpf_program__attach_lsm;
                bpf_program__is_lsm;
@@ -427,6 +428,10 @@ LIBBPF_0.7.0 {
                bpf_program__log_level;
                bpf_program__set_log_buf;
                bpf_program__set_log_level;
+               bpf_xdp_attach;
+               bpf_xdp_detach;
+               bpf_xdp_query;
+               bpf_xdp_query_id;
                libbpf_probe_bpf_helper;
                libbpf_probe_bpf_map_type;
                libbpf_probe_bpf_prog_type;
index 79131f7..3c2b281 100644 (file)
@@ -73,6 +73,11 @@ enum libbpf_strict_mode {
         * operation.
         */
        LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+       /*
+        * Error out on any SEC("maps") map definitions, which are deprecated
+        * in favor of BTF-defined map definitions in SEC(".maps").
+        */
+       LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,
 
        __LIBBPF_STRICT_LAST,
 };
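
For context on the new strict-mode bit: with LIBBPF_STRICT_MAP_DEFINITIONS set, only BTF-defined maps load. A minimal sketch of the accepted form (map name and sizes are illustrative):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* BTF-defined map in SEC(".maps"); a legacy struct bpf_map_def in
     * SEC("maps") would be rejected under this strict-mode flag.
     */
    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, __u32);
            __type(value, __u64);
    } counters SEC(".maps");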
index 39f25e0..c39c37f 100644 (file)
@@ -217,6 +217,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
        return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
 }
 
+int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+       int old_prog_fd, err;
+
+       if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
+               return libbpf_err(-EINVAL);
+
+       old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+       if (old_prog_fd)
+               flags |= XDP_FLAGS_REPLACE;
+       else
+               old_prog_fd = -1;
+
+       err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
+       return libbpf_err(err);
+}
+
+int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+       return bpf_xdp_attach(ifindex, -1, flags, opts);
+}
+
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                             const struct bpf_xdp_set_link_opts *opts)
 {
@@ -303,69 +325,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
        return 0;
 }
 
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-                         size_t info_size, __u32 flags)
+int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 {
-       struct xdp_id_md xdp_id = {};
-       __u32 mask;
-       int ret;
        struct libbpf_nla_req req = {
                .nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
                .nh.nlmsg_type     = RTM_GETLINK,
                .nh.nlmsg_flags    = NLM_F_DUMP | NLM_F_REQUEST,
                .ifinfo.ifi_family = AF_PACKET,
        };
+       struct xdp_id_md xdp_id = {};
+       int err;
 
-       if (flags & ~XDP_FLAGS_MASK || !info_size)
+       if (!OPTS_VALID(opts, bpf_xdp_query_opts))
+               return libbpf_err(-EINVAL);
+
+       if (xdp_flags & ~XDP_FLAGS_MASK)
                return libbpf_err(-EINVAL);
 
        /* Check whether the single {HW,DRV,SKB} mode is set */
-       flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
-       mask = flags - 1;
-       if (flags && flags & mask)
+       xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
+       if (xdp_flags & (xdp_flags - 1))
                return libbpf_err(-EINVAL);
 
        xdp_id.ifindex = ifindex;
-       xdp_id.flags = flags;
+       xdp_id.flags = xdp_flags;
 
-       ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+       err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
                                       get_xdp_info, &xdp_id);
-       if (!ret) {
-               size_t sz = min(info_size, sizeof(xdp_id.info));
+       if (err)
+               return libbpf_err(err);
 
-               memcpy(info, &xdp_id.info, sz);
-               memset((void *) info + sz, 0, info_size - sz);
-       }
+       OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
+       OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
+       OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
+       OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
+       OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
 
-       return libbpf_err(ret);
+       return 0;
 }
 
-static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+                         size_t info_size, __u32 flags)
 {
-       flags &= XDP_FLAGS_MODES;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+       size_t sz;
+       int err;
+
+       if (!info_size)
+               return libbpf_err(-EINVAL);
 
-       if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
-               return info->prog_id;
-       if (flags & XDP_FLAGS_DRV_MODE)
-               return info->drv_prog_id;
-       if (flags & XDP_FLAGS_HW_MODE)
-               return info->hw_prog_id;
-       if (flags & XDP_FLAGS_SKB_MODE)
-               return info->skb_prog_id;
+       err = bpf_xdp_query(ifindex, flags, &opts);
+       if (err)
+               return libbpf_err(err);
+
+       /* The field layout of struct xdp_link_info matches the layout of
+        * struct bpf_xdp_query_opts after the sz field.
+        */
+       sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
+       memcpy(info, &opts.prog_id, sz);
+       memset((void *)info + sz, 0, info_size - sz);
 
        return 0;
 }
 
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 {
-       struct xdp_link_info info;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
        int ret;
 
-       ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
-       if (!ret)
-               *prog_id = get_xdp_id(&info, flags);
+       ret = bpf_xdp_query(ifindex, flags, &opts);
+       if (ret)
+               return libbpf_err(ret);
+
+       flags &= XDP_FLAGS_MODES;
 
-       return libbpf_err(ret);
+       if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
+               *prog_id = opts.prog_id;
+       else if (flags & XDP_FLAGS_DRV_MODE)
+               *prog_id = opts.drv_prog_id;
+       else if (flags & XDP_FLAGS_HW_MODE)
+               *prog_id = opts.hw_prog_id;
+       else if (flags & XDP_FLAGS_SKB_MODE)
+               *prog_id = opts.skb_prog_id;
+       else
+               *prog_id = 0;
+
+       return 0;
+}
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+       return bpf_xdp_query_id(ifindex, flags, prog_id);
 }
 
 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
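
Since the legacy getters are now thin wrappers over bpf_xdp_query(), new code can call it directly. A minimal sketch (ifindex assumed valid; no mode flag requested, so all per-mode IDs are reported):

    #include <stdio.h>
    #include <bpf/libbpf.h>

    static void print_xdp_state(int ifindex)
    {
            LIBBPF_OPTS(bpf_xdp_query_opts, opts);

            if (bpf_xdp_query(ifindex, 0, &opts))
                    return;

            /* All fields are outputs; 0 means nothing attached in that mode. */
            printf("mode=%u prog=%u drv=%u hw=%u skb=%u\n",
                   opts.attach_mode, opts.prog_id, opts.drv_prog_id,
                   opts.hw_prog_id, opts.skb_prog_id);
    }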
index 7ecfaac..ef2832b 100644 (file)
@@ -1005,24 +1005,22 @@ __bpf_map__config_value(struct bpf_map *map,
 {
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
-       const struct bpf_map_def *def = bpf_map__def(map);
 
-       if (IS_ERR(def)) {
-               pr_debug("Unable to get map definition from '%s'\n",
-                        map_name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       if (def->type != BPF_MAP_TYPE_ARRAY) {
+       if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
        }
-       if (def->key_size < sizeof(unsigned int)) {
+       if (bpf_map__key_size(map) < sizeof(unsigned int)) {
                pr_debug("Map %s has incorrect key size\n", map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
        }
-       switch (def->value_size) {
+       switch (bpf_map__value_size(map)) {
        case 1:
        case 2:
        case 4:
@@ -1064,7 +1062,6 @@ __bpf_map__config_event(struct bpf_map *map,
                        struct parse_events_term *term,
                        struct evlist *evlist)
 {
-       const struct bpf_map_def *def;
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
        struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
@@ -1075,18 +1072,16 @@ __bpf_map__config_event(struct bpf_map *map,
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("Unable to get map definition from '%s'\n",
-                        map_name);
-               return PTR_ERR(def);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
+               return PTR_ERR(map);
        }
 
        /*
         * No need to check key_size and value_size:
         * kernel has already checked them.
         */
-       if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+       if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1135,7 +1130,6 @@ config_map_indices_range_check(struct parse_events_term *term,
                               const char *map_name)
 {
        struct parse_events_array *array = &term->array;
-       const struct bpf_map_def *def;
        unsigned int i;
 
        if (!array->nr_ranges)
@@ -1146,10 +1140,8 @@ config_map_indices_range_check(struct parse_events_term *term,
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("ERROR: Unable to get map definition from '%s'\n",
-                        map_name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
@@ -1158,7 +1150,7 @@ config_map_indices_range_check(struct parse_events_term *term,
                size_t length = array->ranges[i].length;
                unsigned int idx = start + length - 1;
 
-               if (idx >= def->max_entries) {
+               if (idx >= bpf_map__max_entries(map)) {
                        pr_debug("ERROR: index %d too large\n", idx);
                        return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
                }
@@ -1251,21 +1243,21 @@ out:
 }
 
 typedef int (*map_config_func_t)(const char *name, int map_fd,
-                                const struct bpf_map_def *pdef,
+                                const struct bpf_map *map,
                                 struct bpf_map_op *op,
                                 void *pkey, void *arg);
 
 static int
 foreach_key_array_all(map_config_func_t func,
                      void *arg, const char *name,
-                     int map_fd, const struct bpf_map_def *pdef,
+                     int map_fd, const struct bpf_map *map,
                      struct bpf_map_op *op)
 {
        unsigned int i;
        int err;
 
-       for (i = 0; i < pdef->max_entries; i++) {
-               err = func(name, map_fd, pdef, op, &i, arg);
+       for (i = 0; i < bpf_map__max_entries(map); i++) {
+               err = func(name, map_fd, map, op, &i, arg);
                if (err) {
                        pr_debug("ERROR: failed to insert value to %s[%u]\n",
                                 name, i);
@@ -1278,7 +1270,7 @@ foreach_key_array_all(map_config_func_t func,
 static int
 foreach_key_array_ranges(map_config_func_t func, void *arg,
                         const char *name, int map_fd,
-                        const struct bpf_map_def *pdef,
+                        const struct bpf_map *map,
                         struct bpf_map_op *op)
 {
        unsigned int i, j;
@@ -1291,7 +1283,7 @@ foreach_key_array_ranges(map_config_func_t func, void *arg,
                for (j = 0; j < length; j++) {
                        unsigned int idx = start + j;
 
-                       err = func(name, map_fd, pdef, op, &idx, arg);
+                       err = func(name, map_fd, map, op, &idx, arg);
                        if (err) {
                                pr_debug("ERROR: failed to insert value to %s[%u]\n",
                                         name, idx);
@@ -1307,9 +1299,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                           map_config_func_t func,
                           void *arg)
 {
-       int err, map_fd;
+       int err, map_fd, type;
        struct bpf_map_op *op;
-       const struct bpf_map_def *def;
        const char *name = bpf_map__name(map);
        struct bpf_map_priv *priv = bpf_map__priv(map);
 
@@ -1322,9 +1313,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return 0;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("ERROR: failed to get definition from map %s\n", name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
        map_fd = bpf_map__fd(map);
@@ -1333,19 +1323,19 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return map_fd;
        }
 
+       type = bpf_map__type(map);
        list_for_each_entry(op, &priv->ops_list, list) {
-               switch (def->type) {
+               switch (type) {
                case BPF_MAP_TYPE_ARRAY:
                case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
                        switch (op->key_type) {
                        case BPF_MAP_KEY_ALL:
                                err = foreach_key_array_all(func, arg, name,
-                                                           map_fd, def, op);
+                                                           map_fd, map, op);
                                break;
                        case BPF_MAP_KEY_RANGES:
                                err = foreach_key_array_ranges(func, arg, name,
-                                                              map_fd, def,
-                                                              op);
+                                                              map_fd, map, op);
                                break;
                        default:
                                pr_debug("ERROR: keytype for map '%s' invalid\n",
@@ -1454,7 +1444,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
 
 static int
 apply_obj_config_map_for_key(const char *name, int map_fd,
-                            const struct bpf_map_def *pdef,
+                            const struct bpf_map *map,
                             struct bpf_map_op *op,
                             void *pkey, void *arg __maybe_unused)
 {
@@ -1463,7 +1453,7 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
        switch (op->op_type) {
        case BPF_MAP_OP_SET_VALUE:
                err = apply_config_value_for_key(map_fd, pkey,
-                                                pdef->value_size,
+                                                bpf_map__value_size(map),
                                                 op->v.value);
                break;
        case BPF_MAP_OP_SET_EVSEL:
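
The conversion in this file follows a single pattern: drop the deprecated bpf_map__def() snapshot and read each field through its getter. A condensed sketch of the equivalent checks (check_array_map is a hypothetical helper):

    #include <errno.h>
    #include <linux/bpf.h>
    #include <bpf/libbpf.h>

    static int check_array_map(const struct bpf_map *map)
    {
            unsigned int i;

            if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY)
                    return -EINVAL;
            if (bpf_map__key_size(map) < sizeof(unsigned int))
                    return -EINVAL;
            /* Iterate keys without ever touching struct bpf_map_def. */
            for (i = 0; i < bpf_map__max_entries(map); i++)
                    ; /* per-key work would go here */
            return 0;
    }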
index eb853ca..c863ae0 100644 (file)
@@ -9,25 +9,25 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
 {
-       return def->type == BPF_MAP_TYPE_PERCPU_HASH ||
-              def->type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-              def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-              def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+       return type == BPF_MAP_TYPE_PERCPU_HASH ||
+              type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+              type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+              type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
 }
 
-static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
+static void *bpf_map__alloc_value(const struct bpf_map *map)
 {
-       if (bpf_map_def__is_per_cpu(def))
-               return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF));
+       if (bpf_map__is_per_cpu(bpf_map__type(map)))
+               return malloc(round_up(bpf_map__value_size(map), 8) *
+                             sysconf(_SC_NPROCESSORS_CONF));
 
-       return malloc(def->value_size);
+       return malloc(bpf_map__value_size(map));
 }
 
 int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
 {
-       const struct bpf_map_def *def = bpf_map__def(map);
        void *prev_key = NULL, *key, *value;
        int fd = bpf_map__fd(map), err;
        int printed = 0;
@@ -35,15 +35,15 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
        if (fd < 0)
                return fd;
 
-       if (IS_ERR(def))
-               return PTR_ERR(def);
+       if (!map)
+               return PTR_ERR(map);
 
        err = -ENOMEM;
-       key = malloc(def->key_size);
+       key = malloc(bpf_map__key_size(map));
        if (key == NULL)
                goto out;
 
-       value = bpf_map_def__alloc_value(def);
+       value = bpf_map__alloc_value(map);
        if (value == NULL)
                goto out_free_key;
 
index 42ffc24..945f92d 100644 (file)
@@ -21,7 +21,7 @@ endif
 
 BPF_GCC                ?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS     ?=
-CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)             \
+CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS)     \
          -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)          \
          -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
 LDFLAGS += $(SAN_CFLAGS)
@@ -292,7 +292,7 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
 MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)                  \
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)          \
             -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)                   \
             -I$(abspath $(OUTPUT)/../usr/include)
 
index df3b292..bdbacf5 100644 (file)
@@ -109,26 +109,31 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
        .write = bpf_testmod_test_write,
 };
 
-BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_SET_START(bpf_testmod_check_kfunc_ids)
 BTF_ID(func, bpf_testmod_test_mod_kfunc)
-BTF_SET_END(bpf_testmod_kfunc_ids)
+BTF_SET_END(bpf_testmod_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &bpf_testmod_check_kfunc_ids,
+};
+
+extern int bpf_fentry_test1(int a);
 
 static int bpf_testmod_init(void)
 {
        int ret;
 
-       ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
-       if (ret)
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
-       return 0;
+       if (bpf_fentry_test1(0) < 0)
+               return -EINVAL;
+       return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
 static void bpf_testmod_exit(void)
 {
-       unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
        return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
index f628713..763db63 100644 (file)
@@ -48,3 +48,8 @@ CONFIG_IMA_READ_POLICY=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_FUNCTION_TRACER=y
 CONFIG_DYNAMIC_FTRACE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_USERFAULTFD=y
index d0f06e4..eac71fb 100644 (file)
@@ -1,13 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/capability.h>
 
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
 static int duration;
 
+static int create_netns(void)
+{
+       if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+               return -1;
+
+       return 0;
+}
+
 void try_bind(int family, int port, int expected_errno)
 {
        struct sockaddr_storage addr = {};
@@ -75,6 +86,9 @@ void test_bind_perm(void)
        struct bind_perm *skel;
        int cgroup_fd;
 
+       if (create_netns())
+               return;
+
        cgroup_fd = test__join_cgroup("/bind_perm");
        if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
                return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
new file mode 100644 (file)
index 0000000..ee725d4
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+       struct sockaddr_un addr = {
+               .sun_family = AF_UNIX,
+               .sun_path = "",
+       };
+       socklen_t len;
+       int fd, err;
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (!ASSERT_NEQ(fd, -1, "socket"))
+               return -1;
+
+       len = offsetof(struct sockaddr_un, sun_path);
+       err = bind(fd, (struct sockaddr *)&addr, len);
+       if (!ASSERT_OK(err, "bind"))
+               return -1;
+
+       len = sizeof(addr);
+       err = getsockname(fd, (struct sockaddr *)&addr, &len);
+       if (!ASSERT_OK(err, "getsockname"))
+               return -1;
+
+       memcpy(&skel->bss->sun_path, &addr.sun_path,
+              len - offsetof(struct sockaddr_un, sun_path));
+
+       return fd;
+}
+
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+       socklen_t optlen;
+       int i, err;
+
+       for (i = 0; i < NR_CASES; i++) {
+               if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1,
+                               "bpf_(get|set)sockopt"))
+                       return;
+
+               err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+                                &(skel->data->sndbuf_setsockopt[i]),
+                                sizeof(skel->data->sndbuf_setsockopt[i]));
+               if (!ASSERT_OK(err, "setsockopt"))
+                       return;
+
+               optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]);
+               err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+                                &(skel->bss->sndbuf_getsockopt_expected[i]),
+                                &optlen);
+               if (!ASSERT_OK(err, "getsockopt"))
+                       return;
+
+               if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i],
+                              skel->bss->sndbuf_getsockopt_expected[i],
+                              "bpf_(get|set)sockopt"))
+                       return;
+       }
+}
+
+void test_bpf_iter_setsockopt_unix(void)
+{
+       struct bpf_iter_setsockopt_unix *skel;
+       int err, unix_fd, iter_fd;
+       char buf;
+
+       skel = bpf_iter_setsockopt_unix__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               return;
+
+       unix_fd = create_unix_socket(skel);
+       if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+               goto destroy;
+
+       skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+       if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+               goto destroy;
+
+       iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+       if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+               goto destroy;
+
+       while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+              errno == EAGAIN)
+               ;
+       if (!ASSERT_OK(err, "read iter error"))
+               goto destroy;
+
+       test_sndbuf(skel, unix_fd);
+destroy:
+       bpf_iter_setsockopt_unix__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
new file mode 100644 (file)
index 0000000..d43f548
--- /dev/null
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program and make it
+ * sleep; the BPF program is then loaded and the verifier's return value is
+ * inspected. After this, the userfaultfd is closed so that the module
+ * loading thread makes forward progress, and fmod_ret injects an error so
+ * that the module load fails and the module is freed.
+ *
+ * If the verifier succeeds in loading the supplied program, it ends up
+ * taking a reference to the freed module and triggers a crash when the
+ * program fd is closed later. This is true for both kfuncs and ksyms. In
+ * both cases, the crash is triggered inside bpf_prog_free_deferred, when
+ * the module reference is finally released.
+ */
+
+struct test_config {
+       const char *str_open;
+       void *(*bpf_open_and_load)();
+       void (*bpf_destroy)(void *);
+};
+
+enum test_state {
+       _TS_INVALID,
+       TS_MODULE_LOAD,
+       TS_MODULE_LOAD_FAIL,
+};
+
+static _Atomic enum test_state state = _TS_INVALID;
+
+static int sys_finit_module(int fd, const char *param_values, int flags)
+{
+       return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int sys_delete_module(const char *name, unsigned int flags)
+{
+       return syscall(__NR_delete_module, name, flags);
+}
+
+static int load_module(const char *mod)
+{
+       int ret, fd;
+
+       fd = open("bpf_testmod.ko", O_RDONLY);
+       if (fd < 0)
+               return fd;
+
+       ret = sys_finit_module(fd, "", 0);
+       close(fd);
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
+static void *load_module_thread(void *p)
+{
+       if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+               atomic_store(&state, TS_MODULE_LOAD);
+       else
+               atomic_store(&state, TS_MODULE_LOAD_FAIL);
+       return p;
+}
+
+static int sys_userfaultfd(int flags)
+{
+       return syscall(__NR_userfaultfd, flags);
+}
+
+static int test_setup_uffd(void *fault_addr)
+{
+       struct uffdio_register uffd_register = {};
+       struct uffdio_api uffd_api = {};
+       int uffd;
+
+       uffd = sys_userfaultfd(O_CLOEXEC);
+       if (uffd < 0)
+               return -errno;
+
+       uffd_api.api = UFFD_API;
+       uffd_api.features = 0;
+       if (ioctl(uffd, UFFDIO_API, &uffd_api)) {
+               close(uffd);
+               return -1;
+       }
+
+       uffd_register.range.start = (unsigned long)fault_addr;
+       uffd_register.range.len = 4096;
+       uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+       if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
+               close(uffd);
+               return -1;
+       }
+       return uffd;
+}
+
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+       void *fault_addr, *skel_fail;
+       struct bpf_mod_race *skel;
+       struct uffd_msg uffd_msg;
+       pthread_t load_mod_thrd;
+       _Atomic int *blockingp;
+       int uffd, ret;
+
+       fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+               return;
+
+       if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+               goto end_mmap;
+
+       skel = bpf_mod_race__open();
+       if (!ASSERT_OK_PTR(skel, "bpf_mod_race__open"))
+               goto end_module;
+
+       skel->rodata->bpf_mod_race_config.tgid = getpid();
+       skel->rodata->bpf_mod_race_config.inject_error = -4242;
+       skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+       if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod_race__load"))
+               goto end_destroy;
+       blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+       if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+               goto end_destroy;
+
+       uffd = test_setup_uffd(fault_addr);
+       if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+               goto end_destroy;
+
+       if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+                      "load module thread"))
+               goto end_uffd;
+
+       /* Now we either fail loading the module or block in the bpf prog; spin to find out */
+       while (!atomic_load(&state) && !atomic_load(blockingp))
+               ;
+       if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+               goto end_join;
+       if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+               pthread_kill(load_mod_thrd, SIGKILL);
+               goto end_uffd;
+       }
+
+       /* We might have set bpf_blocking to 1, but may not have blocked in
+        * bpf_copy_from_user. Read the userfaultfd descriptor to verify that.
+        */
+       if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+                      "read uffd block event"))
+               goto end_join;
+       if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+               goto end_join;
+
+       /* We know that load_mod_thrd is blocked in the fmod_ret program, so
+        * the module state is still MODULE_STATE_COMING because mod->init
+        * hasn't returned. This is the time we try to load a program calling
+        * a kfunc and check if we get ENXIO from the verifier.
+        */
+       skel_fail = config->bpf_open_and_load();
+       ret = errno;
+       if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+               /* Close uffd to unblock load_mod_thrd */
+               close(uffd);
+               uffd = -1;
+               while (atomic_load(blockingp) != 2)
+                       ;
+               ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+               config->bpf_destroy(skel_fail);
+               goto end_join;
+       }
+       ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+       ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+
+       close(uffd);
+       uffd = -1;
+end_join:
+       pthread_join(load_mod_thrd, NULL);
+       if (uffd < 0)
+               ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+       if (uffd >= 0)
+               close(uffd);
+end_destroy:
+       bpf_mod_race__destroy(skel);
+       ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+       sys_delete_module("bpf_testmod", 0);
+       ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+end_mmap:
+       munmap(fault_addr, 4096);
+       atomic_store(&state, _TS_INVALID);
+}
+
+static const struct test_config ksym_config = {
+       .str_open = "ksym_race__open_and_load",
+       .bpf_open_and_load = (void *)ksym_race__open_and_load,
+       .bpf_destroy = (void *)ksym_race__destroy,
+};
+
+static const struct test_config kfunc_config = {
+       .str_open = "kfunc_call_race__open_and_load",
+       .bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+       .bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+
+void serial_test_bpf_mod_race(void)
+{
+       if (test__start_subtest("ksym (used_btfs UAF)"))
+               test_bpf_mod_race_config(&ksym_config);
+       if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+               test_bpf_mod_race_config(&kfunc_config);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
new file mode 100644 (file)
index 0000000..e3166a8
--- /dev/null
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_bpf_nf.skel.h"
+
+enum {
+       TEST_XDP,
+       TEST_TC_BPF,
+};
+
+void test_bpf_nf_ct(int mode)
+{
+       struct test_bpf_nf *skel;
+       int prog_fd, err, retval;
+
+       skel = test_bpf_nf__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+               return;
+
+       if (mode == TEST_XDP)
+               prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+       else
+               prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
+
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+                               (__u32 *)&retval, NULL);
+       if (!ASSERT_OK(err, "bpf_prog_test_run"))
+               goto end;
+
+       ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+       ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+       ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+       ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+       ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+       ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+       ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+       ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+end:
+       test_bpf_nf__destroy(skel);
+}
+
+void test_bpf_nf(void)
+{
+       if (test__start_subtest("xdp-ct"))
+               test_bpf_nf_ct(TEST_XDP);
+       if (test__start_subtest("tc-bpf-ct"))
+               test_bpf_nf_ct(TEST_TC_BPF);
+}
index 8ba53ac..14f9b61 100644 (file)
@@ -4560,6 +4560,8 @@ static void do_test_file(unsigned int test_num)
        has_btf_ext = btf_ext != NULL;
        btf_ext__free(btf_ext);
 
+       /* temporarily disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+       libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS);
        obj = bpf_object__open(test->file);
        err = libbpf_get_error(obj);
        if (CHECK(err, "obj: %d", err))
@@ -4684,6 +4686,8 @@ skip:
        fprintf(stderr, "OK");
 
 done:
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        btf__free(btf);
        free(func_info);
        bpf_object__close(obj);
index d3e8f72..38b3c47 100644 (file)
@@ -194,14 +194,14 @@ void serial_test_cgroup_attach_multi(void)
 
        attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
        attach_opts.replace_prog_fd = allow_prog[0];
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_override", "unexpected success\n"))
                goto err;
        CHECK_FAIL(errno != EINVAL);
 
        attach_opts.flags = BPF_F_REPLACE;
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_no_multi", "unexpected success\n"))
                goto err;
@@ -209,7 +209,7 @@ void serial_test_cgroup_attach_multi(void)
 
        attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
        attach_opts.replace_prog_fd = -1;
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_bad_fd", "unexpected success\n"))
                goto err;
@@ -217,7 +217,7 @@ void serial_test_cgroup_attach_multi(void)
 
        /* replacing a program that is not attached to cgroup should fail  */
        attach_opts.replace_prog_fd = allow_prog[3];
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_no_ent", "unexpected success\n"))
                goto err;
@@ -225,14 +225,14 @@ void serial_test_cgroup_attach_multi(void)
 
        /* replace 1st from the top program */
        attach_opts.replace_prog_fd = allow_prog[0];
-       if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
                                        BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "prog_replace", "errno=%d\n", errno))
                goto err;
 
        /* replace program with itself */
        attach_opts.replace_prog_fd = allow_prog[6];
-       if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
                                        BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "prog_replace", "errno=%d\n", errno))
                goto err;
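
For reference, the renamed bpf_prog_attach_opts() keeps the old bpf_prog_attach_xattr() semantics. A minimal replace-style attach sketch (cg_fd, old_fd and new_fd are assumed to be valid descriptors):

    #include <linux/bpf.h>
    #include <bpf/bpf.h>

    static int replace_egress_prog(int cg_fd, int old_fd, int new_fd)
    {
            LIBBPF_OPTS(bpf_prog_attach_opts, opts,
                    .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE,
                    .replace_prog_fd = old_fd,
            );

            return bpf_prog_attach_opts(new_fd, cg_fd,
                                        BPF_CGROUP_INET_EGRESS, &opts);
    }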
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
new file mode 100644 (file)
index 0000000..0b47c3c
--- /dev/null
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+
+#define SOL_CUSTOM     0xdeadbeef
+
+static int zero;
+
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a setsockopt prog that sets EUNATCH; assert that
+        * we actually get that error when we run setsockopt().
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, and one that gets the
+        * previously set errno. Assert that we get the same errno back.
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that gets the previously set errno.
+        * Assert that, without anything setting one, we get 0.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                 &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that gets the previously set errno, and then
+        * one that sets the errno to EUNATCH. Assert that the get does not
+        * see EUNATCH set later, and does not prevent EUNATCH from being set.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+       bpf_link__destroy(link_set_eunatch);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
+        * and then one that gets the exported errno. Assert that both the
+        * syscall and the helper see the last set errno.
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_set_eisconn);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a setsockopt prog that returns a reject without setting
+        * errno (legacy reject), and one that gets the errno. Assert that,
+        * for backward compatibility, the syscall results in EPERM and that
+        * this is also visible to the helper.
+        */
+       link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_legacy_eperm);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a setsockopt prog that sets EUNATCH, then one that returns
+        * a reject without setting errno, and then one that gets the exported
+        * errno. Assert that both the syscall and the helper's errno are
+        * unaffected by the second prog (i.e. a legacy reject does not
+        * override the errno to EPERM).
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_legacy_eperm);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_get_retval = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt prog that gets the previously set errno.
+        * Assert that the error from the kernel lands in both
+        * ctx_retval_value and retval_value.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                  &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_set_eisconn = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt prog that sets retval to -EISCONN. Assert that
+        * this overrides the value from the kernel.
+        */
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                  &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eisconn);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL;
+       struct bpf_link *link_get_retval = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt prog that sets retval to -EISCONN, and one that
+        * clears the ctx retval. Assert that clearing the ctx retval is synced
+        * to the helper and clears any errors from both the kernel and BPF.
+        */
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+       link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                 &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eisconn);
+       bpf_link__destroy(link_clear_retval);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+void test_cgroup_getset_retval(void)
+{
+       int cgroup_fd = -1;
+       int sock_fd = -1;
+
+       cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+       if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+               goto close_fd;
+
+       sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+       if (!ASSERT_GE(sock_fd, 0, "start-server"))
+               goto close_fd;
+
+       if (test__start_subtest("setsockopt-set"))
+               test_setsockopt_set(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-set_and_get"))
+               test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-default_zero"))
+               test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-default_zero_and_set"))
+               test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-override"))
+               test_setsockopt_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-legacy_eperm"))
+               test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-legacy_no_override"))
+               test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-get"))
+               test_getsockopt_get(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-override"))
+               test_getsockopt_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-retval_sync"))
+               test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+close_fd:
+       close(cgroup_fd);
+}
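These subtests exercise the new retval plumbing: a cgroup/getsockopt program can
fail the syscall even while returning 1 by setting a negative retval (via
bpf_set_retval() or ctx->retval), and writing ctx->retval = 0 clears a pending
error from either the kernel or BPF. A hedged sketch of what userspace observes
when only the set_eisconn program above is attached, for a SOL_CUSTOM option the
kernel itself does not handle:

	int buf;
	socklen_t optlen = sizeof(buf);

	/* bpf_set_retval(-EISCONN) in set_eisconn reaches userspace intact */
	if (getsockopt(sock_fd, SOL_CUSTOM, 0, &buf, &optlen) < 0 &&
	    errno == EISCONN)
		puts("BPF-injected error observed");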
index ac54e3f..dfafd62 100644 (file)
@@ -457,7 +457,7 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
        if (map_fd < 0)
                return -1;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
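This hunk (and the tailcall ones further below) migrates off bpf_map__def(),
which is deprecated ahead of libbpf 1.0 in favor of per-field getters. The
substitution is mechanical; a small before/after sketch for reference:

	/* deprecated: reaches into struct bpf_map_def directly */
	max = bpf_map__def(prog_array)->max_entries;
	vsz = bpf_map__def(map)->value_size;

	/* preferred: getters that work however the map was defined */
	max = bpf_map__max_entries(prog_array);
	vsz = bpf_map__value_size(map);

bpf_map__type() and bpf_map__key_size() round out the same getter family; the
sockmap hunk below uses bpf_map__type() the same way.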
index 9da131b..917165e 100644 (file)
@@ -121,7 +121,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       buff = malloc(bpf_map__def(map)->value_size);
+       buff = malloc(bpf_map__value_size(map));
        if (buff)
                err = bpf_map_update_elem(map_fd, &zero, buff, 0);
        free(buff);
index 1db86ea..57331c6 100644 (file)
@@ -20,7 +20,7 @@ void test_global_data_init(void)
        if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
                goto out;
 
-       sz = bpf_map__def(map)->value_size;
+       sz = bpf_map__value_size(map);
        newval = malloc(sz);
        if (CHECK_FAIL(!newval))
                goto out;
index 7d7445c..b39a4f0 100644 (file)
@@ -27,6 +27,12 @@ static void test_main(void)
        ASSERT_OK(err, "bpf_prog_test_run(test2)");
        ASSERT_EQ(retval, 3, "test2-retval");
 
+       prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+                               NULL, NULL, (__u32 *)&retval, NULL);
+       ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
+       ASSERT_EQ(retval, 0, "test_ref_btf_id-retval");
+
        kfunc_call_test_lskel__destroy(skel);
 }
 
index 85db0f4..b97a8f2 100644 (file)
@@ -8,6 +8,7 @@
 #include "test_sockmap_update.skel.h"
 #include "test_sockmap_invalid_update.skel.h"
 #include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
 #define TCP_REPAIR             19      /* TCP sock is under repair right now */
@@ -315,6 +316,63 @@ out:
        test_sockmap_skb_verdict_attach__destroy(skel);
 }
 
+static __u32 query_prog_id(int prog_fd)
+{
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       int err;
+
+       err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
+           !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+               return 0;
+
+       return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+       struct test_sockmap_progs_query *skel;
+       int err, map_fd, verdict_fd;
+       __u32 attach_flags = 0;
+       __u32 prog_ids[3] = {};
+       __u32 prog_cnt = 3;
+
+       skel = test_sockmap_progs_query__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+               return;
+
+       map_fd = bpf_map__fd(skel->maps.sock_map);
+
+       if (attach_type == BPF_SK_MSG_VERDICT)
+               verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+       else
+               verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+       err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       ASSERT_OK(err, "bpf_prog_query failed");
+       ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+       ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+       err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+       if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+               goto out;
+
+       prog_cnt = 1;
+       err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       ASSERT_OK(err, "bpf_prog_query failed");
+       ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+       ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+       ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+                 "wrong prog_ids on query");
+
+       bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+       test_sockmap_progs_query__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
        if (test__start_subtest("sockmap create_update_free"))
@@ -341,4 +399,12 @@ void test_sockmap_basic(void)
                test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
                                                BPF_SK_SKB_VERDICT);
        }
+       if (test__start_subtest("sockmap msg_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+       if (test__start_subtest("sockmap stream_parser progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+       if (test__start_subtest("sockmap stream_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+       if (test__start_subtest("sockmap skb_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
 }
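bpf_prog_query() above writes up to *prog_cnt program ids and updates the count
in place. When the number of attached programs is not known in advance, a hedged
two-step sketch (passing NULL ids first to learn the count):

	__u32 cnt = 0;
	__u32 *ids;

	/* with prog_ids == NULL, only the attached-program count is filled in */
	if (!bpf_prog_query(map_fd, BPF_SK_SKB_VERDICT, 0, NULL, NULL, &cnt)) {
		ids = calloc(cnt, sizeof(*ids));
		bpf_prog_query(map_fd, BPF_SK_SKB_VERDICT, 0, NULL, ids, &cnt);
	}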
index 7e21bfa..2cf0c7a 100644 (file)
@@ -1413,14 +1413,12 @@ close_srv1:
 
 static void test_ops_cleanup(const struct bpf_map *map)
 {
-       const struct bpf_map_def *def;
        int err, mapfd;
        u32 key;
 
-       def = bpf_map__def(map);
        mapfd = bpf_map__fd(map);
 
-       for (key = 0; key < def->max_entries; key++) {
+       for (key = 0; key < bpf_map__max_entries(map); key++) {
                err = bpf_map_delete_elem(mapfd, &key);
                if (err && errno != EINVAL && errno != ENOENT)
                        FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
@@ -1443,13 +1441,13 @@ static const char *family_str(sa_family_t family)
 
 static const char *map_type_str(const struct bpf_map *map)
 {
-       const struct bpf_map_def *def;
+       int type;
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def))
+       if (!map)
                return "invalid";
+       type = bpf_map__type(map);
 
-       switch (def->type) {
+       switch (type) {
        case BPF_MAP_TYPE_SOCKMAP:
                return "sockmap";
        case BPF_MAP_TYPE_SOCKHASH:
index 4b937e5..30a99d2 100644 (file)
@@ -173,11 +173,11 @@ static int getsetsockopt(void)
        }
 
        memset(&buf, 0, sizeof(buf));
-       buf.zc.address = 12345; /* rejected by BPF */
+       buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
        optlen = sizeof(buf.zc);
        errno = 0;
        err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
-       if (errno != EPERM) {
+       if (errno != EINVAL) {
                log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
                        err, errno);
                goto err;
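Context for the EPERM to EINVAL flip: with the retval rework these patches land,
a getsockopt program returning 0 no longer forces -EPERM over the kernel's own
error, so the unaligned address is now reported as tcp_zerocopy_receive()'s
-EINVAL. For contrast, a hedged sketch of a well-formed zerocopy-receive call
(fd is a connected TCP socket; page size assumed to be 4096):

	struct tcp_zerocopy_receive zc = {};
	socklen_t zc_len = sizeof(zc);
	size_t chunk = 4096;
	void *addr;

	/* map receive pages from the socket itself; address is page aligned */
	addr = mmap(NULL, chunk, PROT_READ, MAP_SHARED, fd, 0);
	zc.address = (__u64)(unsigned long)addr;
	zc.length = chunk;
	getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);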
index 5dc0f42..796f231 100644 (file)
@@ -37,7 +37,7 @@ static void test_tailcall_1(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -53,7 +53,7 @@ static void test_tailcall_1(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
                                        &duration, &retval, NULL);
                CHECK(err || retval != i, "tailcall",
@@ -69,7 +69,7 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -90,8 +90,8 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-               j = bpf_map__def(prog_array)->max_entries - 1 - i;
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+               j = bpf_map__max_entries(prog_array) - 1 - i;
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -107,8 +107,8 @@ static void test_tailcall_1(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-               j = bpf_map__def(prog_array)->max_entries - 1 - i;
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+               j = bpf_map__max_entries(prog_array) - 1 - i;
 
                err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
                                        &duration, &retval, NULL);
@@ -125,7 +125,7 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_delete_elem(map_fd, &i);
                if (CHECK_FAIL(err >= 0 || errno != ENOENT))
                        goto out;
@@ -175,7 +175,7 @@ static void test_tailcall_2(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -353,7 +353,7 @@ static void test_tailcall_4(void)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -369,7 +369,7 @@ static void test_tailcall_4(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -380,7 +380,7 @@ static void test_tailcall_4(void)
                      "err %d errno %d retval %d\n", err, errno, retval);
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -441,7 +441,7 @@ static void test_tailcall_5(void)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -457,7 +457,7 @@ static void test_tailcall_5(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -468,7 +468,7 @@ static void test_tailcall_5(void)
                      "err %d errno %d retval %d\n", err, errno, retval);
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -520,7 +520,7 @@ static void test_tailcall_bpf2bpf_1(void)
                goto out;
 
        /* nop -> jmp */
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -681,7 +681,7 @@ static void test_tailcall_bpf2bpf_3(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -778,7 +778,7 @@ static void test_tailcall_bpf2bpf_4(bool noise)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
new file mode 100644 (file)
index 0000000..31c1886
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_xdp_update_frags(void)
+{
+       const char *file = "./test_xdp_update_frags.o";
+       __u32 duration, retval, size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, prog_fd;
+       __u32 *offset;
+       __u8 *buf;
+
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               goto out;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(128);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+               goto out;
+
+       memset(buf, 0, 128);
+       offset = (__u32 *)buf;
+       *offset = 16;
+       buf[*offset] = 0xaa;            /* marker at offset 16 (head) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 31 (head) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 128,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+       ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+       free(buf);
+
+       buf = malloc(9000);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+               goto out;
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 5000;
+       buf[*offset] = 0xaa;            /* marker at offset 5000 (frag0) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 5015 (frag0) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+       ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 3510;
+       buf[*offset] = 0xaa;            /* marker at offset 3510 (head) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 3525 (frag0) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+       ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 7606;
+       buf[*offset] = 0xaa;            /* marker at offset 7606 (frag0) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 7621 (frag1) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+       ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+       free(buf);
+out:
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags(void)
+{
+       if (test__start_subtest("xdp_adjust_frags"))
+               test_xdp_update_frags();
+}
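The userspace half above encodes the patch offset in the packet's first four
bytes and expects the 16-byte marker window rewritten from 0xaa to 0xbb. A
plausible sketch of the BPF half, assuming the bpf_xdp_load_bytes() and
bpf_xdp_store_bytes() helpers and the xdp.frags section suffix introduced in
this series (the shipped test_xdp_update_frags.c may differ in detail):

	// SPDX-License-Identifier: GPL-2.0
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp.frags")
	int xdp_update_frags(struct xdp_md *xdp)
	{
		__u32 offset = 0;
		__u8 val[16];

		/* first 4 bytes of the packet carry the offset to patch */
		if (bpf_xdp_load_bytes(xdp, 0, &offset, sizeof(offset)))
			return XDP_DROP;

		/* the helpers work across frag boundaries, unlike direct
		 * data/data_end access
		 */
		if (bpf_xdp_load_bytes(xdp, offset, val, sizeof(val)))
			return XDP_DROP;

		if (val[0] == 0xaa && val[15] == 0xaa) {
			val[0] = 0xbb;
			val[15] = 0xbb;
			if (bpf_xdp_store_bytes(xdp, offset, val, sizeof(val)))
				return XDP_DROP;
		}

		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";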
index 3f5a17c..ccc9e63 100644 (file)
@@ -11,22 +11,21 @@ static void test_xdp_adjust_tail_shrink(void)
        char buf[128];
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
                return;
 
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
-
-       CHECK(err || retval != XDP_DROP,
-             "ipv4", "err %d errno %d retval %d size %d\n",
-             err, errno, retval, size);
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
        expect_sz = sizeof(pkt_v6) - 20;  /* Test shrink with 20 bytes */
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_TX || size != expect_sz,
-             "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-             err, errno, retval, size, expect_sz);
+       ASSERT_OK(err, "ipv6");
+       ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+       ASSERT_EQ(size, expect_sz, "ipv6 size");
+
        bpf_object__close(obj);
 }
 
@@ -39,21 +38,20 @@ static void test_xdp_adjust_tail_grow(void)
        int err, prog_fd;
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
                return;
 
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_DROP,
-             "ipv4", "err %d errno %d retval %d size %d\n",
-             err, errno, retval, size);
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
        expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_TX || size != expect_sz,
-             "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-             err, errno, retval, size, expect_sz);
+       ASSERT_OK(err, "ipv6");
+       ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+       ASSERT_EQ(size, expect_sz, "ipv6 size");
 
        bpf_object__close(obj);
 }
@@ -76,7 +74,7 @@ static void test_xdp_adjust_tail_grow2(void)
        };
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
-       if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow2"))
                return;
 
        /* Test case-64 */
@@ -86,21 +84,17 @@ static void test_xdp_adjust_tail_grow2(void)
        /* Kernel side alloc packet memory area that is zero init */
        err = bpf_prog_test_run_xattr(&tattr);
 
-       CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
-                  || tattr.retval != XDP_TX
-                  || tattr.data_size_out != 192, /* Expected grow size */
-                  "case-64",
-                  "err %d errno %d retval %d size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out);
+       ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due to the copy_size limit in bpf_test_finish() */
+       ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+       ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
 
        /* Extra checks for data contents */
-       CHECK_ATTR(tattr.data_size_out != 192
-                  || buf[0]   != 1 ||  buf[63]  != 1  /*  0-63  memset to 1 */
-                  || buf[64]  != 0 ||  buf[127] != 0  /* 64-127 memset to 0 */
-                  || buf[128] != 1 ||  buf[191] != 1, /*128-191 memset to 1 */
-                  "case-64-data",
-                  "err %d errno %d retval %d size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out);
+       ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /*  0-63  memset to 1 */
+       ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+       ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+       ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+       ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+       ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
 
        /* Test case-128 */
        memset(buf, 2, sizeof(buf));
@@ -109,24 +103,139 @@ static void test_xdp_adjust_tail_grow2(void)
        err = bpf_prog_test_run_xattr(&tattr);
 
        max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
-       CHECK_ATTR(err
-                  || tattr.retval != XDP_TX
-                  || tattr.data_size_out != max_grow,/* Expect max grow size */
-                  "case-128",
-                  "err %d errno %d retval %d size %d expect-size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out, max_grow);
+       ASSERT_OK(err, "case-128");
+       ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+       ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
 
        /* Extra checks for data content: Count grow size, will contain zeros */
        for (i = 0, cnt = 0; i < sizeof(buf); i++) {
                if (buf[i] == 0)
                        cnt++;
        }
-       CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
-                  || tattr.data_size_out != max_grow, /* Total grow size */
-                  "case-128-data",
-                  "err %d errno %d retval %d size %d grow-size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out, cnt);
+       ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+       ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
+
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_shrink(void)
+{
+       const char *file = "./test_xdp_adjust_tail_shrink.o";
+       __u32 duration, retval, size, exp_size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, prog_fd;
+       __u8 *buf;
+
+       /* For the individual test cases, the first byte in the packet
+        * indicates which test will be run.
+        */
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               goto out;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(9000);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+               goto out;
+
+       memset(buf, 0, 9000);
+
+       /* Test case removing 10 bytes from last frag, NOT freeing it */
+       exp_size = 8990; /* 9000 - 10 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-10b");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval");
+       ASSERT_EQ(size, exp_size, "9Kb-10b size");
+
+       /* Test case removing one of two pages, assuming 4K pages */
+       buf[0] = 1;
+       exp_size = 4900; /* 9000 - 4100 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-4Kb");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval");
+       ASSERT_EQ(size, exp_size, "9Kb-4Kb size");
+
+       /* Test case removing two pages resulting in a linear xdp_buff */
+       buf[0] = 2;
+       exp_size = 800; /* 9000 - 8200 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-9Kb");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval");
+       ASSERT_EQ(size, exp_size, "9Kb-9Kb size");
+
+       free(buf);
+out:
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_grow(void)
+{
+       const char *file = "./test_xdp_adjust_tail_grow.o";
+       __u32 duration, retval, size, exp_size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, i, prog_fd;
+       __u8 *buf;
+
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               goto out;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(16384);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+               goto out;
+
+       /* Test case add 10 bytes to last frag */
+       memset(buf, 1, 16384);
+       size = 9000;
+       exp_size = size + 10;
+       err = bpf_prog_test_run(prog_fd, 1, buf, size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb+10b");
+       ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval");
+       ASSERT_EQ(size, exp_size, "9Kb+10b size");
+
+       for (i = 0; i < 9000; i++)
+               ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+
+       for (i = 9000; i < 9010; i++)
+               ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+
+       for (i = 9010; i < 16384; i++)
+               ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+
+       /* Test a too large grow */
+       memset(buf, 1, 16384);
+       size = 9001;
+       exp_size = size;
+       err = bpf_prog_test_run(prog_fd, 1, buf, size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "too-large-grow");
+       ASSERT_EQ(retval, XDP_DROP, "too-large-grow retval");
+       ASSERT_EQ(size, exp_size, "too-large-grow size");
 
+       free(buf);
+out:
        bpf_object__close(obj);
 }
 
@@ -138,4 +247,8 @@ void test_xdp_adjust_tail(void)
                test_xdp_adjust_tail_grow();
        if (test__start_subtest("xdp_adjust_tail_grow2"))
                test_xdp_adjust_tail_grow2();
+       if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+               test_xdp_adjust_frags_tail_shrink();
+       if (test__start_subtest("xdp_adjust_frags_tail_grow"))
+               test_xdp_adjust_frags_tail_grow();
 }
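The frags shrink subtest drives three cases keyed on the packet's first byte,
matching the expected sizes asserted above (shrinks of 10, 4100, and 8200
bytes). A hedged sketch of that dispatch, assuming the xdp.frags section suffix
from this series; the shipped test_xdp_adjust_tail_shrink.o may differ:

	SEC("xdp.frags")
	int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
	{
		void *data_end = (void *)(long)xdp->data_end;
		void *data = (void *)(long)xdp->data;
		int offset;

		if (data + 1 > data_end)
			return XDP_DROP;

		switch (*(__u8 *)data) {
		case 1:
			offset = -4100;	/* free one 4K page */
			break;
		case 2:
			offset = -8200;	/* collapse back to a linear buffer */
			break;
		default:
			offset = -10;	/* trim the last frag, keeping it */
			break;
		}

		if (bpf_xdp_adjust_tail(xdp, offset))
			return XDP_DROP;

		return XDP_TX;
	}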
index c98a897..9c395ea 100644 (file)
@@ -10,40 +10,97 @@ struct meta {
        int pkt_len;
 };
 
+struct test_ctx_s {
+       bool passed;
+       int pkt_size;
+};
+
+struct test_ctx_s test_ctx;
+
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
-       int duration = 0;
        struct meta *meta = (struct meta *)data;
        struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+       unsigned char *raw_pkt = data + sizeof(*meta);
+       struct test_ctx_s *tst_ctx = ctx;
+
+       ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+       ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+       ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
+       ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+                 "check_packet_content");
+
+       if (meta->pkt_len > sizeof(pkt_v4)) {
+               for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+                       ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+                                 "check_packet_content");
+       }
+
+       tst_ctx->passed = true;
+}
 
-       if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
-                 "check_size", "size %u < %zu\n",
-                 size, sizeof(pkt_v4) + sizeof(*meta)))
-               return;
+#define BUF_SZ 9000
 
-       if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
-                 "meta->ifindex = %d\n", meta->ifindex))
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+                                    struct test_xdp_bpf2bpf *ftrace_skel,
+                                    int pkt_size)
+{
+       __u32 duration = 0, retval, size;
+       __u8 *buf, *buf_in;
+       int err;
+
+       if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+           !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
                return;
 
-       if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
-                 "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+       buf_in = malloc(BUF_SZ);
+       if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
                return;
 
-       if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
-                 "check_packet_content", "content not the same\n"))
+       buf = malloc(BUF_SZ);
+       if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+               free(buf_in);
                return;
+       }
+
+       test_ctx.passed = false;
+       test_ctx.pkt_size = pkt_size;
+
+       memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+       if (pkt_size > sizeof(pkt_v4)) {
+               for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+                       buf_in[i + sizeof(pkt_v4)] = i;
+       }
+
+       /* Run test program */
+       err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_PASS, "ipv4 retval");
+       ASSERT_EQ(size, pkt_size, "ipv4 size");
+
+       /* Make sure bpf_xdp_output() was triggered and it sent the expected
+        * data to the perf ring buffer.
+        */
+       err = perf_buffer__poll(pb, 100);
 
-       *(bool *)ctx = true;
+       ASSERT_GE(err, 0, "perf_buffer__poll");
+       ASSERT_TRUE(test_ctx.passed, "test passed");
+       /* Verify test results */
+       ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+                 "fentry result");
+       ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
+
+       free(buf);
+       free(buf_in);
 }
 
 void test_xdp_bpf2bpf(void)
 {
-       __u32 duration = 0, retval, size;
-       char buf[128];
        int err, pkt_fd, map_fd;
-       bool passed = false;
-       struct iphdr iph;
-       struct iptnl_info value4 = {.family = AF_INET};
+       int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+       struct iptnl_info value4 = {.family = AF_INET6};
        struct test_xdp *pkt_skel = NULL;
        struct test_xdp_bpf2bpf *ftrace_skel = NULL;
        struct vip key4 = {.protocol = 6, .family = AF_INET};
@@ -52,7 +109,7 @@ void test_xdp_bpf2bpf(void)
 
        /* Load XDP program to introspect */
        pkt_skel = test_xdp__open_and_load();
-       if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+       if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
                return;
 
        pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
@@ -62,7 +119,7 @@ void test_xdp_bpf2bpf(void)
 
        /* Load trace program */
        ftrace_skel = test_xdp_bpf2bpf__open();
-       if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+       if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
                goto out;
 
        /* Demonstrate the bpf_program__set_attach_target() API rather than
@@ -77,50 +134,24 @@ void test_xdp_bpf2bpf(void)
        bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
 
        err = test_xdp_bpf2bpf__load(ftrace_skel);
-       if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+       if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
                goto out;
 
        err = test_xdp_bpf2bpf__attach(ftrace_skel);
-       if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+       if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
                goto out;
 
        /* Set up perf buffer */
-       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1,
-                             on_sample, NULL, &passed, NULL);
+       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+                             on_sample, NULL, &test_ctx, NULL);
        if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto out;
 
-       /* Run test program */
-       err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
-                               buf, &size, &retval, &duration);
-       memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
-       if (CHECK(err || retval != XDP_TX || size != 74 ||
-                 iph.protocol != IPPROTO_IPIP, "ipv4",
-                 "err %d errno %d retval %d size %d\n",
-                 err, errno, retval, size))
-               goto out;
-
-       /* Make sure bpf_xdp_output() was triggered and it sent the expected
-        * data to the perf ring buffer.
-        */
-       err = perf_buffer__poll(pb, 100);
-       if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
-               goto out;
-
-       CHECK_FAIL(!passed);
-
-       /* Verify test results */
-       if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
-                 "result", "fentry failed err %llu\n",
-                 ftrace_skel->bss->test_result_fentry))
-               goto out;
-
-       CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
-             "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+       for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+               run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+                                        pkt_sizes[i]);
 out:
-       if (pb)
-               perf_buffer__free(pb);
+       perf_buffer__free(pb);
        test_xdp__destroy(pkt_skel);
        test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
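One easy-to-miss change above: the perf buffer grows from 1 to 8 pages per CPU,
since an 8200-byte test packet plus metadata can no longer fit a single sample
into a one-page ring. For reference, the libbpf signature in use here (page_cnt
is the per-CPU ring size in pages and must be a power of two):

	struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
					     perf_buffer_sample_fn sample_cb,
					     perf_buffer_lost_fn lost_cb,
					     void *ctx,
					     const struct perf_buffer_opts *opts);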
index fd812bd..13aabb3 100644 (file)
@@ -3,11 +3,12 @@
 #include <linux/if_link.h>
 #include <test_progs.h>
 
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
 #include "test_xdp_with_cpumap_helpers.skel.h"
 
 #define IFINDEX_LO     1
 
-void serial_test_xdp_cpumap_attach(void)
+void test_xdp_with_cpumap_helpers(void)
 {
        struct test_xdp_with_cpumap_helpers *skel;
        struct bpf_prog_info info = {};
@@ -54,6 +55,67 @@ void serial_test_xdp_cpumap_attach(void)
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
        ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
 
+       /* Try to attach BPF_XDP program with frags to cpumap when we have
+        * already loaded a BPF_XDP program on the map
+        */
+       idx = 1;
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
+
 out_close:
        test_xdp_with_cpumap_helpers__destroy(skel);
 }
+
+void test_xdp_with_cpumap_frags_helpers(void)
+{
+       struct test_xdp_with_cpumap_frags_helpers *skel;
+       struct bpf_prog_info info = {};
+       __u32 len = sizeof(info);
+       struct bpf_cpumap_val val = {
+               .qsize = 192,
+       };
+       int err, frags_prog_fd, map_fd;
+       __u32 idx = 0;
+
+       skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_frags_helpers__open_and_load"))
+               return;
+
+       frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+       map_fd = bpf_map__fd(skel->maps.cpu_map);
+       err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+               goto out_close;
+
+       val.bpf_prog.fd = frags_prog_fd;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_OK(err, "Add program to cpumap entry");
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       ASSERT_OK(err, "Read cpumap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id,
+                 "Match program id to cpumap entry prog_id");
+
+       /* Try to attach BPF_XDP program to cpumap when we have
+        * already loaded a BPF_XDP program with frags on the map
+        */
+       idx = 1;
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+       test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
+void serial_test_xdp_cpumap_attach(void)
+{
+       if (test__start_subtest("CPUMAP with programs in entries"))
+               test_xdp_with_cpumap_helpers();
+
+       if (test__start_subtest("CPUMAP with frags programs in entries"))
+               test_xdp_with_cpumap_frags_helpers();
+}
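Both cpumap subtests (and the devmap analogues below) exercise the generalised
map-compatibility check from this series: the first program stored in a
cpumap/devmap fixes the map's frags capability (bpf_prog_map_compatible()), so
later entries whose xdp_has_frags flag mismatches are rejected. A minimal sketch
of the two dummy programs, assuming the xdp.frags section suffix; the shipped
skeleton sources may differ:

	SEC("xdp/cpumap")
	int xdp_dummy_cm(struct xdp_md *ctx)
	{
		return XDP_PASS;
	}

	SEC("xdp.frags/cpumap")
	int xdp_dummy_cm_frags(struct xdp_md *ctx)
	{
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";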
index 3079d55..2a784cc 100644 (file)
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 
 #include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
 #include "test_xdp_with_devmap_helpers.skel.h"
 
 #define IFINDEX_LO 1
@@ -56,6 +57,15 @@ static void test_xdp_with_devmap_helpers(void)
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
        ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
 
+       /* Try to attach BPF_XDP program with frags to devmap when we have
+        * already loaded a BPF_XDP program on the map
+        */
+       idx = 1;
+       val.ifindex = 1;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
+
 out_close:
        test_xdp_with_devmap_helpers__destroy(skel);
 }
@@ -71,12 +81,57 @@ static void test_neg_xdp_devmap_helpers(void)
        }
 }
 
+void test_xdp_with_devmap_frags_helpers(void)
+{
+       struct test_xdp_with_devmap_frags_helpers *skel;
+       struct bpf_prog_info info = {};
+       struct bpf_devmap_val val = {
+               .ifindex = IFINDEX_LO,
+       };
+       __u32 len = sizeof(info);
+       int err, dm_fd_frags, map_fd;
+       __u32 idx = 0;
+
+       skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_frags_helpers__open_and_load"))
+               return;
+
+       dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+       map_fd = bpf_map__fd(skel->maps.dm_ports);
+       err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+               goto out_close;
+
+       val.bpf_prog.fd = dm_fd_frags;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_OK(err, "Add frags program to devmap entry");
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       ASSERT_OK(err, "Read devmap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id,
+                 "Match program id to devmap entry prog_id");
+
+       /* Try to attach BPF_XDP program to devmap when we have
+        * already loaded a BPF_XDP program with frags on the map
+        */
+       idx = 1;
+       val.ifindex = 1;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+       test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
 
 void serial_test_xdp_devmap_attach(void)
 {
        if (test__start_subtest("DEVMAP with programs in entries"))
                test_xdp_with_devmap_helpers();
 
+       if (test__start_subtest("DEVMAP with frags programs in entries"))
+               test_xdp_with_devmap_frags_helpers();
+
        if (test__start_subtest("Verifier check of DEVMAP programs"))
                test_neg_xdp_devmap_helpers();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
new file mode 100644 (file)
index 0000000..eafc877
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+#define AUTOBIND_LEN 6
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+int sndbuf_getsockopt_expected[NR_CASES];
+
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+       int i;
+
+       for (i = 0; i < AUTOBIND_LEN; i++) {
+               if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+                       return -1;
+       }
+
+       return 0;
+}
+
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+       struct unix_sock *unix_sk = ctx->unix_sk;
+       int i, err;
+
+       if (!unix_sk || !unix_sk->addr)
+               return 0;
+
+       if (unix_sk->addr->name->sun_path[0])
+               return 0;
+
+       if (cmpname(unix_sk))
+               return 0;
+
+       for (i = 0; i < NR_CASES; i++) {
+               err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+                                    &sndbuf_setsockopt[i],
+                                    sizeof(sndbuf_setsockopt[i]));
+               if (err)
+                       break;
+
+               err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+                                    &sndbuf_getsockopt[i],
+                                    sizeof(sndbuf_getsockopt[i]));
+               if (err)
+                       break;
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
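A userspace driver for this iterator (hedged sketch; skeleton and field names
assumed) attaches the program, creates an iterator fd, and reads it to walk
every unix socket, after which sndbuf_getsockopt[] can be compared against the
expected values. Note the kernel stores roughly double the requested SO_SNDBUF
(sk_sndbuf = max(val * 2, SOCK_MIN_SNDBUF)), which is why the expected values
are computed rather than echoed back:

	struct bpf_link *link;
	char buf[16];
	int iter_fd;

	link = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
	iter_fd = bpf_iter_create(bpf_link__fd(link));

	/* each read() batch runs change_sndbuf over more unix sockets */
	while (read(iter_fd, buf, sizeof(buf)) > 0)
		;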
index c21e3f5..e6aefae 100644 (file)
@@ -63,7 +63,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
                        BPF_SEQ_PRINTF(seq, " @");
 
                        for (i = 1; i < len; i++) {
-                               /* unix_mkname() tests this upper bound. */
+                               /* unix_validate_addr() tests this upper bound. */
                                if (i >= sizeof(struct sockaddr_un))
                                        break;
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
new file mode 100644 (file)
index 0000000..82a5c6c
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile struct {
+       /* thread to activate trace programs for */
+       pid_t tgid;
+       /* return error from __init function */
+       int inject_error;
+       /* uffd monitored range start address */
+       void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+int bpf_blocking = 0;
+int res_try_get_module = -1;
+
+static __always_inline bool check_thread_id(void)
+{
+       struct task_struct *task = bpf_get_current_task_btf();
+
+       return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ *   load_module()
+ *     prepare_coming_module()
+ *       notifier_call(MODULE_STATE_COMING)
+ *         btf_parse_module()
+ *         btf_alloc_id()              // Visible to userspace at this point
+ *         list_add(btf_mod->list, &btf_modules)
+ *     do_init_module()
+ *       freeinit = kmalloc()
+ *       ret = mod->init()
+ *         bpf_prog_widen_race()
+ *           bpf_copy_from_user()
+ *             ...<sleep>...
+ *       if (ret < 0)
+ *         ...
+ *         free_module()
+ * return ret
+ *
+ * At this point, the module loading thread is blocked; we now load the program:
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module_live == false
+ *     return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module == true
+ *     <store module reference in btf_kfunc_tab or used_btf array>
+ *   ...
+ * return fd
+ *
+ * Now, if we inject an error in the blocked program, our module will be freed
+ * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when the BPF program is freed, it will try to module_put() the
+ * already-freed module. This is why try_get_module_live returns false if
+ * mod->state is not MODULE_STATE_LIVE.
+ */
+
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+       char dst;
+
+       if (!check_thread_id())
+               return 0;
+       /* Indicate that we will attempt to block */
+       bpf_blocking = 1;
+       bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+       return bpf_mod_race_config.inject_error;
+}
+
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+       if (!check_thread_id())
+               return 0;
+       /* Indicate that we finished blocking */
+       bpf_blocking = 2;
+       return 0;
+}
+
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+       res_try_get_module = !!mod;
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index e0f4260..1c1289b 100644 (file)
@@ -5,6 +5,8 @@
 #define AF_INET                        2
 #define AF_INET6               10
 
+#define SOL_SOCKET             1
+#define SO_SNDBUF              7
 #define __SO_ACCEPTCON         (1 << 16)
 
 #define SOL_TCP                        6
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
new file mode 100644 (file)
index 0000000..b2a409e
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+
+SEC("cgroup/getsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+       retval_value = bpf_get_retval();
+       ctx_retval_value = ctx->retval;
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EISCONN))
+               assertion_error = 1;
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int clear_retval(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       ctx->retval = 0;
+
+       return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
new file mode 100644 (file)
index 0000000..d6e5903
--- /dev/null
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+
+SEC("cgroup/setsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+       retval_value = bpf_get_retval();
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup/setsockopt")
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EUNATCH))
+               assertion_error = 1;
+
+       return 0;
+}
+
+SEC("cgroup/setsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EISCONN))
+               assertion_error = 1;
+
+       return 0;
+}
+
+SEC("cgroup/setsockopt")
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 0;
+}
index 68a5a9d..7e94412 100644 (file)
@@ -7,12 +7,12 @@
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") sock_map = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __type(key, int);
+       __type(value, int);
+       __uint(max_entries, 2);
+} sock_map SEC(".maps");
 
 SEC("freplace/cls_redirect")
 int freplace_cls_redirect_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
new file mode 100644 (file)
index 0000000..4e8fed7
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+SEC("tc")
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+       bpf_testmod_test_mod_kfunc(0);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 8a8cf59..5aecbb9 100644 (file)
@@ -1,13 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
 
 extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
 extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
                                  __u32 c, __u64 d) __ksym;
 
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
 SEC("tc")
 int kfunc_call_test2(struct __sk_buff *skb)
 {
@@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb)
        return ret;
 }
 
+SEC("tc")
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+       struct prog_test_ref_kfunc *pt;
+       unsigned long s = 0;
+       int ret = 0;
+
+       pt = bpf_kfunc_call_test_acquire(&s);
+       if (pt) {
+               if (pt->a != 42 || pt->b != 108)
+                       ret = -1;
+               bpf_kfunc_call_test_release(pt);
+       }
+       return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+       struct prog_test_pass1 p1 = {};
+       struct prog_test_pass2 p2 = {};
+       short a = 0;
+       __u64 b = 0;
+       long c = 0;
+       char d = 0;
+       int e = 0;
+
+       bpf_kfunc_call_test_pass_ctx(skb);
+       bpf_kfunc_call_test_pass1(&p1);
+       bpf_kfunc_call_test_pass2(&p2);
+
+       bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a));
+       bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+       bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+       bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+       bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+       bpf_kfunc_call_test_mem_len_fail2(&b, -1);
+
+       return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c
new file mode 100644 (file)
index 0000000..def97f2
--- /dev/null
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc")
+int ksym_fail(struct __sk_buff *ctx)
+{
+       return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+}
+
+char _license[] SEC("license") = "GPL";
index 1612a32..495990d 100644 (file)
@@ -2,19 +2,19 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") htab = {
-       .type = BPF_MAP_TYPE_HASH,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(long),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, __u32);
+       __type(value, long);
+       __uint(max_entries, 2);
+} htab SEC(".maps");
 
-struct bpf_map_def SEC("maps") array = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(long),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, long);
+       __uint(max_entries, 2);
+} array SEC(".maps");
 
 /* Sample program which should always load for testing control paths. */
 SEC(".text") int func()
index 95d5b94..c9abfe3 100644 (file)
@@ -7,8 +7,6 @@ int bpf_prog1(struct __sk_buff *skb)
 {
        void *data_end = (void *)(long) skb->data_end;
        void *data = (void *)(long) skb->data;
-       __u32 lport = skb->local_port;
-       __u32 rport = skb->remote_port;
        __u8 *d = data;
        int err;
 
index 79c8139..d0298dc 100644 (file)
@@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 */
 
                if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                if (((struct tcp_zerocopy_receive *)optval)->address != 0)
-                       return 0; /* EPERM, unexpected data */
+                       return 0; /* unexpected data */
 
                return 1;
        }
 
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                if (optval + 1 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                ctx->retval = 0; /* Reset system call return value to zero */
 
@@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 * bytes of data.
                 */
                if (optval_end - optval != page_size)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                return 1;
        }
 
        if (ctx->level != SOL_CUSTOM)
-               return 0; /* EPERM, deny everything except custom level */
+               return 0; /* deny everything except custom level */
 
        if (optval + 1 > optval_end)
-               return 0; /* EPERM, bounds check */
+               return 0; /* bounds check */
 
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
-               return 0; /* EPERM, couldn't get sk storage */
+               return 0; /* couldn't get sk storage */
 
        if (!ctx->retval)
-               return 0; /* EPERM, kernel should not have handled
+               return 0; /* kernel should not have handled
                           * SOL_CUSTOM, something is wrong!
                           */
        ctx->retval = 0; /* Reset system call return value to zero */
@@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
                /* Overwrite SO_SNDBUF value */
 
                if (optval + sizeof(__u32) > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                *(__u32 *)optval = 0x55AA;
                ctx->optlen = 4;
@@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
                /* Always use cubic */
 
                if (optval + 5 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                memcpy(optval, "cubic", 5);
                ctx->optlen = 5;
@@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                /* Original optlen is larger than PAGE_SIZE. */
                if (ctx->optlen != page_size * 2)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                if (optval + 1 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                /* Make sure we can trim the buffer. */
                optval[0] = 0;
@@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx)
                 * bytes of data.
                 */
                if (optval_end - optval != page_size)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                return 1;
        }
 
        if (ctx->level != SOL_CUSTOM)
-               return 0; /* EPERM, deny everything except custom level */
+               return 0; /* deny everything except custom level */
 
        if (optval + 1 > optval_end)
-               return 0; /* EPERM, bounds check */
+               return 0; /* bounds check */
 
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
-               return 0; /* EPERM, couldn't get sk storage */
+               return 0; /* couldn't get sk storage */
 
        storage->val = optval[0];
        ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
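
The comments above drop the "EPERM" prefix because all a sockopt program promises is the contract itself: returning 0 rejects the syscall and returning 1 lets it proceed, while the exact errno reported on rejection is decided by the kernel. A minimal allow-all sketch of that contract (illustrative only):

	SEC("cgroup/getsockopt")
	int allow_all(struct bpf_sockopt *ctx)
	{
		return 1;	/* 1 = allow, 0 = reject the getsockopt() call */
	}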
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
new file mode 100644 (file)
index 0000000..f00a973
--- /dev/null
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+                                 struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+                                 struct bpf_ct_opts___local *, u32) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+
+static __always_inline void
+nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
+                                  struct bpf_ct_opts___local *, u32),
+          void *ctx)
+{
+       struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+       struct bpf_sock_tuple bpf_tuple;
+       struct nf_conn *ct;
+
+       __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+       ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_bpf_tuple = opts_def.error;
+
+       opts_def.reserved[0] = 1;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.reserved[0] = 0;
+       opts_def.l4proto = IPPROTO_TCP;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_reserved = opts_def.error;
+
+       opts_def.netns_id = -2;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.netns_id = -1;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_netns_id = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_len_opts = opts_def.error;
+
+       opts_def.l4proto = IPPROTO_ICMP;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.l4proto = IPPROTO_TCP;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_eproto_l4proto = opts_def.error;
+
+       opts_def.netns_id = 0xf00f;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.netns_id = -1;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_enonet_netns_id = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_enoent_lookup = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_eafnosupport = opts_def.error;
+}
+
+SEC("xdp")
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+       nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
+       return 0;
+}
+
+SEC("tc")
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+       nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
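
Two conventions in this new test are worth spelling out: the ___local suffix marks a locally re-declared type that libbpf's CO-RE relocation matches against the kernel's struct bpf_ct_opts with the suffix stripped, and the trailing sizeof(opts) argument lets the kfunc reject callers built against a different struct layout (the EINVAL cases above). A hedged sketch of the success path the error tests never reach (tuple values invented for illustration):

	struct bpf_ct_opts___local opts = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
	struct bpf_sock_tuple tup = {};
	struct nf_conn *ct;

	tup.ipv4.saddr = 0x0100007f;	/* 127.0.0.1 in wire order, example only */
	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
	if (ct)
		bpf_ct_release(ct);	/* acquired references must be released */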
index 160ead6..07c94df 100644 (file)
@@ -9,12 +9,15 @@ struct ipv_counts {
        unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 struct bpf_map_def SEC("maps") btf_map = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(int),
        .value_size = sizeof(struct ipv_counts),
        .max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
 
index 1884a5b..762671a 100644 (file)
@@ -9,6 +9,8 @@ struct ipv_counts {
        unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 /* just to validate we can handle maps in multiple sections */
 struct bpf_map_def SEC("maps") btf_map_legacy = {
        .type = BPF_MAP_TYPE_ARRAY,
@@ -16,6 +18,7 @@ struct bpf_map_def SEC("maps") btf_map_legacy = {
        .value_size = sizeof(long long),
        .max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
 
index 15e0f99..1dabb88 100644 (file)
@@ -8,12 +8,12 @@ struct ipv_counts {
        unsigned int v6;
 };
 
-struct bpf_map_def SEC("maps") btf_map = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(struct ipv_counts),
-       .max_entries = 4,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct ipv_counts));
+       __uint(max_entries, 4);
+} btf_map SEC(".maps");
 
 __attribute__((noinline))
 int test_long_fname_2(void)
index c304cd5..37aacc6 100644 (file)
 
 #define NUM_CGROUP_LEVELS      4
 
-struct bpf_map_def SEC("maps") cgroup_ids = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(__u64),
-       .max_entries = NUM_CGROUP_LEVELS,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, __u64);
+       __uint(max_entries, NUM_CGROUP_LEVELS);
+} cgroup_ids SEC(".maps");
 
 static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644 (file)
index 0000000..9d58d61
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+       return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+       return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
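
This program pair exists so userspace can attach verdict programs to the sockmap and query them back. A hedged userspace sketch of that query (map_fd and the attach type are placeholders for whatever the test harness uses; error handling elided):

	__u32 prog_ids[1], prog_cnt = 1, attach_flags = 0;
	int err;

	/* reports the id of the attached sk_skb verdict program */
	err = bpf_prog_query(map_fd, BPF_SK_SKB_VERDICT, 0,
			     &attach_flags, prog_ids, &prog_cnt);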
index bf28814..950a70b 100644 (file)
 #define THROTTLE_RATE_BPS (5 * 1000 * 1000)
 
 /* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
-       .type = BPF_MAP_TYPE_HASH,
-       .key_size = sizeof(uint32_t),
-       .value_size = sizeof(uint64_t),
-       .max_entries = 1,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, uint32_t);
+       __type(value, uint64_t);
+       __uint(max_entries, 1);
+} flow_map SEC(".maps");
 
 static inline int throttle_flow(struct __sk_buff *skb)
 {
index cd747cd..6edebce 100644 (file)
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-struct bpf_map_def SEC("maps") results = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(__u32),
-       .max_entries = 3,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, __u32);
+       __uint(max_entries, 3);
+} results SEC(".maps");
 
 static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
                                           void *iph, __u32 ip_size,
index 199c61b..53b64c9 100644 (file)
@@ -7,11 +7,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 {
        void *data_end = (void *)(long)xdp->data_end;
        void *data = (void *)(long)xdp->data;
-       unsigned int data_len;
+       int data_len = bpf_xdp_get_buff_len(xdp);
        int offset = 0;
 
        /* Data length determines the test case */
-       data_len = data_end - data;
 
        if (data_len == 54) { /* sizeof(pkt_v4) */
                offset = 4096; /* test too large offset */
@@ -20,7 +19,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
        } else if (data_len == 64) {
                offset = 128;
        } else if (data_len == 128) {
-               offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+               /* Max tail grow 3520 */
+               offset = 4096 - 256 - 320 - data_len;
+       } else if (data_len == 9000) {
+               offset = 10;
+       } else if (data_len == 9001) {
+               offset = 4096;
        } else {
                return XDP_ABORTED; /* No matching test */
        }
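
What changed above: data_end - data only measures the linear area, while bpf_xdp_get_buff_len() also counts fragments, which is what makes the 9000- and 9001-byte cases reachable at all. A minimal sketch of the distinction (illustrative, assumes a frags-capable driver and the same headers as the file above):

	SEC("xdp.frags")
	int buff_len_demo(struct xdp_md *xdp)
	{
		void *data_end = (void *)(long)xdp->data_end;
		void *data = (void *)(long)xdp->data;
		int linear = data_end - data;		/* first buffer only */
		int total = bpf_xdp_get_buff_len(xdp);	/* linear + all frags */

		return total >= linear ? XDP_PASS : XDP_ABORTED;
	}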
index b744825..ca68c03 100644 (file)
 SEC("xdp")
 int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
 {
-       void *data_end = (void *)(long)xdp->data_end;
-       void *data = (void *)(long)xdp->data;
+       __u8 *data_end = (void *)(long)xdp->data_end;
+       __u8 *data = (void *)(long)xdp->data;
        int offset = 0;
 
-       if (data_end - data == 54) /* sizeof(pkt_v4) */
+       switch (bpf_xdp_get_buff_len(xdp)) {
+       case 54:
+               /* sizeof(pkt_v4) */
                offset = 256; /* shrink too much */
-       else
+               break;
+       case 9000:
+               /* non-linear buff test cases */
+               if (data + 1 > data_end)
+                       return XDP_DROP;
+
+               switch (data[0]) {
+               case 0:
+                       offset = 10;
+                       break;
+               case 1:
+                       offset = 4100;
+                       break;
+               case 2:
+                       offset = 8200;
+                       break;
+               default:
+                       return XDP_DROP;
+               }
+               break;
+       default:
                offset = 20;
+               break;
+       }
        if (bpf_xdp_adjust_tail(xdp, 0 - offset))
                return XDP_DROP;
        return XDP_TX;
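
bpf_xdp_adjust_tail() takes a signed delta, which is why the program passes 0 - offset: a positive delta grows the tail, a negative one shrinks it. A two-line sketch of the convention (illustrative):

	if (bpf_xdp_adjust_tail(xdp, 32))	/* grow tail by 32 bytes */
		return XDP_DROP;
	if (bpf_xdp_adjust_tail(xdp, -32))	/* shrink it back */
		return XDP_DROP;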
index 58cf434..3379d30 100644 (file)
@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
        void *data = (void *)(long)xdp->data;
 
        meta.ifindex = xdp->rxq->dev->ifindex;
-       meta.pkt_len = data_end - data;
+       meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
        bpf_xdp_output(xdp, &perf_buf_map,
                       ((__u64) meta.pkt_len << 32) |
                       BPF_F_CURRENT_CPU,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
new file mode 100644 (file)
index 0000000..2a3496d
--- /dev/null
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags")
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+       __u8 *data_end = (void *)(long)xdp->data_end;
+       __u8 *data = (void *)(long)xdp->data;
+       __u8 val[16] = {};
+       __u32 offset;
+       int err;
+
+       if (data + sizeof(__u32) > data_end)
+               return XDP_DROP;
+
+       offset = *(__u32 *)data;
+       err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
+       if (err < 0)
+               return XDP_DROP;
+
+       if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+               return XDP_DROP;
+
+       val[0] = 0xbb; /* update the marker */
+       val[15] = 0xbb;
+       err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
+       if (err < 0)
+               return XDP_DROP;
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
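
The detour through bpf_xdp_load_bytes() and bpf_xdp_store_bytes() is the point of this test: direct packet access via data/data_end can only reach the linear area, so a marker placed in a later fragment is unreachable that way, while the byte-copy helpers accept any offset inside the total buffer length.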
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
new file mode 100644 (file)
index 0000000..62fb7cd
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO     1
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CPUMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_cpumap_val));
+       __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index 5320250..48007f1 100644 (file)
@@ -33,4 +33,10 @@ int xdp_dummy_cm(struct xdp_md *ctx)
        return XDP_PASS;
 }
 
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
new file mode 100644 (file)
index 0000000..e1caf51
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap/map_prog")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
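
A hedged userspace sketch of how a devmap entry pairs an egress interface with the frags-aware program above (ifindex 1 and prog_fd are placeholders; error handling elided):

	struct bpf_devmap_val val = {
		.ifindex = 1,			/* e.g. loopback, illustration only */
		.bpf_prog.fd = prog_fd,		/* fd of xdp_dummy_dm_frags */
	};
	__u32 key = 0;

	bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);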
index 1e6b9c3..8ae11fa 100644 (file)
@@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx)
 
        return XDP_PASS;
 }
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
index 76cd903..29bbaa5 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/if_ether.h>
 #include <linux/btf.h>
 
+#include <bpf/btf.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
@@ -66,6 +67,11 @@ static bool unpriv_disabled = false;
 static int skips;
 static bool verbose = false;
 
+struct kfunc_btf_id_pair {
+       const char *kfunc;
+       int insn_idx;
+};
+
 struct bpf_test {
        const char *descr;
        struct bpf_insn insns[MAX_INSNS];
@@ -92,6 +98,7 @@ struct bpf_test {
        int fixup_map_reuseport_array[MAX_FIXUPS];
        int fixup_map_ringbuf[MAX_FIXUPS];
        int fixup_map_timer[MAX_FIXUPS];
+       struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
        /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
         * Can be a tab-separated sequence of expected strings. An empty string
         * means no log verification.
@@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
        int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
        int *fixup_map_ringbuf = test->fixup_map_ringbuf;
        int *fixup_map_timer = test->fixup_map_timer;
+       struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
 
        if (test->fill_helper) {
                test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
                        fixup_map_timer++;
                } while (*fixup_map_timer);
        }
+
+       /* Patch in kfunc BTF IDs */
+       if (fixup_kfunc_btf_id->kfunc) {
+               struct btf *btf;
+               int btf_id;
+
+               do {
+                       btf_id = 0;
+                       btf = btf__load_vmlinux_btf();
+                       if (btf) {
+                               btf_id = btf__find_by_name_kind(btf,
+                                                               fixup_kfunc_btf_id->kfunc,
+                                                               BTF_KIND_FUNC);
+                               btf_id = btf_id < 0 ? 0 : btf_id;
+                       }
+                       btf__free(btf);
+                       prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+                       fixup_kfunc_btf_id++;
+               } while (fixup_kfunc_btf_id->kfunc);
+       }
 }
 
 struct libcap {
index d7b74eb..829be2b 100644 (file)
        .result  = ACCEPT,
 },
 {
+       "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail1", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail2", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail3", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 expected pointer to ctx, but got PTR",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_pass_ctx", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type UNKNOWN  must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_mem_len_fail1", 2 },
+       },
+},
+{
        "calls: basic sanity",
        .insns = {
        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
index 0a5d23d..ffa5502 100644 (file)
@@ -906,7 +906,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
                        return true;
                case STAT_TEST_RX_FULL:
                        xsk_stat = stats.rx_ring_full;
-                       expected_stat -= RX_FULL_RXQSIZE;
+                       if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+                               expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
+                       else
+                               expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
                        break;
                case STAT_TEST_RX_FILL_EMPTY:
                        xsk_stat = stats.rx_fill_ring_empty_descs;
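
Restated, the fix computes the expected rx_ring_full count from whichever bound is smaller, the number of umem frames or the default producer ring size. An equivalent one-liner (hedged, assumes a min() macro; not in the patch):

	expected_stat = min(ifobject->umem->num_frames,
			    (u32)XSK_RING_PROD__DEFAULT_NUM_DESCS) - RX_FULL_RXQSIZE;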