No conflicts.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
In libbpf, the map can be defined with extra annotation like below:
::
- struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
- };
- BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct ipv_counts);
+ __uint(max_entries, 4);
+ } btf_map SEC(".maps");
-Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
-value types for the map. During ELF parsing, libbpf is able to extract
-key/value type_id's and assign them to BPF_MAP_CREATE attributes
-automatically.
+During ELF parsing, libbpf is able to extract key/value type_id's and assign
+them to BPF_MAP_CREATE attributes automatically.
.. _BPF_Prog_Load:
___A b1:4;
enum A b2:4;
};
- struct bpf_map_def SEC("maps") tmpmap = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct tmp_t),
- .max_entries = 1,
- };
- BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct tmp_t);
+ __uint(max_entries, 1);
+ } tmpmap SEC(".maps");
bpftool is able to pretty print like below:
::
Value type: <prop-encoded-array>
Definition: A standard property.
-- bus-frequency
+- clocks
+ Usage: optional
+ Value type: <phandle>
+ Definition: A reference to the input clock of the controller
+ from which the MDC frequency is derived.
+
+- clock-frequency
Usage: optional
Value type: <u32>
- Definition: Specifies the external MDIO bus clock speed to
- be used, if different from the standard 2.5 MHz.
- This may be due to the standard speed being unsupported (e.g.
- due to a hardware problem), or to advertise that all relevant
- components in the system support a faster speed.
+ Definition: Specifies the external MDC frequency, in Hertz, to
+ be used. Requires that the input clock is specified in the
+ "clocks" property. See also: mdio.yaml.
+
+- suppress-preamble
+ Usage: optional
+ Value type: <boolean>
+ Definition: Disable generation of preamble bits. See also:
+ mdio.yaml.
- interrupts
Usage: required for external MDIO
{
int ret;
- ret = phy_init_eee(phy, 0);
+ ret = phy_init_eee(phy, false);
if (ret)
return 0;
mcr |= PMCR_RX_FC_EN;
}
- if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, 0) >= 0) {
+ if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) {
switch (speed) {
case SPEED_1000:
mcr |= PMCR_FORCE_EEE1G;
ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST,
ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP,
ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT,
+ ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE,
};
static struct workqueue_struct *bnxt_pf_wq;
(BNXT_EVENT_RING_TYPE(data2) == \
ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX)
+#define BNXT_EVENT_PHC_EVENT_TYPE(data1) \
+ (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK) >>\
+ ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT)
+
+#define BNXT_EVENT_PHC_RTC_UPDATE(data1) \
+ (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK) >>\
+ ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT)
+
+#define BNXT_PHC_BITS 48
+
static int bnxt_async_event_process(struct bnxt *bp,
struct hwrm_async_event_cmpl *cmpl)
{
bnxt_event_error_report(bp, data1, data2);
goto async_event_process_exit;
}
+	case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: {
+		/* PHC update notification. Only the RTC-update sub-event is
+		 * acted on here; every other sub-event type falls through to
+		 * the common exit without side effects.
+		 */
+		switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) {
+		case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE:
+			if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+				struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+				u64 ns;
+
+				/* The capability bit is derived from qcaps
+				 * alone, so it does not prove that PTP state
+				 * was set up. Bail out rather than touch
+				 * ptp->ptp_lock through a NULL pointer.
+				 */
+				if (!ptp)
+					goto async_event_process_exit;
+
+				spin_lock_bh(&ptp->ptp_lock);
+				bnxt_ptp_update_current_time(bp);
+				/* The PHC_TIME_MSB field of data1 is placed
+				 * above bit BNXT_PHC_BITS and OR'ed with the
+				 * value just cached in ptp->current_time.
+				 */
+				ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) <<
+				       BNXT_PHC_BITS) | ptp->current_time);
+				bnxt_ptp_rtc_timecounter_init(ptp, ns);
+				spin_unlock_bh(&ptp->ptp_lock);
+			}
+			break;
+		}
+		goto async_event_process_exit;
+	}
case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: {
u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff;
struct hwrm_port_mac_ptp_qcfg_output *resp;
struct hwrm_port_mac_ptp_qcfg_input *req;
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ bool phc_cfg;
u8 flags;
int rc;
rc = -ENODEV;
goto exit;
}
- rc = bnxt_ptp_init(bp);
+ phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0;
+ rc = bnxt_ptp_init(bp, phc_cfg);
if (rc)
netdev_warn(bp->dev, "PTP initialization failed.\n");
exit:
bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED))
bp->fw_cap |= BNXT_FW_CAP_PTP_PPS;
+ if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED)
+ bp->fw_cap |= BNXT_FW_CAP_PTP_RTC;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT))
bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED))
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
bnxt_vf_reps_open(bp);
+ bnxt_ptp_init_rtc(bp, true);
return 0;
open_err_irq:
#define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000
#define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000
#define BNXT_FW_CAP_HOT_RESET 0x00200000
+ #define BNXT_FW_CAP_PTP_RTC 0x00400000
#define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000
#define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000
#define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000
#define HWRM_FUNC_PTP_EXT_CFG 0x1a0UL
#define HWRM_FUNC_PTP_EXT_QCFG 0x1a1UL
#define HWRM_FUNC_KEY_CTX_ALLOC 0x1a2UL
+ #define HWRM_FUNC_BACKING_STORE_CFG_V2 0x1a3UL
+ #define HWRM_FUNC_BACKING_STORE_QCFG_V2 0x1a4UL
+ #define HWRM_FUNC_DBR_PACING_CFG 0x1a5UL
+ #define HWRM_FUNC_DBR_PACING_QCFG 0x1a6UL
+ #define HWRM_FUNC_DBR_PACING_BROADCAST_EVENT 0x1a7UL
+ #define HWRM_FUNC_BACKING_STORE_QCAPS_V2 0x1a8UL
#define HWRM_SELFTEST_QLIST 0x200UL
#define HWRM_SELFTEST_EXEC 0x201UL
#define HWRM_SELFTEST_IRQ 0x202UL
#define HWRM_MFG_PRVSN_IMPORT_CERT 0x212UL
#define HWRM_MFG_PRVSN_GET_STATE 0x213UL
#define HWRM_MFG_GET_NVM_MEASUREMENT 0x214UL
+ #define HWRM_MFG_PSOC_QSTATUS 0x215UL
+ #define HWRM_MFG_SELFTEST_QLIST 0x216UL
+ #define HWRM_MFG_SELFTEST_EXEC 0x217UL
#define HWRM_TF 0x2bcUL
#define HWRM_TF_VERSION_GET 0x2bdUL
#define HWRM_TF_SESSION_OPEN 0x2c6UL
#define HWRM_VERSION_MAJOR 1
#define HWRM_VERSION_MINOR 10
#define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 63
-#define HWRM_VERSION_STR "1.10.2.63"
+#define HWRM_VERSION_RSVD 73
+#define HWRM_VERSION_STR "1.10.2.73"
/* hwrm_ver_get_input (size:192b/24B) */
struct hwrm_ver_get_input {
#define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE 0x40UL
#define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE 0x41UL
#define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST 0x42UL
- #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_MASTER 0x43UL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE 0x43UL
#define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP 0x44UL
#define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT 0x45UL
- #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x46UL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD 0x46UL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x47UL
#define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL
#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL
#define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
__le32 event_data1;
};
-/* hwrm_async_event_cmpl_phc_master (size:128b/16B) */
-struct hwrm_async_event_cmpl_phc_master {
+/* hwrm_async_event_cmpl_phc_update (size:128b/16B) */
+struct hwrm_async_event_cmpl_phc_update {
__le16 type;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_MASK 0x3fUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_SFT 0
- #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT 0x2eUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_LAST ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_MASK 0x3fUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_SFT 0
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT
__le16 event_id;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER 0x43UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE 0x43UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE
__le32 event_data2;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_SFT 0
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_SFT 16
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_SFT 0
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_SFT 16
u8 opaque_v;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_V 0x1UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_MASK 0xfeUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_SFT 1
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_V 0x1UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_MASK 0xfeUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_SFT 1
u8 timestamp_lo;
__le16 timestamp_hi;
__le32 event_data1;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_MASK 0xfUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_SFT 0
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK 0xfUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT 0
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE 0x4UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK 0xffff0UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT 4
};
/* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */
#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE
};
+/* hwrm_async_event_cmpl_error_report_doorbell_drop_threshold (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_doorbell_drop_threshold {
+ __le16 type;
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_MASK 0x3fUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_SFT 0
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT
+ __le16 event_id;
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT 0x45UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT
+ __le32 event_data2;
+ u8 opaque_v;
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_V 0x1UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_MASK 0xfeUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_SFT 1
+ u8 timestamp_lo;
+ __le16 timestamp_hi;
+ __le32 event_data1;
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_SFT 0
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD
+};
+
/* hwrm_func_reset_input (size:192b/24B) */
struct hwrm_func_reset_input {
__le16 req_type;
#define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL 0x800000UL
#define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED 0x1000000UL
#define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP 0x2000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED 0x4000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_REQUIRED 0x8000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED 0x10000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT_DBR_PACING_SUPPORTED 0x20000000UL
u8 max_schqs;
u8 mpc_chnls_cap;
#define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL
__le16 rkc_entry_size;
__le32 tkc_max_entries;
__le32 rkc_max_entries;
- u8 rsvd[7];
+ u8 rsvd1[7];
u8 valid;
};
u8 valid;
};
-/* hwrm_func_ptp_cfg_input (size:320b/40B) */
+/* hwrm_func_ptp_cfg_input (size:384b/48B) */
struct hwrm_func_ptp_cfg_input {
__le16 req_type;
__le16 cmpl_ring;
#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD 0x8UL
#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP 0x10UL
#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE 0x20UL
+ #define FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME 0x40UL
u8 ptp_pps_event;
#define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL 0x1UL
#define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL 0x2UL
__le32 ptp_freq_adj_ext_up;
__le32 ptp_freq_adj_ext_phase_lower;
__le32 ptp_freq_adj_ext_phase_upper;
+ __le64 ptp_set_time;
};
/* hwrm_func_ptp_cfg_output (size:128b/16B) */
u8 valid;
};
+/* hwrm_func_ptp_ext_cfg_input (size:256b/32B) */
+struct hwrm_func_ptp_ext_cfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 enables;
+ #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_MASTER_FID 0x1UL
+ #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_FID 0x2UL
+ #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_MODE 0x4UL
+ #define FUNC_PTP_EXT_CFG_REQ_ENABLES_FAILOVER_TIMER 0x8UL
+ __le16 phc_master_fid;
+ __le16 phc_sec_fid;
+ u8 phc_sec_mode;
+ #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_SWITCH 0x0UL
+ #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_ALL 0x1UL
+ #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY 0x2UL
+ #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_LAST FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY
+ u8 unused_0;
+ __le32 failover_timer;
+ u8 unused_1[4];
+};
+
+/* hwrm_func_ptp_ext_cfg_output (size:128b/16B) */
+struct hwrm_func_ptp_ext_cfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 unused_0[7];
+ u8 valid;
+};
+
+/* hwrm_func_ptp_ext_qcfg_input (size:192b/24B) */
+struct hwrm_func_ptp_ext_qcfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ u8 unused_0[8];
+};
+
+/* hwrm_func_ptp_ext_qcfg_output (size:256b/32B) */
+struct hwrm_func_ptp_ext_qcfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 phc_master_fid;
+ __le16 phc_sec_fid;
+ __le16 phc_active_fid0;
+ __le16 phc_active_fid1;
+ __le32 last_failover_event;
+ __le16 from_fid;
+ __le16 to_fid;
+ u8 unused_0[7];
+ u8 valid;
+};
+
+/* hwrm_func_backing_store_cfg_v2_input (size:448b/56B) */
+struct hwrm_func_backing_store_cfg_v2_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 type;
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID
+ __le16 instance;
+ __le32 flags;
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE 0x1UL
+ __le64 page_dir;
+ __le32 num_entries;
+ __le16 entry_size;
+ u8 page_size_pbl_level;
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_MASK 0xfUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_SFT 0
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_0 0x0UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_1 0x1UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2 0x2UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_MASK 0xf0UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_SFT 4
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_4K (0x0UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8K (0x1UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_64K (0x2UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_2M (0x3UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8M (0x4UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G (0x5UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G
+ u8 subtype_valid_cnt;
+ __le32 split_entry_0;
+ __le32 split_entry_1;
+ __le32 split_entry_2;
+ __le32 split_entry_3;
+};
+
+/* hwrm_func_backing_store_cfg_v2_output (size:128b/16B) */
+struct hwrm_func_backing_store_cfg_v2_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 rsvd0[7];
+ u8 valid;
+};
+
+/* hwrm_func_backing_store_qcfg_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcfg_v2_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 type;
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID
+ __le16 instance;
+ u8 rsvd[4];
+};
+
+/* hwrm_func_backing_store_qcfg_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcfg_v2_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 type;
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID
+ __le16 instance;
+ __le32 flags;
+ __le64 page_dir;
+ __le32 num_entries;
+ u8 page_size_pbl_level;
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_MASK 0xfUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_SFT 0
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_0 0x0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_1 0x1UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2 0x2UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_MASK 0xf0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_SFT 4
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_4K (0x0UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8K (0x1UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_64K (0x2UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_2M (0x3UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8M (0x4UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G (0x5UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G
+ u8 subtype_valid_cnt;
+ u8 rsvd[2];
+ __le32 split_entry_0;
+ __le32 split_entry_1;
+ __le32 split_entry_2;
+ __le32 split_entry_3;
+ u8 rsvd2[7];
+ u8 valid;
+};
+
+/* qpc_split_entries (size:128b/16B) */
+struct qpc_split_entries {
+ __le32 qp_num_l2_entries;
+ __le32 qp_num_qp1_entries;
+ __le32 rsvd[2];
+};
+
+/* srq_split_entries (size:128b/16B) */
+struct srq_split_entries {
+ __le32 srq_num_l2_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
+/* cq_split_entries (size:128b/16B) */
+struct cq_split_entries {
+ __le32 cq_num_l2_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
+/* vnic_split_entries (size:128b/16B) */
+struct vnic_split_entries {
+ __le32 vnic_num_vnic_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
+/* mrav_split_entries (size:128b/16B) */
+struct mrav_split_entries {
+ __le32 mrav_num_av_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcaps_v2_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 type;
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID
+ u8 rsvd[6];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcaps_v2_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 type;
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID
+ __le16 entry_size;
+ __le32 flags;
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID 0x2UL
+ __le32 instance_bit_map;
+ u8 ctx_init_value;
+ u8 ctx_init_offset;
+ u8 entry_multiple;
+ u8 rsvd;
+ __le32 max_num_entries;
+ __le32 min_num_entries;
+ __le16 next_valid_type;
+ u8 subtype_valid_cnt;
+ u8 rsvd2;
+ __le32 split_entry_0;
+ __le32 split_entry_1;
+ __le32 split_entry_2;
+ __le32 split_entry_3;
+ u8 rsvd3[3];
+ u8 valid;
+};
+
/* hwrm_func_drv_if_change_input (size:192b/24B) */
struct hwrm_func_drv_if_change_input {
__le16 req_type;
u8 valid;
};
-/* hwrm_port_mac_cfg_input (size:384b/48B) */
+/* hwrm_port_mac_cfg_input (size:448b/56B) */
struct hwrm_port_mac_cfg_input {
__le16 req_type;
__le16 cmpl_ring;
#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5
u8 unused_0[3];
__le32 ptp_freq_adj_ppb;
- __le32 ptp_adj_phase;
+ u8 unused_1[4];
+ __le64 ptp_adj_phase;
};
/* hwrm_port_mac_cfg_output (size:128b/16B) */
#define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS 0x4UL
#define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS 0x8UL
#define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK 0x10UL
+ #define PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED 0x20UL
u8 unused_0[3];
__le32 rx_ts_reg_off_lower;
__le32 rx_ts_reg_off_upper;
#define PORT_PHY_QCAPS_RESP_PORT_CNT_2 0x2UL
#define PORT_PHY_QCAPS_RESP_PORT_CNT_3 0x3UL
#define PORT_PHY_QCAPS_RESP_PORT_CNT_4 0x4UL
- #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_4
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_12 0xcUL
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_12
__le16 supported_speeds_force_mode;
#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD 0x1UL
#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB 0x2UL
__le16 flags2;
#define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL
#define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL
- u8 unused_0[1];
+ u8 internal_port_cnt;
u8 valid;
};
__le16 target_id;
__le64 resp_addr;
__le32 hash_type;
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL 0x40UL
__le16 vnic_id;
u8 ring_table_pair_index;
u8 hash_mode_flags;
u8 valid;
};
+/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
struct hwrm_tunnel_dst_port_query_input {
__le16 req_type;
__le16 cmpl_ring;
u8 valid;
};
+/* hwrm_dbg_crashdump_medium_cfg_input (size:320b/40B) */
+struct hwrm_dbg_crashdump_medium_cfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 output_dest_flags;
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR 0x1UL
+ __le16 pg_size_lvl;
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_MASK 0x3UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_SFT 0
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_0 0x0UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_1 0x1UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2 0x2UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_MASK 0x1cUL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_SFT 2
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_MASK 0xffe0UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_SFT 5
+ __le32 size;
+ __le32 coredump_component_disable_flags;
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_NVRAM 0x1UL
+ __le32 unused_0;
+ __le64 pbl;
+};
+
+/* hwrm_dbg_crashdump_medium_cfg_output (size:128b/16B) */
+struct hwrm_dbg_crashdump_medium_cfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 unused_1[7];
+ u8 valid;
+};
+
/* coredump_segment_record (size:128b/16B) */
struct coredump_segment_record {
__le16 component_id;
__le16 resp_len;
__le64 installed_items;
u8 result;
- #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL
- #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_FAILURE 0xffUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_MALLOC_FAILURE 0xfdUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_INDEX_PARAMETER 0xfbUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TYPE_PARAMETER 0xf3UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PREREQUISITE 0xf2UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_FILE_HEADER 0xecUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_SIGNATURE 0xebUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_STREAM 0xeaUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_LENGTH 0xe9UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_MANIFEST 0xe8UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TRAILER 0xe7UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_CHECKSUM 0xe6UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_ITEM_CHECKSUM 0xe5UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DATA_LENGTH 0xe4UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DIRECTIVE 0xe1UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_CHIP_REV 0xceUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_DEVICE_ID 0xcdUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_VENDOR 0xccUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_ID 0xcbUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_PLATFORM 0xc5UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_DUPLICATE_ITEM 0xc4UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_ZERO_LENGTH_ITEM 0xc3UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_CHECKSUM_ERROR 0xb9UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_DATA_ERROR 0xb8UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_AUTHENTICATION_ERROR 0xb7UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_NOT_FOUND 0xb0UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED 0xa7UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED
u8 problem_item;
#define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL
#define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL
#include "bnxt_hwrm.h"
#include "bnxt_ptp.h"
+/* Send a HWRM_FUNC_PTP_CFG request with the PTP_SET_TIME enable bit and
+ * @time as the new value.  The callers in this file pass nanoseconds
+ * obtained from timespec64_to_ns().
+ *
+ * Returns 0 on success or the error from hwrm_req_init()/hwrm_req_send().
+ */
+static int bnxt_ptp_cfg_settime(struct bnxt *bp, u64 time)
+{
+	struct hwrm_func_ptp_cfg_input *req;
+	int rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+
+	if (!rc) {
+		req->enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME);
+		req->ptp_set_time = cpu_to_le64(time);
+		rc = hwrm_req_send(bp, req);
+	}
+
+	return rc;
+}
+
int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off)
{
unsigned int ptp_class;
ptp_info);
u64 ns = timespec64_to_ns(ts);
+ if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+ return bnxt_ptp_cfg_settime(ptp->bp, ns);
+
spin_lock_bh(&ptp->ptp_lock);
timecounter_init(&ptp->tc, &ptp->cc, ns);
spin_unlock_bh(&ptp->ptp_lock);
return 0;
}
+/* Refresh the cached clock reading: bnxt_refclk_read() fills
+ * ptp->current_time, and the same value is then stored to ptp->old_time
+ * with WRITE_ONCE().
+ *
+ * Caller holds ptp_lock
+ */
+void bnxt_ptp_update_current_time(struct bnxt *bp)
+{
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+ bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time);
+ WRITE_ONCE(ptp->old_time, ptp->current_time);
+}
+
+/* Ask firmware to shift the clock by @delta through a HWRM_PORT_MAC_CFG
+ * request with the PTP_ADJ_PHASE enable bit.  When the request succeeds,
+ * the cached current time is refreshed under ptp_lock.
+ *
+ * Returns 0 on success or the error from the HWRM request.
+ */
+static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta)
+{
+	struct hwrm_port_mac_cfg_input *req;
+	struct bnxt *bp = ptp->bp;
+	int rc;
+
+	rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
+	if (rc)
+		return rc;
+
+	req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE);
+	req->ptp_adj_phase = cpu_to_le64(delta);
+
+	rc = hwrm_req_send(bp, req);
+	if (rc) {
+		netdev_err(bp->dev, "ptp adjphc failed. rc = %x\n", rc);
+		return rc;
+	}
+
+	/* The adjustment was applied; resync the cached time. */
+	spin_lock_bh(&ptp->ptp_lock);
+	bnxt_ptp_update_current_time(bp);
+	spin_unlock_bh(&ptp->ptp_lock);
+
+	return 0;
+}
+
static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
{
struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
ptp_info);
+ if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+ return bnxt_ptp_adjphc(ptp, delta);
+
spin_lock_bh(&ptp->ptp_lock);
timecounter_adjtime(&ptp->tc, delta);
spin_unlock_bh(&ptp->ptp_lock);
return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config;
}
-int bnxt_ptp_init(struct bnxt *bp)
+/* Set up the software cyclecounter and, optionally, the timecounter.
+ *
+ * The cyclecounter fields and the next overflow-check deadline are only
+ * (re)initialised while no PTP clock is registered (ptp->ptp_clock is NULL).
+ * When @init_tc is true the timecounter is started from the current
+ * wall-clock time.
+ */
+static void bnxt_ptp_timecounter_init(struct bnxt *bp, bool init_tc)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+	if (!ptp->ptp_clock) {
+		memset(&ptp->cc, 0, sizeof(ptp->cc));
+		ptp->cc.read = bnxt_cc_read;
+		ptp->cc.mask = CYCLECOUNTER_MASK(48);
+		ptp->cc.shift = 0;
+		ptp->cc.mult = 1;
+		ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
+	}
+
+	if (!init_tc)
+		return;
+
+	timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+}
+
+/* Restart the timecounter at @ns and then overwrite its cycle_last with
+ * @ns masked by the cyclecounter mask.
+ *
+ * Caller holds ptp_lock
+ */
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns)
+{
+ timecounter_init(&ptp->tc, &ptp->cc, ns);
+ /* For RTC, cycle_last must be in sync with the timecounter value. */
+ ptp->tc.cycle_last = ns & ptp->cc.mask;
+}
+
+/* Establish the starting time for an RTC-capable device and seed the
+ * timecounter with it.
+ *
+ * @phc_cfg false: the current wall-clock time is pushed to the device with
+ *                 bnxt_ptp_cfg_settime().
+ * @phc_cfg true:  the current time is queried from the device instead.
+ *
+ * Returns -ENODEV when there is no PTP config or the firmware lacks
+ * BNXT_FW_CAP_PTP_RTC, the HWRM error on failure, otherwise 0.
+ */
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	struct timespec64 tsp;
+	u64 ns;
+	int rc;
+
+	if (!ptp || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+		return -ENODEV;
+
+	if (phc_cfg) {
+		rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns);
+	} else {
+		ktime_get_real_ts64(&tsp);
+		ns = timespec64_to_ns(&tsp);
+		rc = bnxt_ptp_cfg_settime(bp, ns);
+	}
+	if (rc)
+		return rc;
+
+	spin_lock_bh(&ptp->ptp_lock);
+	bnxt_ptp_rtc_timecounter_init(ptp, ns);
+	spin_unlock_bh(&ptp->ptp_lock);
+
+	return 0;
+}
+
+/* Unregister the PTP clock, if one is registered, and free its pin
+ * configuration.  Both pointers are cleared so a repeated call is a no-op.
+ */
+static void bnxt_ptp_free(struct bnxt *bp)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+	if (!ptp->ptp_clock)
+		return;
+
+	ptp_clock_unregister(ptp->ptp_clock);
+	ptp->ptp_clock = NULL;
+	kfree(ptp->ptp_info.pin_config);
+	ptp->ptp_info.pin_config = NULL;
+}
+
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
int rc;
if (rc)
return rc;
+ if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+ bnxt_ptp_timecounter_init(bp, false);
+ rc = bnxt_ptp_init_rtc(bp, phc_cfg);
+ if (rc)
+ goto out;
+ }
+
if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
return 0;
- if (ptp->ptp_clock) {
- ptp_clock_unregister(ptp->ptp_clock);
- ptp->ptp_clock = NULL;
- kfree(ptp->ptp_info.pin_config);
- ptp->ptp_info.pin_config = NULL;
- }
+ bnxt_ptp_free(bp);
+
atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
spin_lock_init(&ptp->ptp_lock);
- memset(&ptp->cc, 0, sizeof(ptp->cc));
- ptp->cc.read = bnxt_cc_read;
- ptp->cc.mask = CYCLECOUNTER_MASK(48);
- ptp->cc.shift = 0;
- ptp->cc.mult = 1;
-
- ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
- timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+ if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+ bnxt_ptp_timecounter_init(bp, true);
ptp->ptp_info = bnxt_ptp_caps;
if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
int err = PTR_ERR(ptp->ptp_clock);
ptp->ptp_clock = NULL;
- bnxt_unmap_ptp_regs(bp);
- return err;
+ rc = err;
+ goto out;
}
if (bp->flags & BNXT_FLAG_CHIP_P5) {
spin_lock_bh(&ptp->ptp_lock);
ptp_schedule_worker(ptp->ptp_clock, 0);
}
return 0;
+
+out:
+ bnxt_ptp_free(bp);
+ bnxt_unmap_ptp_regs(bp);
+ return rc;
}
void bnxt_ptp_clear(struct bnxt *bp)
#endif
int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off);
+void bnxt_ptp_update_current_time(struct bnxt *bp);
void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2);
void bnxt_ptp_reapply_pps(struct bnxt *bp);
int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
-int bnxt_ptp_init(struct bnxt *bp);
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns);
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg);
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg);
void bnxt_ptp_clear(struct bnxt *bp);
#endif
if (!p->eee_enabled) {
bcmgenet_eee_enable_set(dev, false);
} else {
- ret = phy_init_eee(dev->phydev, 0);
+ ret = phy_init_eee(dev->phydev, false);
if (ret) {
netif_err(priv, hw, dev, "EEE initialization failed\n");
return ret;
static int gemini_ethernet_port_probe(struct platform_device *pdev)
{
char *port_names[2] = { "ethernet0", "ethernet1" };
+ struct device_node *np = pdev->dev.of_node;
struct gemini_ethernet_port *port;
struct device *dev = &pdev->dev;
struct gemini_ethernet *geth;
struct net_device *netdev;
struct device *parent;
+ u8 mac[ETH_ALEN];
unsigned int id;
int irq;
int ret;
netif_napi_add(netdev, &port->napi, gmac_napi_poll,
DEFAULT_NAPI_WEIGHT);
+ ret = of_get_mac_address(np, mac);
+ if (!ret) {
+ dev_info(dev, "Setting macaddr from DT %pM\n", mac);
+ memcpy(port->mac_addr, mac, ETH_ALEN);
+ }
+
if (is_valid_ether_addr((void *)port->mac_addr)) {
eth_hw_addr_set(netdev, (u8 *)port->mac_addr);
} else {
struct tulip_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->base_addr;
u32 phy_reg = ioread32(ioaddr + 0xB8);
- u32 new_csr6 = tp->csr6 & ~0x40C40200;
+ u32 new_csr6;
if (phy_reg & 0x78000000) { /* Ignore baseT4 */
if (phy_reg & 0x20000000) dev->if_port = 5;
return err;
}
+/* phylink .mac_select_pcs callback: return the PCS stored on the dpaa2_mac
+ * that owns @config.  @interface is not consulted.
+ */
+static struct phylink_pcs *dpaa2_mac_select_pcs(struct phylink_config *config,
+						phy_interface_t interface)
+{
+	return phylink_to_dpaa2_mac(config)->pcs;
+}
+
static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state)
{
static const struct phylink_mac_ops dpaa2_mac_phylink_ops = {
.validate = phylink_generic_validate,
+ .mac_select_pcs = dpaa2_mac_select_pcs,
.mac_config = dpaa2_mac_config,
.mac_link_up = dpaa2_mac_link_up,
.mac_link_down = dpaa2_mac_link_down,
}
mac->phylink = phylink;
- if (mac->pcs)
- phylink_set_pcs(mac->phylink, mac->pcs);
-
err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0);
if (err) {
netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err);
enetc_imdio_remove(pf);
}
+/* phylink .mac_select_pcs callback: return the PCS stored on the enetc_pf
+ * that owns @config.  @iface is not consulted.
+ */
+static struct phylink_pcs *
+enetc_pl_mac_select_pcs(struct phylink_config *config, phy_interface_t iface)
+{
+	return phylink_to_enetc_pf(config)->pcs;
+}
+
static void enetc_pl_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
{
struct enetc_pf *pf = phylink_to_enetc_pf(config);
- struct enetc_ndev_priv *priv;
enetc_mac_config(&pf->si->hw, state->interface);
-
- priv = netdev_priv(pf->si->ndev);
- if (pf->pcs)
- phylink_set_pcs(priv->phylink, pf->pcs);
}
static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex)
static const struct phylink_mac_ops enetc_mac_phylink_ops = {
.validate = phylink_generic_validate,
+ .mac_select_pcs = enetc_pl_mac_select_pcs,
.mac_config = enetc_pl_mac_config,
.mac_link_up = enetc_pl_mac_link_up,
.mac_link_down = enetc_pl_mac_link_down,
int ret = 0;
if (enable) {
- ret = phy_init_eee(ndev->phydev, 0);
+ ret = phy_init_eee(ndev->phydev, false);
if (ret)
return ret;
u32 val, tempval;
struct timespec64 ts;
u64 ns;
- val = 0;
if (fep->pps_enable == enable)
return 0;
#include <linux/acpi.h>
#include <linux/acpi_mdio.h>
+#include <linux/clk.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/mdio.h>
} __packed;
#define MDIO_STAT_ENC BIT(6)
-#define MDIO_STAT_CLKDIV(x) (((x>>1) & 0xff) << 8)
+#define MDIO_STAT_CLKDIV(x) (((x) & 0x1ff) << 7)
#define MDIO_STAT_BSY BIT(0)
#define MDIO_STAT_RD_ER BIT(1)
+#define MDIO_STAT_PRE_DIS BIT(5)
#define MDIO_CTL_DEV_ADDR(x) (x & 0x1f)
#define MDIO_CTL_PORT_ADDR(x) ((x & 0x1f) << 5)
#define MDIO_CTL_PRE_DIS BIT(10)
struct mdio_fsl_priv {
struct tgec_mdio_controller __iomem *mdio_base;
+ struct clk *enet_clk;
+ u32 mdc_freq;
bool is_little_endian;
bool has_a009885;
bool has_a011043;
return ret;
}
+/* Program the MDC clock divider from the optional "clock-frequency"
+ * device property.
+ *
+ * When the property is absent nothing is changed and 0 is returned.
+ * Otherwise the input clock is looked up, the divider is derived as
+ * ((input_rate / mdc_freq) - 1) / 2 and written to the CLKDIV field of
+ * MDIO_STAT.
+ *
+ * Returns 0 on success, the devm_clk_get() error when the input clock is
+ * unavailable, or -EINVAL when the requested frequency is zero or yields a
+ * divider outside 5..0x1ff.
+ */
+static int xgmac_mdio_set_mdc_freq(struct mii_bus *bus)
+{
+	struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+	struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+	struct device *dev = bus->parent;
+	u32 mdio_stat, div;
+
+	if (device_property_read_u32(dev, "clock-frequency", &priv->mdc_freq))
+		return 0;
+
+	priv->enet_clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->enet_clk)) {
+		dev_err(dev, "Input clock unknown, not changing MDC frequency\n");
+		return PTR_ERR(priv->enet_clk);
+	}
+
+	/* A zero request would divide by zero below; reject it up front. */
+	if (!priv->mdc_freq) {
+		dev_err(dev, "Requested MDC frequency is out of range, ignoring\n");
+		return -EINVAL;
+	}
+
+	div = ((clk_get_rate(priv->enet_clk) / priv->mdc_freq) - 1) / 2;
+	if (div < 5 || div > 0x1ff) {
+		dev_err(dev, "Requested MDC frequency is out of range, ignoring\n");
+		return -EINVAL;
+	}
+
+	/* Read-modify-write so only the CLKDIV field is replaced. */
+	mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+	mdio_stat &= ~MDIO_STAT_CLKDIV(0x1ff);
+	mdio_stat |= MDIO_STAT_CLKDIV(div);
+	xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+	return 0;
+}
+
+/* Set the PRE_DIS bit in MDIO_STAT when the device carries the
+ * "suppress-preamble" property; otherwise leave the register untouched.
+ */
+static void xgmac_mdio_set_suppress_preamble(struct mii_bus *bus)
+{
+	struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+	struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+	struct device *dev = bus->parent;
+	u32 mdio_stat;
+
+	if (!device_property_read_bool(dev, "suppress-preamble"))
+		return;
+
+	/* Read-modify-write so the other MDIO_STAT bits are preserved. */
+	mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+	mdio_stat |= MDIO_STAT_PRE_DIS;
+	xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+}
+
static int xgmac_mdio_probe(struct platform_device *pdev)
{
struct fwnode_handle *fwnode;
return -EINVAL;
}
- bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv));
+ bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(struct mdio_fsl_priv));
if (!bus)
return -ENOMEM;
bus->probe_capabilities = MDIOBUS_C22_C45;
snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res->start);
- /* Set the PHY base address */
priv = bus->priv;
- priv->mdio_base = ioremap(res->start, resource_size(res));
- if (!priv->mdio_base) {
- ret = -ENOMEM;
- goto err_ioremap;
- }
+	priv->mdio_base = devm_ioremap(&pdev->dev, res->start,
+				       resource_size(res));
+	/* devm_ioremap() signals failure with NULL, not an ERR_PTR(). */
+	if (!priv->mdio_base)
+		return -ENOMEM;
/* For both ACPI and DT cases, endianness of MDIO controller
* needs to be specified using "little-endian" property.
priv->has_a011043 = device_property_read_bool(&pdev->dev,
"fsl,erratum-a011043");
+ xgmac_mdio_set_suppress_preamble(bus);
+
+ ret = xgmac_mdio_set_mdc_freq(bus);
+ if (ret)
+ return ret;
+
fwnode = pdev->dev.fwnode;
if (is_of_node(fwnode))
ret = of_mdiobus_register(bus, to_of_node(fwnode));
ret = -EINVAL;
if (ret) {
dev_err(&pdev->dev, "cannot register MDIO bus\n");
- goto err_registration;
+ return ret;
}
platform_set_drvdata(pdev, bus);
return 0;
-
-err_registration:
- iounmap(priv->mdio_base);
-
-err_ioremap:
- mdiobus_free(bus);
-
- return ret;
-}
-
-static int xgmac_mdio_remove(struct platform_device *pdev)
-{
- struct mii_bus *bus = platform_get_drvdata(pdev);
- struct mdio_fsl_priv *priv = bus->priv;
-
- mdiobus_unregister(bus);
- iounmap(priv->mdio_base);
- mdiobus_free(bus);
-
- return 0;
}
static const struct of_device_id xgmac_mdio_match[] = {
.acpi_match_table = xgmac_acpi_match,
},
.probe = xgmac_mdio_probe,
- .remove = xgmac_mdio_remove,
};
module_platform_driver(xgmac_mdio_driver);
bytes_compl += buf->skb->len;
pkts_compl++;
dev_kfree_skb_any(buf->skb);
- } else if (buf->type == MVNETA_TYPE_XDP_TX ||
- buf->type == MVNETA_TYPE_XDP_NDO) {
+ } else if ((buf->type == MVNETA_TYPE_XDP_TX ||
+ buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
if (napi && buf->type == MVNETA_TYPE_XDP_TX)
xdp_return_frame_rx_napi(buf->xdpf);
else
static void
mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
- struct xdp_buff *xdp, struct skb_shared_info *sinfo,
- int sync_len)
+ struct xdp_buff *xdp, int sync_len)
{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
int i;
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
for (i = 0; i < sinfo->nr_frags; i++)
page_pool_put_full_page(rxq->page_pool,
skb_frag_page(&sinfo->frags[i]), true);
+
+out:
page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
sync_len, true);
}
static int
mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
- struct xdp_frame *xdpf, bool dma_map)
+ struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
{
- struct mvneta_tx_desc *tx_desc;
- struct mvneta_tx_buf *buf;
- dma_addr_t dma_addr;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+ struct device *dev = pp->dev->dev.parent;
+ struct mvneta_tx_desc *tx_desc = NULL;
+ int i, num_frames = 1;
+ struct page *page;
+
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ num_frames += sinfo->nr_frags;
- if (txq->count >= txq->tx_stop_threshold)
+ if (txq->count + num_frames >= txq->size)
return MVNETA_XDP_DROPPED;
- tx_desc = mvneta_txq_next_desc_get(txq);
+ for (i = 0; i < num_frames; i++) {
+ struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+ skb_frag_t *frag = NULL;
+ int len = xdpf->len;
+ dma_addr_t dma_addr;
- buf = &txq->buf[txq->txq_put_index];
- if (dma_map) {
- /* ndo_xdp_xmit */
- dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
- xdpf->len, DMA_TO_DEVICE);
- if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
- mvneta_txq_desc_put(txq);
- return MVNETA_XDP_DROPPED;
+ if (unlikely(i)) { /* paged area */
+ frag = &sinfo->frags[i - 1];
+ len = skb_frag_size(frag);
}
- buf->type = MVNETA_TYPE_XDP_NDO;
- } else {
- struct page *page = virt_to_page(xdpf->data);
- dma_addr = page_pool_get_dma_addr(page) +
- sizeof(*xdpf) + xdpf->headroom;
- dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
- xdpf->len, DMA_BIDIRECTIONAL);
- buf->type = MVNETA_TYPE_XDP_TX;
+ tx_desc = mvneta_txq_next_desc_get(txq);
+ if (dma_map) {
+ /* ndo_xdp_xmit */
+ void *data;
+
+ data = unlikely(frag) ? skb_frag_address(frag)
+ : xdpf->data;
+ dma_addr = dma_map_single(dev, data, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_addr)) {
+ mvneta_txq_desc_put(txq);
+ goto unmap;
+ }
+
+ buf->type = MVNETA_TYPE_XDP_NDO;
+ } else {
+ page = unlikely(frag) ? skb_frag_page(frag)
+ : virt_to_page(xdpf->data);
+ dma_addr = page_pool_get_dma_addr(page);
+ if (unlikely(frag))
+ dma_addr += skb_frag_off(frag);
+ else
+ dma_addr += sizeof(*xdpf) + xdpf->headroom;
+ dma_sync_single_for_device(dev, dma_addr, len,
+ DMA_BIDIRECTIONAL);
+ buf->type = MVNETA_TYPE_XDP_TX;
+ }
+ buf->xdpf = unlikely(i) ? NULL : xdpf;
+
+ tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
+ tx_desc->buf_phys_addr = dma_addr;
+ tx_desc->data_size = len;
+ *nxmit_byte += len;
+
+ mvneta_txq_inc_put(txq);
}
- buf->xdpf = xdpf;
- tx_desc->command = MVNETA_TXD_FLZ_DESC;
- tx_desc->buf_phys_addr = dma_addr;
- tx_desc->data_size = xdpf->len;
+ /*last descriptor */
+ if (likely(tx_desc))
+ tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
- mvneta_txq_inc_put(txq);
- txq->pending++;
- txq->count++;
+ txq->pending += num_frames;
+ txq->count += num_frames;
return MVNETA_XDP_TX;
+
+unmap:
+ for (i--; i >= 0; i--) {
+ mvneta_txq_desc_put(txq);
+ tx_desc = txq->descs + txq->next_desc_to_proc;
+ dma_unmap_single(dev, tx_desc->buf_phys_addr,
+ tx_desc->data_size,
+ DMA_TO_DEVICE);
+ }
+
+ return MVNETA_XDP_DROPPED;
}
static int
struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
struct mvneta_tx_queue *txq;
struct netdev_queue *nq;
+ int cpu, nxmit_byte = 0;
struct xdp_frame *xdpf;
- int cpu;
u32 ret;
xdpf = xdp_convert_buff_to_frame(xdp);
nq = netdev_get_tx_queue(pp->dev, txq->id);
__netif_tx_lock(nq, cpu);
- ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+ ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
if (ret == MVNETA_XDP_TX) {
u64_stats_update_begin(&stats->syncp);
- stats->es.ps.tx_bytes += xdpf->len;
+ stats->es.ps.tx_bytes += nxmit_byte;
stats->es.ps.tx_packets++;
stats->es.ps.xdp_tx++;
u64_stats_update_end(&stats->syncp);
__netif_tx_lock(nq, cpu);
for (i = 0; i < num_frame; i++) {
- ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+ ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
+ true);
if (ret != MVNETA_XDP_TX)
break;
- nxmit_byte += frames[i]->len;
nxmit++;
}
struct bpf_prog *prog, struct xdp_buff *xdp,
u32 frame_sz, struct mvneta_stats *stats)
{
- struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
unsigned int len, data_len, sync;
u32 ret, act;
err = xdp_do_redirect(pp->dev, xdp, prog);
if (unlikely(err)) {
- mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+ mvneta_xdp_put_buff(pp, rxq, xdp, sync);
ret = MVNETA_XDP_DROPPED;
} else {
ret = MVNETA_XDP_REDIR;
case XDP_TX:
ret = mvneta_xdp_xmit_back(pp, xdp);
if (ret != MVNETA_XDP_TX)
- mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+ mvneta_xdp_put_buff(pp, rxq, xdp, sync);
break;
default:
bpf_warn_invalid_xdp_action(pp->dev, prog, act);
trace_xdp_exception(pp->dev, prog, act);
fallthrough;
case XDP_DROP:
- mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+ mvneta_xdp_put_buff(pp, rxq, xdp, sync);
ret = MVNETA_XDP_DROPPED;
stats->xdp_drop++;
break;
int data_len = -MVNETA_MH_SIZE, len;
struct net_device *dev = pp->dev;
enum dma_data_direction dma_dir;
- struct skb_shared_info *sinfo;
if (*size > MVNETA_MAX_RX_BUF_SIZE) {
len = MVNETA_MAX_RX_BUF_SIZE;
/* Prefetch header */
prefetch(data);
+ xdp_buff_clear_frags_flag(xdp);
xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE,
data_len, false);
-
- sinfo = xdp_get_shared_info_from_buff(xdp);
- sinfo->nr_frags = 0;
}
static void
struct mvneta_rx_desc *rx_desc,
struct mvneta_rx_queue *rxq,
struct xdp_buff *xdp, int *size,
- struct skb_shared_info *xdp_sinfo,
struct page *page)
{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
struct net_device *dev = pp->dev;
enum dma_data_direction dma_dir;
int data_len, len;
len, dma_dir);
rx_desc->buf_phys_addr = 0;
- if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) {
- skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++];
+ if (!xdp_buff_has_frags(xdp))
+ sinfo->nr_frags = 0;
+
+ if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+ skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];
skb_frag_off_set(frag, pp->rx_offset_correction);
skb_frag_size_set(frag, data_len);
__skb_frag_set_page(frag, page);
+
+ if (!xdp_buff_has_frags(xdp)) {
+ sinfo->xdp_frags_size = *size;
+ xdp_buff_set_frags_flag(xdp);
+ }
+ if (page_is_pfmemalloc(page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
} else {
page_pool_put_full_page(rxq->page_pool, page, true);
}
-
- /* last fragment */
- if (len == *size) {
- struct skb_shared_info *sinfo;
-
- sinfo = xdp_get_shared_info_from_buff(xdp);
- sinfo->nr_frags = xdp_sinfo->nr_frags;
- memcpy(sinfo->frags, xdp_sinfo->frags,
- sinfo->nr_frags * sizeof(skb_frag_t));
- }
*size -= len;
}
struct xdp_buff *xdp, u32 desc_status)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
- int i, num_frags = sinfo->nr_frags;
struct sk_buff *skb;
+ u8 num_frags;
+
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ num_frags = sinfo->nr_frags;
skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
if (!skb)
skb_put(skb, xdp->data_end - xdp->data);
skb->ip_summed = mvneta_rx_csum(pp, desc_status);
- for (i = 0; i < num_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
-
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- skb_frag_page(frag), skb_frag_off(frag),
- skb_frag_size(frag), PAGE_SIZE);
- }
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ xdp_update_skb_shared_info(skb, num_frags,
+ sinfo->xdp_frags_size,
+ num_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
{
int rx_proc = 0, rx_todo, refill, size = 0;
struct net_device *dev = pp->dev;
- struct skb_shared_info sinfo;
struct mvneta_stats ps = {};
struct bpf_prog *xdp_prog;
u32 desc_status, frame_sz;
xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq);
xdp_buf.data_hard_start = NULL;
- sinfo.nr_frags = 0;
-
/* Get number of received packets */
rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
}
mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
- &size, &sinfo, page);
+ &size, page);
} /* Middle or Last descriptor */
if (!(rx_status & MVNETA_RXD_LAST_DESC))
continue;
if (size) {
- mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+ mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
goto next;
}
if (IS_ERR(skb)) {
struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
- mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+ mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
u64_stats_update_begin(&stats->syncp);
stats->es.skb_alloc_error++;
napi_gro_receive(napi, skb);
next:
xdp_buf.data_hard_start = NULL;
- sinfo.nr_frags = 0;
}
if (xdp_buf.data_hard_start)
- mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+ mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
if (ps.xdp_redirect)
xdp_do_flush_map();
return err;
}
- err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
+ err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
+ PAGE_SIZE);
if (err < 0)
goto err_free_pp;
static int mvneta_change_mtu(struct net_device *dev, int mtu)
{
struct mvneta_port *pp = netdev_priv(dev);
+ struct bpf_prog *prog = pp->xdp_prog;
int ret;
if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
}
- if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
- netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+ if (prog && !prog->aux->xdp_has_frags &&
+ mtu > MVNETA_MAX_RX_BUF_SIZE) {
+ netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n",
+ mtu);
+
return -EINVAL;
}
.pcs_an_restart = mvneta_pcs_an_restart,
};
+/* phylink .mac_select_pcs callback: return the PCS embedded in the
+ * mvneta_port behind @config's net_device.  @interface is not consulted.
+ */
+static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config,
+						 phy_interface_t interface)
+{
+	struct mvneta_port *pp = netdev_priv(to_net_dev(config->dev));
+
+	return &pp->phylink_pcs;
+}
+
static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode,
phy_interface_t interface)
{
mvneta_port_up(pp);
if (phy && pp->eee_enabled) {
- pp->eee_active = phy_init_eee(phy, 0) >= 0;
+ pp->eee_active = phy_init_eee(phy, false) >= 0;
mvneta_set_eee(pp, pp->eee_active && pp->tx_lpi_enabled);
}
}
static const struct phylink_mac_ops mvneta_phylink_ops = {
.validate = phylink_generic_validate,
+ .mac_select_pcs = mvneta_mac_select_pcs,
.mac_prepare = mvneta_mac_prepare,
.mac_config = mvneta_mac_config,
.mac_finish = mvneta_mac_finish,
struct mvneta_port *pp = netdev_priv(dev);
struct bpf_prog *old_prog;
- if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
- NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
+ if (prog && !prog->aux->xdp_has_frags &&
+ dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+ NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
return -EOPNOTSUPP;
}
if (!dev)
return -ENOMEM;
- dev->irq = irq_of_parse_and_map(dn, 0);
- if (dev->irq == 0)
- return -EINVAL;
+ dev->tx_queue_len = MVNETA_MAX_TXD;
+ dev->watchdog_timeo = 5 * HZ;
+ dev->netdev_ops = &mvneta_netdev_ops;
+ dev->ethtool_ops = &mvneta_eth_tool_ops;
+
+ pp = netdev_priv(dev);
+ spin_lock_init(&pp->lock);
+ pp->dn = dn;
+
+ pp->rxq_def = rxq_def;
+ pp->indir[0] = rxq_def;
err = of_get_phy_mode(dn, &phy_mode);
if (err) {
dev_err(&pdev->dev, "incorrect phy-mode\n");
- goto err_free_irq;
+ return err;
}
+ pp->phy_interface = phy_mode;
+
comphy = devm_of_phy_get(&pdev->dev, dn, NULL);
- if (comphy == ERR_PTR(-EPROBE_DEFER)) {
- err = -EPROBE_DEFER;
- goto err_free_irq;
- } else if (IS_ERR(comphy)) {
+ if (comphy == ERR_PTR(-EPROBE_DEFER))
+ return -EPROBE_DEFER;
+
+ if (IS_ERR(comphy))
comphy = NULL;
+
+ pp->comphy = comphy;
+
+ pp->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(pp->base))
+ return PTR_ERR(pp->base);
+
+ /* Get special SoC configurations */
+ if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+ pp->neta_armada3700 = true;
+
+ dev->irq = irq_of_parse_and_map(dn, 0);
+ if (dev->irq == 0)
+ return -EINVAL;
+
+ pp->clk = devm_clk_get(&pdev->dev, "core");
+ if (IS_ERR(pp->clk))
+ pp->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(pp->clk)) {
+ err = PTR_ERR(pp->clk);
+ goto err_free_irq;
}
- pp = netdev_priv(dev);
- spin_lock_init(&pp->lock);
+ clk_prepare_enable(pp->clk);
+
+ pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+ if (!IS_ERR(pp->clk_bus))
+ clk_prepare_enable(pp->clk_bus);
+
+ pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
pp->phylink_config.dev = &dev->dev;
pp->phylink_config.type = PHYLINK_NETDEV;
phy_mode, &mvneta_phylink_ops);
if (IS_ERR(phylink)) {
err = PTR_ERR(phylink);
- goto err_free_irq;
- }
-
- dev->tx_queue_len = MVNETA_MAX_TXD;
- dev->watchdog_timeo = 5 * HZ;
- dev->netdev_ops = &mvneta_netdev_ops;
-
- dev->ethtool_ops = &mvneta_eth_tool_ops;
-
- pp->phylink = phylink;
- pp->comphy = comphy;
- pp->phy_interface = phy_mode;
- pp->dn = dn;
-
- pp->rxq_def = rxq_def;
- pp->indir[0] = rxq_def;
-
- /* Get special SoC configurations */
- if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
- pp->neta_armada3700 = true;
-
- pp->clk = devm_clk_get(&pdev->dev, "core");
- if (IS_ERR(pp->clk))
- pp->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(pp->clk)) {
- err = PTR_ERR(pp->clk);
- goto err_free_phylink;
- }
-
- clk_prepare_enable(pp->clk);
-
- pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
- if (!IS_ERR(pp->clk_bus))
- clk_prepare_enable(pp->clk_bus);
-
- pp->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(pp->base)) {
- err = PTR_ERR(pp->base);
goto err_clk;
}
- pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
- phylink_set_pcs(phylink, &pp->phylink_pcs);
+ pp->phylink = phylink;
/* Alloc per-cpu port structure */
pp->ports = alloc_percpu(struct mvneta_pcpu_port);
if (!pp->ports) {
err = -ENOMEM;
- goto err_clk;
+ goto err_free_phylink;
}
/* Alloc per-cpu stats */
free_percpu(pp->stats);
err_free_ports:
free_percpu(pp->ports);
-err_clk:
- clk_disable_unprepare(pp->clk_bus);
- clk_disable_unprepare(pp->clk);
err_free_phylink:
if (pp->phylink)
phylink_destroy(pp->phylink);
+err_clk:
+ clk_disable_unprepare(pp->clk_bus);
+ clk_disable_unprepare(pp->clk);
err_free_irq:
irq_dispose_mapping(dev->irq);
return err;
return devm_register_netdev(dev, ndev);
}
+#ifdef CONFIG_OF
static const struct of_device_id mtk_star_of_match[] = {
{ .compatible = "mediatek,mt8516-eth", },
{ .compatible = "mediatek,mt8518-eth", },
{ }
};
MODULE_DEVICE_TABLE(of, mtk_star_of_match);
+#endif
static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops,
mtk_star_suspend, mtk_star_resume);
int num_ports_mapped;
int num_ports_up;
enum ethtool_module_power_mode_policy power_mode_policy;
+ enum mlxsw_reg_pmtm_module_type type;
};
struct mlxsw_env {
struct mlxsw_env_module_info module_info[];
};
-static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
- bool *qsfp, bool *cmis)
+/* Check the recorded type of @module: twisted-pair modules are rejected
+ * with -EINVAL, every other type returns 0.  This variant takes no lock
+ * itself; the locked wrapper and other callers in this file hold
+ * module_info_lock around it.
+ */
+static int __mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+	struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+
+	if (mlxsw_env->module_info[module].type ==
+	    MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Locked wrapper: takes module_info_lock around
+ * __mlxsw_env_validate_module_type() and returns its result.
+ */
+static int mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+	struct mlxsw_env *env = mlxsw_core_env(core);
+	int ret;
+
+	mutex_lock(&env->module_info_lock);
+	ret = __mlxsw_env_validate_module_type(core, module);
+	mutex_unlock(&env->module_info_lock);
+
+	return ret;
+}
+
+static int
+mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp,
+ bool *cmis)
{
char mcia_pl[MLXSW_REG_MCIA_LEN];
char *eeprom_tmp;
u8 ident;
int err;
+ err = mlxsw_env_validate_module_type(core, id);
+ if (err)
+ return err;
+
mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
MLXSW_REG_MCIA_I2C_ADDR_LOW);
err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
return 0;
}
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+ struct mlxsw_core *mlxsw_core, int module,
struct ethtool_modinfo *modinfo)
{
u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE];
unsigned int read_size;
int err;
+ err = mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ netdev_err(netdev,
+ "EEPROM is not equipped on port module type");
+ return err;
+ }
+
err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset,
module_info, false, &read_size);
if (err)
{
u32 bytes_read = 0;
u16 device_addr;
+ int err;
+
+ err = mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type");
+ return err;
+ }
/* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */
device_addr = page->offset;
char mcia_pl[MLXSW_REG_MCIA_LEN];
char *eeprom_tmp;
u8 size;
- int err;
size = min_t(u8, page->length - bytes_read,
MLXSW_REG_MCIA_EEPROM_SIZE);
mutex_lock(&mlxsw_env->module_info_lock);
+ err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ netdev_err(netdev, "Reset module is not supported on port module type\n");
+ goto out;
+ }
+
if (mlxsw_env->module_info[module].num_ports_up) {
netdev_err(netdev, "Cannot reset module when ports using it are administratively up\n");
err = -EINVAL;
mutex_lock(&mlxsw_env->module_info_lock);
+ err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Power mode is not supported on port module type");
+ goto out;
+ }
+
params->policy = mlxsw_env->module_info[module].power_mode_policy;
mlxsw_reg_mcion_pack(mcion_pl, module);
mutex_lock(&mlxsw_env->module_info_lock);
+ err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Power mode set is not supported on port module type");
+ goto out;
+ }
+
if (mlxsw_env->module_info[module].power_mode_policy == policy)
goto out;
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl);
}
-static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core,
- u8 module_count)
+static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core)
{
int i, err, sensor_index;
bool has_temp_sensor;
- for (i = 0; i < module_count; i++) {
+ for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i,
&has_temp_sensor);
if (err)
}
static int
-mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core,
- u8 module_count)
+mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core)
{
int i, err;
- for (i = 0; i < module_count; i++) {
+ for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
char pmaos_pl[MLXSW_REG_PMAOS_LEN];
mlxsw_reg_pmaos_pack(pmaos_pl, i);
}
EXPORT_SYMBOL(mlxsw_env_module_port_down);
+/* Query the PMTM register for every module and cache the reported type.
+ *
+ * Walks module indexes 0 .. module_count - 1 and stores each result in
+ * module_info[i].type.
+ *
+ * Returns 0 on success, or the first error returned by mlxsw_reg_query().
+ */
+static int
+mlxsw_env_module_type_set(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+ int i;
+
+ for (i = 0; i < mlxsw_env->module_count; i++) {
+ char pmtm_pl[MLXSW_REG_PMTM_LEN];
+ int err;
+
+ /* Slot index is fixed to 0; only the module index varies. */
+ mlxsw_reg_pmtm_pack(pmtm_pl, 0, i);
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
+ if (err)
+ return err;
+
+ mlxsw_env->module_info[i].type =
+ mlxsw_reg_pmtm_module_type_get(pmtm_pl);
+ }
+
+ return 0;
+}
+
int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
{
char mgpir_pl[MLXSW_REG_MGPIR_LEN];
if (err)
goto err_module_plug_event_register;
- err = mlxsw_env_module_oper_state_event_enable(mlxsw_core,
- env->module_count);
+ err = mlxsw_env_module_oper_state_event_enable(mlxsw_core);
if (err)
goto err_oper_state_event_enable;
- err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count);
+ err = mlxsw_env_module_temp_event_enable(mlxsw_core);
if (err)
goto err_temp_event_enable;
+ err = mlxsw_env_module_type_set(mlxsw_core);
+ if (err)
+ goto err_type_set;
+
return 0;
+err_type_set:
err_temp_event_enable:
err_oper_state_event_enable:
mlxsw_env_module_plug_event_unregister(env);
int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
int off, int *temp);
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+ struct mlxsw_core *mlxsw_core, int module,
struct ethtool_modinfo *modinfo);
int mlxsw_env_get_module_eeprom(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo);
+ return mlxsw_env_get_module_info(netdev, core, mlxsw_m_port->module,
+ modinfo);
}
static int
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21)
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22)
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23)
+#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(24)
+#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T BIT(25)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29)
*slot_index = mlxsw_reg_pllp_slot_index_get(payload);
}
+/* PMTM - Port Module Type Mapping Register
+ * ----------------------------------------
+ * The PMTM register allows query or configuration of module types.
+ * The register can only be set when the module is disabled by PMAOS register
+ */
+#define MLXSW_REG_PMTM_ID 0x5067
+#define MLXSW_REG_PMTM_LEN 0x10
+
+MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN);
+
+/* reg_pmtm_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, slot_index, 0x00, 24, 4);
+
+/* reg_pmtm_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8);
+
+enum mlxsw_reg_pmtm_module_type {
+ MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_4_LANES = 0,
+ MLXSW_REG_PMTM_MODULE_TYPE_QSFP = 1,
+ MLXSW_REG_PMTM_MODULE_TYPE_SFP = 2,
+ MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_SINGLE_LANE = 4,
+ MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_2_LANES = 8,
+ MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP4X = 10,
+ MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP2X = 11,
+ MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP1X = 12,
+ MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14,
+ MLXSW_REG_PMTM_MODULE_TYPE_OSFP = 15,
+ MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD = 16,
+ MLXSW_REG_PMTM_MODULE_TYPE_DSFP = 17,
+ MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP8X = 18,
+ MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR = 19,
+};
+
+/* reg_pmtm_module_type
+ * Module type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 5);
+
+/* Prepare a PMTM payload: zero it, then fill in the two index fields
+ * (slot index and module number). The module_type field is left zeroed.
+ */
+static inline void mlxsw_reg_pmtm_pack(char *payload, u8 slot_index, u8 module)
+{
+ MLXSW_REG_ZERO(pmtm, payload);
+ mlxsw_reg_pmtm_slot_index_set(payload, slot_index);
+ mlxsw_reg_pmtm_module_set(payload, module);
+}
+
/* HTGT - Host Trap Group Table
* ----------------------------
* Configures the properties for forwarding to CPU.
MLXSW_REG(pddr),
MLXSW_REG(pmmp),
MLXSW_REG(pllp),
+ MLXSW_REG(pmtm),
MLXSW_REG(htgt),
MLXSW_REG(hpkt),
MLXSW_REG(rgcr),
struct mlxsw_sp1_kvdl_part *part;
bool need_update = true;
unsigned int nr_entries;
- size_t usage_size;
u64 resource_size;
int err;
}
nr_entries = div_u64(resource_size, info->alloc_size);
- usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
- part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+ part = kzalloc(struct_size(part, usage, BITS_TO_LONGS(nr_entries)),
+ GFP_KERNEL);
if (!part)
return ERR_PTR(-ENOMEM);
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- int err;
-
- err = mlxsw_env_get_module_info(mlxsw_sp->core,
- mlxsw_sp_port->mapping.module,
- modinfo);
- return err;
+ return mlxsw_env_get_module_info(netdev, mlxsw_sp->core,
+ mlxsw_sp_port->mapping.module,
+ modinfo);
}
static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- int err;
-
- err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
- mlxsw_sp_port->mapping.module, ee,
- data);
- return err;
+ return mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
+ mlxsw_sp_port->mapping.module, ee,
+ data);
}
static int
static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
{
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
+ .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ .speed = SPEED_100,
+ },
+ {
.mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
.mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
.speed = SPEED_1000,
},
{
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T,
+ .mask_ethtool = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ .speed = SPEED_1000,
+ },
+ {
.mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
.mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
}
if (eee->eee_enabled) {
- ret = phy_init_eee(phydev, 0);
+ ret = phy_init_eee(phydev, false);
if (ret) {
netif_err(adapter, drv, adapter->netdev,
"EEE initialization failed\n");
struct gdma_context *gc = gd->gdma_context;
struct hw_channel_context *hwc;
u32 length = gmi->length;
- u32 req_msg_size;
+ size_t req_msg_size;
int err;
int i;
return -EINVAL;
hwc = gc->hwc.driver_data;
- req_msg_size = sizeof(*req) + num_page * sizeof(u64);
+ req_msg_size = struct_size(req, page_addr_list, num_page);
if (req_msg_size > hwc->max_req_msg_size)
return -EINVAL;
return 0;
}
- entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC);
+ entry = kmalloc(struct_size(entry, ip_add, add_len), GFP_ATOMIC);
if (!entry) {
spin_unlock_bh(list_lock);
return -ENOMEM;
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
-/*
- * nfp_net_ctrl.h
+/* nfp_net_ctrl.h
* Netronome network device driver: Control BAR layout
* Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
* Jason McMullan <jason.mcmullan@netronome.com>
#include <linux/types.h>
-/**
- * Configuration BAR size.
+/* Configuration BAR size.
*
* The configuration BAR is 8K in size, but due to
* THB-350, 32k needs to be reserved.
*/
#define NFP_NET_CFG_BAR_SZ (32 * 1024)
-/**
- * Offset in Freelist buffer where packet starts on RX
- */
+/* Offset in Freelist buffer where packet starts on RX */
#define NFP_NET_RX_OFFSET 32
-/**
- * LSO parameters
+/* LSO parameters
* %NFP_NET_LSO_MAX_HDR_SZ: Maximum header size supported for LSO frames
* %NFP_NET_LSO_MAX_SEGS: Maximum number of segments LSO frame can produce
*/
#define NFP_NET_LSO_MAX_HDR_SZ 255
#define NFP_NET_LSO_MAX_SEGS 64
-/**
- * Prepend field types
- */
+/* Prepend field types */
#define NFP_NET_META_FIELD_SIZE 4
#define NFP_NET_META_HASH 1 /* next field carries hash type */
#define NFP_NET_META_MARK 2
#define NFP_META_PORT_ID_CTRL ~0U
-/**
- * Hash type pre-pended when a RSS hash was computed
- */
+/* Hash type pre-pended when a RSS hash was computed */
#define NFP_NET_RSS_NONE 0
#define NFP_NET_RSS_IPV4 1
#define NFP_NET_RSS_IPV6 2
#define NFP_NET_RSS_IPV6_UDP 8
#define NFP_NET_RSS_IPV6_EX_UDP 9
-/**
- * Ring counts
+/* Ring counts
* %NFP_NET_TXR_MAX: Maximum number of TX rings
* %NFP_NET_RXR_MAX: Maximum number of RX rings
*/
#define NFP_NET_TXR_MAX 64
#define NFP_NET_RXR_MAX 64
-/**
- * Read/Write config words (0x0000 - 0x002c)
+/* Read/Write config words (0x0000 - 0x002c)
* %NFP_NET_CFG_CTRL: Global control
* %NFP_NET_CFG_UPDATE: Indicate which fields are updated
* %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
#define NFP_NET_CFG_LSC 0x0020
#define NFP_NET_CFG_MACADDR 0x0024
-/**
- * Read-only words (0x0030 - 0x0050):
+/* Read-only words (0x0030 - 0x0050):
* %NFP_NET_CFG_VERSION: Firmware version number
* %NFP_NET_CFG_STS: Status
* %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
#define NFP_NET_CFG_START_TXQ 0x0048
#define NFP_NET_CFG_START_RXQ 0x004c
-/**
- * Prepend configuration
+/* Prepend configuration
*/
#define NFP_NET_CFG_RX_OFFSET 0x0050
#define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */
-/**
- * RSS capabilities
+/* RSS capabilities
* %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as
* %NFP_NET_CFG_RSS_HFUNC)
*/
#define NFP_NET_CFG_RSS_CAP 0x0054
#define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000
-/**
- * TLV area start
+/* TLV area start
* %NFP_NET_CFG_TLV_BASE: start anchor of the TLV area
*/
#define NFP_NET_CFG_TLV_BASE 0x0058
-/**
- * VXLAN/UDP encap configuration
+/* VXLAN/UDP encap configuration
* %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports
* %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes
*/
#define NFP_NET_CFG_VXLAN_PORT 0x0060
#define NFP_NET_CFG_VXLAN_SZ 0x0008
-/**
- * BPF section
+/* BPF section
* %NFP_NET_CFG_BPF_ABI: BPF ABI version
* %NFP_NET_CFG_BPF_CAP: BPF capabilities
* %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes
#define NFP_NET_CFG_BPF_CFG_MASK 7ULL
#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK)
-/**
- * 40B reserved for future use (0x0098 - 0x00c0)
+/* 40B reserved for future use (0x0098 - 0x00c0)
*/
#define NFP_NET_CFG_RESERVED 0x0098
#define NFP_NET_CFG_RESERVED_SZ 0x0028
-/**
- * RSS configuration (0x0100 - 0x01ac):
+/* RSS configuration (0x0100 - 0x01ac):
* Used only when NFP_NET_CFG_CTRL_RSS is enabled
* %NFP_NET_CFG_RSS_CFG: RSS configuration word
* %NFP_NET_CFG_RSS_KEY: RSS "secret" key
NFP_NET_CFG_RSS_KEY_SZ)
#define NFP_NET_CFG_RSS_ITBL_SZ 0x80
-/**
- * TX ring configuration (0x200 - 0x800)
+/* TX ring configuration (0x200 - 0x800)
* %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration
* %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries)
* %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries)
#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \
((_x) * 0x4))
-/**
- * RX ring configuration (0x0800 - 0x0c00)
+/* RX ring configuration (0x0800 - 0x0c00)
* %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration
* %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries)
* %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries)
#define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \
((_x) * 0x4))
-/**
- * Interrupt Control/Cause registers (0x0c00 - 0x0d00)
+/* Interrupt Control/Cause registers (0x0c00 - 0x0d00)
* These registers are only used when MSI-X auto-masking is not
* enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is indexed
* by MSI-X entry and each entry is 1B in size. If an entry is zero, the
#define NFP_NET_CFG_ICR_RXTX 0x1
#define NFP_NET_CFG_ICR_LSC 0x2
-/**
- * General device stats (0x0d00 - 0x0d90)
+/* General device stats (0x0d00 - 0x0d90)
* all counters are 64bit.
*/
#define NFP_NET_CFG_STATS_BASE 0x0d00
#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0)
#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8)
-/**
- * Per ring stats (0x1000 - 0x1800)
+/* Per ring stats (0x1000 - 0x1800)
* options, 64bit per entry
* %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count)
* %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count)
#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
((_x) * 0x10))
-/**
- * General use mailbox area (0x1800 - 0x19ff)
+/* General use mailbox area (0x1800 - 0x19ff)
* 4B used for update command and 4B return code
* followed by a max of 504B of variable length value
*/
#define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5
#define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6
-/**
- * VLAN filtering using general use mailbox
+/* VLAN filtering using general use mailbox
* %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox
* %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter
* %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter
#define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2)
#define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004
-/**
- * TLV capabilities
+/* TLV capabilities
* %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
* %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
* %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV
#define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000
#define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff
-/**
- * Capability TLV types
+/* Capability TLV types
*
* %NFP_NET_CFG_TLV_TYPE_UNKNOWN:
* Special TLV type to catch bugs, should never be encountered. Drivers should
struct device;
-/**
- * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
+/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
* @me_freq_mhz: ME clock_freq (MHz)
* @mbox_off: vNIC mailbox area offset
* @mbox_len: vNIC mailbox area length
#ifndef _NFP_NET_SRIOV_H_
#define _NFP_NET_SRIOV_H_
-/**
- * SRIOV VF configuration.
+/* SRIOV VF configuration.
* The configuration memory begins with a mailbox region for communication with
* the firmware followed by individual VF entries.
*/
void nfp_devlink_port_type_eth_set(struct nfp_port *port);
void nfp_devlink_port_type_clear(struct nfp_port *port);
-/**
- * Mac stats (0x0000 - 0x0200)
+/* Mac stats (0x0000 - 0x0200)
* all counters are 64bit.
*/
#define NFP_MAC_STATS_BASE 0x0000
dma_size = BIT_ULL(dma_order);
nseg = DIV_ROUND_UP(max_size, chunk_size);
- chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL);
+ chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL);
if (!chunks)
return -ENOMEM;
#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002
#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003
-#define DEVCMD_TIMEOUT 10
+#define DEVCMD_TIMEOUT 5
#define IONIC_ADMINQ_TIME_SLICE msecs_to_jiffies(100)
#define IONIC_PHC_UPDATE_NS 10000000000 /* 10s in nanoseconds */
u8 status, int err);
int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_wait);
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+ int err);
int ionic_set_dma_mask(struct ionic *ionic);
int ionic_setup(struct ionic *ionic);
int ionic_port_init(struct ionic *ionic);
int ionic_port_reset(struct ionic *ionic);
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr);
+
#endif /* _IONIC_H_ */
static void ionic_vf_dealloc_locked(struct ionic *ionic)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
struct ionic_vf *v;
- dma_addr_t dma = 0;
int i;
if (!ionic->vfs)
v = &ionic->vfs[i];
if (v->stats_pa) {
- (void)ionic_set_vf_config(ionic, i,
- IONIC_VF_ATTR_STATSADDR,
- (u8 *)&dma);
+ vfc.stats_pa = 0;
+ (void)ionic_set_vf_config(ionic, i, &vfc);
dma_unmap_single(ionic->dev, v->stats_pa,
sizeof(v->stats), DMA_FROM_DEVICE);
v->stats_pa = 0;
static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
struct ionic_vf *v;
int err = 0;
int i;
}
ionic->num_vfs++;
+
/* ignore failures from older FW, we just won't get stats */
- (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR,
- (u8 *)&v->stats_pa);
+ vfc.stats_pa = cpu_to_le64(v->stats_pa);
+ (void)ionic_set_vf_config(ionic, i, &vfc);
}
out:
goto err_out_deregister_lifs;
}
+ mod_timer(&ionic->watchdog_timer,
+ round_jiffies(jiffies + ionic->watchdog_period));
+
return 0;
err_out_deregister_lifs:
err_out_reset:
ionic_reset(ionic);
err_out_teardown:
- del_timer_sync(&ionic->watchdog_timer);
pci_clear_master(pdev);
/* Don't fail the probe for these errors, keep
* the hw interface around for inspection
!test_bit(IONIC_LIF_F_FW_RESET, lif->state))
ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
- if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) {
+ if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state) &&
+ !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
netdev_err(lif->netdev, "rxmode change dropped\n");
}
}
+/* Set up the watchdog timer and the heartbeat bookkeeping in ionic->idev.
+ *
+ * Registers ionic_watchdog_cb as the timer callback and sets the period to
+ * IONIC_WATCHDOG_SECS seconds. The timer is only initialized here; this
+ * function does not call mod_timer(), so arming it is left to the caller.
+ */
+static void ionic_watchdog_init(struct ionic *ionic)
+{
+ struct ionic_dev *idev = &ionic->idev;
+
+ timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
+ ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
+
+ /* set times to ensure the first check will proceed */
+ atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
+ idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
+ /* init as ready, so no transition if the first check succeeds */
+ idev->last_fw_hb = 0;
+ idev->fw_hb_ready = true;
+ idev->fw_status_ready = true;
+ /* keep only the generation bits of the current fw_status register */
+ idev->fw_generation = IONIC_FW_STS_F_GENERATION &
+ ioread8(&idev->dev_info_regs->fw_status);
+}
+
void ionic_init_devinfo(struct ionic *ionic)
{
struct ionic_dev *idev = &ionic->idev;
return -EFAULT;
}
- timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
- ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
-
- /* set times to ensure the first check will proceed */
- atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
- idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
- /* init as ready, so no transition if the first check succeeds */
- idev->last_fw_hb = 0;
- idev->fw_hb_ready = true;
- idev->fw_status_ready = true;
- idev->fw_generation = IONIC_FW_STS_F_GENERATION &
- ioread8(&idev->dev_info_regs->fw_status);
-
- mod_timer(&ionic->watchdog_timer,
- round_jiffies(jiffies + ionic->watchdog_period));
+ ionic_watchdog_init(ionic);
idev->db_pages = bar->vaddr;
idev->phy_db_pages = bar->bus_addr;
}
/* Devcmd Interface */
+/* Read the fw_status register once and report whether firmware is usable.
+ *
+ * Returns true only when the value read is not 0xff and has
+ * IONIC_FW_STS_F_RUNNING set; false otherwise.
+ */
+bool ionic_is_fw_running(struct ionic_dev *idev)
+{
+ u8 fw_status = ioread8(&idev->dev_info_regs->fw_status);
+
+ /* firmware is useful only if the running bit is set and
+ * fw_status != 0xff (bad PCI read)
+ */
+ return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
+}
+
int ionic_heartbeat_check(struct ionic *ionic)
{
- struct ionic_dev *idev = &ionic->idev;
unsigned long check_time, last_check_time;
+ struct ionic_dev *idev = &ionic->idev;
+ struct ionic_lif *lif = ionic->lif;
bool fw_status_ready = true;
bool fw_hb_ready;
u8 fw_generation;
goto do_check_time;
}
- /* firmware is useful only if the running bit is set and
- * fw_status != 0xff (bad PCI read)
- * If fw_status is not ready don't bother with the generation.
- */
fw_status = ioread8(&idev->dev_info_regs->fw_status);
- if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) {
+ /* If fw_status is not ready don't bother with the generation */
+ if (!ionic_is_fw_running(idev)) {
fw_status_ready = false;
} else {
fw_generation = fw_status & IONIC_FW_STS_F_GENERATION;
* the down, the next watchdog will see the fw is up
* and the generation value stable, so will trigger
* the fw-up activity.
+ *
+ * If we had already moved to FW_RESET from a RESET event,
+ * it is possible that we never saw the fw_status go to 0,
+ * so we fake the current idev->fw_status_ready here to
+ * force the transition and get FW up again.
*/
- fw_status_ready = false;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ idev->fw_status_ready = false; /* go to running */
+ else
+ fw_status_ready = false; /* go to down */
}
}
/* is this a transition? */
if (fw_status_ready != idev->fw_status_ready) {
- struct ionic_lif *lif = ionic->lif;
bool trigger = false;
- idev->fw_status_ready = fw_status_ready;
-
- if (!fw_status_ready) {
- dev_info(ionic->dev, "FW stopped %u\n", fw_status);
- if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
- trigger = true;
- } else {
- dev_info(ionic->dev, "FW running %u\n", fw_status);
- if (lif && test_bit(IONIC_LIF_F_FW_RESET, lif->state))
- trigger = true;
+ if (!fw_status_ready && lif &&
+ !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+ dev_info(ionic->dev, "FW stopped 0x%02x\n", fw_status);
+ trigger = true;
+
+ } else if (fw_status_ready && lif &&
+ test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+ dev_info(ionic->dev, "FW running 0x%02x\n", fw_status);
+ trigger = true;
}
if (trigger) {
struct ionic_deferred_work *work;
+ idev->fw_status_ready = fw_status_ready;
+
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (work) {
work->type = IONIC_DW_TYPE_LIF_RESET;
}
}
- if (!fw_status_ready)
+ if (!idev->fw_status_ready)
return -ENXIO;
- /* wait at least one watchdog period since the last heartbeat */
+ /* Because of some variability in the actual FW heartbeat, we
+ * wait longer than the DEVCMD_TIMEOUT before checking again.
+ */
last_check_time = idev->last_hb_time;
- if (time_before(check_time, last_check_time + ionic->watchdog_period))
+ if (time_before(check_time, last_check_time + DEVCMD_TIMEOUT * 2 * HZ))
return 0;
fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
}
/* VF commands */
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data)
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+ struct ionic_vf_setattr_cmd *vfc)
{
union ionic_dev_cmd cmd = {
.vf_setattr.opcode = IONIC_CMD_VF_SETATTR,
- .vf_setattr.attr = attr,
+ .vf_setattr.attr = vfc->attr,
.vf_setattr.vf_index = cpu_to_le16(vf),
};
int err;
+ memcpy(cmd.vf_setattr.pad, vfc->pad, sizeof(vfc->pad));
+
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_go(&ionic->idev, &cmd);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ return err;
+}
+
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+ struct ionic_vf_getattr_comp *comp)
+{
+ union ionic_dev_cmd cmd = {
+ .vf_getattr.opcode = IONIC_CMD_VF_GETATTR,
+ .vf_getattr.attr = attr,
+ .vf_getattr.vf_index = cpu_to_le16(vf),
+ };
+ int err;
+
+ if (vf >= ionic->num_vfs)
+ return -EINVAL;
+
switch (attr) {
case IONIC_VF_ATTR_SPOOFCHK:
- cmd.vf_setattr.spoofchk = *data;
- dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
- __func__, vf, *data);
- break;
case IONIC_VF_ATTR_TRUST:
- cmd.vf_setattr.trust = *data;
- dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
- __func__, vf, *data);
- break;
case IONIC_VF_ATTR_LINKSTATE:
- cmd.vf_setattr.linkstate = *data;
- dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
- __func__, vf, *data);
- break;
case IONIC_VF_ATTR_MAC:
- ether_addr_copy(cmd.vf_setattr.macaddr, data);
- dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
- __func__, vf, data);
- break;
case IONIC_VF_ATTR_VLAN:
- cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data);
- dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
- __func__, vf, *(u16 *)data);
- break;
case IONIC_VF_ATTR_RATE:
- cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data);
- dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
- __func__, vf, *(u32 *)data);
break;
case IONIC_VF_ATTR_STATSADDR:
- cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data);
- dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n",
- __func__, vf, *(u64 *)data);
- break;
default:
return -EINVAL;
}
mutex_lock(&ionic->dev_cmd_lock);
ionic_dev_cmd_go(&ionic->idev, &cmd);
- err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ err = ionic_dev_cmd_wait_nomsg(ionic, DEVCMD_TIMEOUT);
+ memcpy_fromio(comp, &ionic->idev.dev_cmd_regs->comp.vf_getattr,
+ sizeof(*comp));
mutex_unlock(&ionic->dev_cmd_lock);
+ if (err && comp->status != IONIC_RC_ENOSUPP)
+ ionic_dev_cmd_dev_err_print(ionic, cmd.vf_getattr.opcode,
+ comp->status, err);
+
return err;
}
void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data);
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+ struct ionic_vf_setattr_cmd *vfc);
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+ struct ionic_vf_getattr_comp *comp);
void ionic_dev_cmd_queue_identify(struct ionic_dev *idev,
u16 lif_type, u8 qtype, u8 qver);
void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
unsigned int stop_index);
int ionic_heartbeat_check(struct ionic *ionic);
+bool ionic_is_fw_running(struct ionic_dev *idev);
#endif /* _IONIC_DEV_H_ */
ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
break;
case IONIC_EVENT_RESET:
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work) {
- netdev_err(lif->netdev, "Reset event dropped\n");
- } else {
- work->type = IONIC_DW_TYPE_LIF_RESET;
- ionic_lif_deferred_enqueue(&lif->deferred, work);
+ if (lif->ionic->idev.fw_status_ready &&
+ !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ netdev_err(lif->netdev, "Reset event dropped\n");
+ clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
+ } else {
+ work->type = IONIC_DW_TYPE_LIF_RESET;
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
+ }
}
break;
default:
err = ionic_adminq_post_wait(lif, &ctx);
if (err)
- netdev_err(lif->netdev, "lif quiesce failed %d\n", err);
+ netdev_dbg(lif->netdev, "lif quiesce failed %d\n", err);
}
static void ionic_txrx_disable(struct ionic_lif *lif)
}
}
+/* Refresh the cached settings in ionic->vfs[vf] from the device.
+ *
+ * Issues one ionic_dev_cmd_vf_getattr() per attribute, in this order:
+ * VLAN, SPOOFCHK, LINKSTATE, RATE, TRUST, MAC. A successful query updates
+ * the matching cache field. A failed query whose completion status is
+ * IONIC_RC_ENOSUPP leaves that field untouched and moves on to the next
+ * attribute; any other failure stops the sequence and jumps to err_out.
+ *
+ * Returns 0 on success, otherwise the error from the last query issued
+ * (after logging which attribute failed).
+ *
+ * NOTE(review): err is overwritten by each subsequent query, so an ENOSUPP
+ * result is only forgotten when a later query follows it. If the final
+ * (MAC) query fails with ENOSUPP, err is still non-zero at err_out, so the
+ * failure is logged and returned - confirm whether that is intended.
+ */
+static int ionic_update_cached_vf_config(struct ionic *ionic, int vf)
+{
+ struct ionic_vf_getattr_comp comp = { 0 };
+ int err;
+ u8 attr;
+
+ attr = IONIC_VF_ATTR_VLAN;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ionic->vfs[vf].vlanid = comp.vlanid;
+
+ attr = IONIC_VF_ATTR_SPOOFCHK;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ionic->vfs[vf].spoofchk = comp.spoofchk;
+
+ attr = IONIC_VF_ATTR_LINKSTATE;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err) {
+ /* translate the device link status into the IFLA_VF_LINK_STATE_*
+ * value kept in the cache; an unrecognized value is only warned
+ * about and the cached linkstate is left as it was
+ */
+ switch (comp.linkstate) {
+ case IONIC_VF_LINK_STATUS_UP:
+ ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_ENABLE;
+ break;
+ case IONIC_VF_LINK_STATUS_DOWN:
+ ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_DISABLE;
+ break;
+ case IONIC_VF_LINK_STATUS_AUTO:
+ ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_AUTO;
+ break;
+ default:
+ dev_warn(ionic->dev, "Unexpected link state %u\n", comp.linkstate);
+ break;
+ }
+ }
+
+ attr = IONIC_VF_ATTR_RATE;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ionic->vfs[vf].maxrate = comp.maxrate;
+
+ attr = IONIC_VF_ATTR_TRUST;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ionic->vfs[vf].trusted = comp.trust;
+
+ attr = IONIC_VF_ATTR_MAC;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ether_addr_copy(ionic->vfs[vf].macaddr, comp.macaddr);
+
+err_out:
+ /* attr still names the attribute of the most recent query */
+ if (err)
+ dev_err(ionic->dev, "Failed to get %s for VF %d\n",
+ ionic_vf_attr_to_str(attr), vf);
+
+ return err;
+}
+
static int ionic_get_vf_config(struct net_device *netdev,
int vf, struct ifla_vf_info *ivf)
{
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ivf->vf = vf;
- ivf->vlan = le16_to_cpu(ionic->vfs[vf].vlanid);
- ivf->qos = 0;
- ivf->spoofchk = ionic->vfs[vf].spoofchk;
- ivf->linkstate = ionic->vfs[vf].linkstate;
- ivf->max_tx_rate = le32_to_cpu(ionic->vfs[vf].maxrate);
- ivf->trusted = ionic->vfs[vf].trusted;
- ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+ ivf->vf = vf;
+ ivf->qos = 0;
+
+ ret = ionic_update_cached_vf_config(ionic, vf);
+ if (!ret) {
+ ivf->vlan = le16_to_cpu(ionic->vfs[vf].vlanid);
+ ivf->spoofchk = ionic->vfs[vf].spoofchk;
+ ivf->linkstate = ionic->vfs[vf].linkstate;
+ ivf->max_tx_rate = le32_to_cpu(ionic->vfs[vf].maxrate);
+ ivf->trusted = ionic->vfs[vf].trusted;
+ ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+ }
}
up_read(&ionic->vf_op_lock);
static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_MAC };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
int ret;
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac);
+ ether_addr_copy(vfc.macaddr, mac);
+ dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
+ __func__, vf, vfc.macaddr);
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
ether_addr_copy(ionic->vfs[vf].macaddr, mac);
}
static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
u8 qos, __be16 proto)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
int ret;
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_VLAN, (u8 *)&vlan);
+ vfc.vlanid = cpu_to_le16(vlan);
+ dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
+ __func__, vf, le16_to_cpu(vfc.vlanid));
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
ionic->vfs[vf].vlanid = cpu_to_le16(vlan);
}
static int ionic_set_vf_rate(struct net_device *netdev, int vf,
int tx_min, int tx_max)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_RATE };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
int ret;
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_RATE, (u8 *)&tx_max);
+ vfc.maxrate = cpu_to_le32(tx_max);
+ dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
+ __func__, vf, le32_to_cpu(vfc.maxrate));
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
lif->ionic->vfs[vf].maxrate = cpu_to_le32(tx_max);
}
static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_SPOOFCHK };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
- u8 data = set; /* convert to u8 for config */
int ret;
if (!netif_device_present(netdev))
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_SPOOFCHK, &data);
+ vfc.spoofchk = set;
+ dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
+ __func__, vf, vfc.spoofchk);
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
- ionic->vfs[vf].spoofchk = data;
+ ionic->vfs[vf].spoofchk = set;
}
up_write(&ionic->vf_op_lock);
static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_TRUST };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
- u8 data = set; /* convert to u8 for config */
int ret;
if (!netif_device_present(netdev))
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_TRUST, &data);
+ vfc.trust = set;
+ dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
+ __func__, vf, vfc.trust);
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
- ionic->vfs[vf].trusted = data;
+ ionic->vfs[vf].trusted = set;
}
up_write(&ionic->vf_op_lock);
static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_LINKSTATE };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
- u8 data;
+ u8 vfls;
int ret;
switch (set) {
case IFLA_VF_LINK_STATE_ENABLE:
- data = IONIC_VF_LINK_STATUS_UP;
+ vfls = IONIC_VF_LINK_STATUS_UP;
break;
case IFLA_VF_LINK_STATE_DISABLE:
- data = IONIC_VF_LINK_STATUS_DOWN;
+ vfls = IONIC_VF_LINK_STATUS_DOWN;
break;
case IFLA_VF_LINK_STATE_AUTO:
- data = IONIC_VF_LINK_STATUS_AUTO;
+ vfls = IONIC_VF_LINK_STATUS_AUTO;
break;
default:
return -EINVAL;
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_LINKSTATE, &data);
+ vfc.linkstate = vfls;
+ dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
+ __func__, vf, vfc.linkstate);
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
ionic->vfs[vf].linkstate = set;
}
mutex_unlock(&lif->queue_lock);
+ clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
dev_info(ionic->dev, "FW Down: LIFs stopped\n");
}
/* unmap doorbell page */
ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
lif->kern_dbpage = NULL;
- kfree(lif->dbid_inuse);
- lif->dbid_inuse = NULL;
mutex_destroy(&lif->config_lock);
mutex_destroy(&lif->queue_lock);
return -EINVAL;
}
- lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
- if (!lif->dbid_inuse) {
- dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
- return -ENOMEM;
- }
-
- /* first doorbell id reserved for kernel (dbid aka pid == zero) */
- set_bit(0, lif->dbid_inuse);
lif->kern_pid = 0;
-
dbpage_num = ionic_db_page_num(lif, lif->kern_pid);
lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num);
if (!lif->kern_dbpage) {
dev_err(dev, "Cannot map dbpage, aborting\n");
- err = -ENOMEM;
- goto err_out_free_dbid;
+ return -ENOMEM;
}
err = ionic_lif_adminq_init(lif);
return 0;
err_out_notifyq_deinit:
+ napi_disable(&lif->adminqcq->napi);
ionic_lif_qcq_deinit(lif, lif->notifyqcq);
err_out_adminq_deinit:
ionic_lif_qcq_deinit(lif, lif->adminqcq);
ionic_lif_reset(lif);
ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
lif->kern_dbpage = NULL;
-err_out_free_dbid:
- kfree(lif->dbid_inuse);
- lif->dbid_inuse = NULL;
return err;
}
IONIC_LIF_F_LINK_CHECK_REQUESTED,
IONIC_LIF_F_FILTER_SYNC_NEEDED,
IONIC_LIF_F_FW_RESET,
+ IONIC_LIF_F_FW_STOPPING,
IONIC_LIF_F_SPLIT_INTR,
IONIC_LIF_F_BROKEN,
IONIC_LIF_F_TX_DIM_INTR,
u32 rx_coalesce_hw; /* what the hw is using */
u32 tx_coalesce_usecs; /* what the user asked for */
u32 tx_coalesce_hw; /* what the hw is using */
- unsigned long *dbid_inuse;
unsigned int dbid_count;
struct ionic_phc *phc;
}
}
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr) /* Return the printable name of a VF attribute id; each case returns the enum identifier as a string. */
+{
+ switch (attr) {
+ case IONIC_VF_ATTR_SPOOFCHK:
+ return "IONIC_VF_ATTR_SPOOFCHK";
+ case IONIC_VF_ATTR_TRUST:
+ return "IONIC_VF_ATTR_TRUST";
+ case IONIC_VF_ATTR_LINKSTATE:
+ return "IONIC_VF_ATTR_LINKSTATE";
+ case IONIC_VF_ATTR_MAC:
+ return "IONIC_VF_ATTR_MAC";
+ case IONIC_VF_ATTR_VLAN:
+ return "IONIC_VF_ATTR_VLAN";
+ case IONIC_VF_ATTR_RATE:
+ return "IONIC_VF_ATTR_RATE";
+ case IONIC_VF_ATTR_STATSADDR:
+ return "IONIC_VF_ATTR_STATSADDR";
+ default: /* any attribute id not listed above */
+ return "IONIC_VF_ATTR_UNKNOWN";
+ }
+}
+
static void ionic_adminq_flush(struct ionic_lif *lif)
{
struct ionic_desc_info *desc_info;
void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
u8 status, int err) /* Log a failed adminq command (opcode name/number, status text, errno) on the LIF's netdev. */
{
+ const char *stat_str;
+
+ stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+ ionic_error_to_str(status); /* on -ETIMEDOUT print "TIMEOUT" instead of decoding the status byte */
+
netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n",
- ionic_opcode_to_str(opcode), opcode,
- ionic_error_to_str(status), err);
+ ionic_opcode_to_str(opcode), opcode, stat_str, err);
}
static int ionic_adminq_check_err(struct ionic_lif *lif,
if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
netdev_err(netdev, "Posting of %s (%d) failed: %d\n",
name, ctx->cmd.cmd.opcode, err);
+ ctx->comp.comp.status = IONIC_RC_ERROR;
return err;
}
if (remaining)
break;
- /* interrupt the wait if FW stopped */
- if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
+ /* force a check of FW status and break out if FW reset */
+ (void)ionic_heartbeat_check(lif->ionic);
+ if ((test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !lif->ionic->idev.fw_status_ready) ||
+ test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
if (do_msg)
- netdev_err(netdev, "%s (%d) interrupted, FW in reset\n",
- name, ctx->cmd.cmd.opcode);
+ netdev_warn(netdev, "%s (%d) interrupted, FW in reset\n",
+ name, ctx->cmd.cmd.opcode);
+ ctx->comp.comp.status = IONIC_RC_ERROR;
return -ENXIO;
}
static void ionic_dev_cmd_clean(struct ionic *ionic) /* Write 0 to the dev_cmd doorbell register and zero the dev_cmd command area. */
{
- union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
+ struct ionic_dev *idev = &ionic->idev;
- iowrite32(0, &regs->doorbell);
- memset_io(&regs->cmd, 0, sizeof(regs->cmd));
+ iowrite32(0, &idev->dev_cmd_regs->doorbell);
+ memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd));
}
-int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+ int err) /* Log a failed dev_cmd (opcode name/number, status text, errno) via dev_err on ionic->dev. */
+{
+ const char *stat_str;
+
+ stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+ ionic_error_to_str(status); /* on -ETIMEDOUT print "TIMEOUT" instead of decoding the status byte */
+
+ dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
+ ionic_opcode_to_str(opcode), opcode, stat_str, err);
+}
+
+static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
+ const bool do_msg)
{
struct ionic_dev *idev = &ionic->idev;
unsigned long start_time;
unsigned long max_wait;
unsigned long duration;
+ int done = 0;
+ bool fw_up;
int opcode;
- int hb = 0;
- int done;
int err;
/* Wait for dev cmd to complete, retrying if we get EAGAIN,
try_again:
opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode);
start_time = jiffies;
- do {
+ for (fw_up = ionic_is_fw_running(idev);
+ !done && fw_up && time_before(jiffies, max_wait);
+ fw_up = ionic_is_fw_running(idev)) {
done = ionic_dev_cmd_done(idev);
if (done)
break;
usleep_range(100, 200);
-
- /* Don't check the heartbeat on FW_CONTROL commands as they are
- * notorious for interrupting the firmware's heartbeat update.
- */
- if (opcode != IONIC_CMD_FW_CONTROL)
- hb = ionic_heartbeat_check(ionic);
- } while (!done && !hb && time_before(jiffies, max_wait));
+ }
duration = jiffies - start_time;
dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n",
ionic_opcode_to_str(opcode), opcode,
done, duration / HZ, duration);
- if (!done && hb) {
- /* It is possible (but unlikely) that FW was busy and missed a
- * heartbeat check but is still alive and will process this
- * request, so don't clean the dev_cmd in this case.
- */
- dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
- ionic_opcode_to_str(opcode), opcode);
+ if (!done && !fw_up) {
+ ionic_dev_cmd_clean(ionic);
+ dev_warn(ionic->dev, "DEVCMD %s (%d) interrupted - FW is down\n",
+ ionic_opcode_to_str(opcode), opcode);
return -ENXIO;
}
}
if (!(opcode == IONIC_CMD_FW_CONTROL && err == IONIC_RC_EAGAIN))
- dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
- ionic_opcode_to_str(opcode), opcode,
- ionic_error_to_str(err), err);
+ if (do_msg)
+ ionic_dev_cmd_dev_err_print(ionic, opcode, err,
+ ionic_error_to_errno(err));
return ionic_error_to_errno(err);
}
return 0;
}
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) /* Wait for the posted dev_cmd; forwards to __ionic_dev_cmd_wait() with do_msg = true. */
+{
+ return __ionic_dev_cmd_wait(ionic, max_seconds, true);
+}
+
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_seconds) /* Same wait as ionic_dev_cmd_wait(), but passes do_msg = false to __ionic_dev_cmd_wait(). */
+{
+ return __ionic_dev_cmd_wait(ionic, max_seconds, false);
+}
+
int ionic_setup(struct ionic *ionic)
{
int err;
struct ionic_dev *idev = &ionic->idev;
int err;
+ if (!ionic_is_fw_running(idev))
+ return 0;
+
mutex_lock(&ionic->dev_cmd_lock);
ionic_dev_cmd_reset(idev);
err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
int ionic_port_reset(struct ionic *ionic) /* Reset the port (when FW is running) and free the port_info DMA buffer; returns the devcmd result, or 0 if no reset was sent. */
{
struct ionic_dev *idev = &ionic->idev;
- int err;
+ int err = 0;
if (!idev->port_info)
return 0; /* no port_info buffer to release */
- mutex_lock(&ionic->dev_cmd_lock);
- ionic_dev_cmd_port_reset(idev);
- err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
- mutex_unlock(&ionic->dev_cmd_lock);
+ if (ionic_is_fw_running(idev)) { /* only send the port reset devcmd while FW is running */
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_port_reset(idev);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+ }
dma_free_coherent(ionic->dev, idev->port_info_sz,
idev->port_info, idev->port_info_pa); /* freed whether or not the reset was sent */
idev->port_info = NULL;
idev->port_info_pa = 0;
- if (err)
- dev_err(ionic->dev, "Failed to reset port\n");
-
return err;
}
spin_unlock_bh(&lif->rx_filters.lock);
- if (err == -ENOSPC) {
- if (le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
- lif->max_vlans = lif->nvlans;
+ /* store the max_vlans limit that we found */
+ if (err == -ENOSPC &&
+ le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
+ lif->max_vlans = lif->nvlans;
+
+ /* Prevent unnecessary error messages on recoverable
+ * errors as the filter will get retried on the next
+ * sync attempt.
+ */
+ switch (err) {
+ case -ENOSPC:
+ case -ENXIO:
+ case -ETIMEDOUT:
+ case -EAGAIN:
+ case -EBUSY:
return 0;
+ default:
+ break;
}
ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
spin_unlock_bh(&lif->rx_filters.lock);
if (state != IONIC_FILTER_STATE_NEW) {
- err = ionic_adminq_post_wait(lif, &ctx);
- if (err && err != -EEXIST)
+ err = ionic_adminq_post_wait_nomsg(lif, &ctx);
+
+ switch (err) {
+ /* ignore these errors */
+ case -EEXIST:
+ case -ENXIO:
+ case -ETIMEDOUT:
+ case -EAGAIN:
+ case -EBUSY:
+ case 0:
+ break;
+ default:
+ ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
+ ctx.comp.comp.status, err);
return err;
+ }
}
return 0;
return -EIO;
}
+static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
+ struct ionic_desc_info *desc_info) /* Undo the DMA mappings recorded in desc_info->bufs and mark the descriptor as having no buffers. */
+{
+ struct ionic_buf_info *buf_info = desc_info->bufs;
+ struct device *dev = q->dev;
+ unsigned int i;
+
+ if (!desc_info->nbufs)
+ return; /* nothing mapped; also makes a repeated call a no-op */
+
+ dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+ buf_info->len, DMA_TO_DEVICE); /* bufs[0] is unmapped with dma_unmap_single */
+ buf_info++;
+ for (i = 1; i < desc_info->nbufs; i++, buf_info++) /* remaining entries are unmapped with dma_unmap_page */
+ dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+ buf_info->len, DMA_TO_DEVICE);
+
+ desc_info->nbufs = 0;
+}
+
static void ionic_tx_clean(struct ionic_queue *q,
struct ionic_desc_info *desc_info,
struct ionic_cq_info *cq_info,
void *cb_arg)
{
- struct ionic_buf_info *buf_info = desc_info->bufs;
struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct ionic_qcq *qcq = q_to_qcq(q);
struct sk_buff *skb = cb_arg;
- struct device *dev = q->dev;
- unsigned int i;
u16 qi;
- if (desc_info->nbufs) {
- dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
- buf_info->len, DMA_TO_DEVICE);
- buf_info++;
- for (i = 1; i < desc_info->nbufs; i++, buf_info++)
- dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
- buf_info->len, DMA_TO_DEVICE);
- }
+ ionic_tx_desc_unmap_bufs(q, desc_info);
if (!skb)
return;
err = ionic_tx_tcp_inner_pseudo_csum(skb);
else
err = ionic_tx_tcp_pseudo_csum(skb);
- if (err)
+ if (err) {
+ /* clean up mapping from ionic_tx_map_skb */
+ ionic_tx_desc_unmap_bufs(q, desc_info);
return err;
+ }
if (encap)
hdrlen = skb_inner_transport_header(skb) - skb->data +
return 0;
}
-static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
+ struct ionic_desc_info *desc_info)
{
struct ionic_txq_desc *desc = desc_info->txq_desc;
struct ionic_buf_info *buf_info = desc_info->bufs;
stats->crc32_csum++;
else
stats->csum++;
-
- return 0;
}
-static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
+ struct ionic_desc_info *desc_info)
{
struct ionic_txq_desc *desc = desc_info->txq_desc;
struct ionic_buf_info *buf_info = desc_info->bufs;
desc->csum_offset = 0;
stats->csum_none++;
-
- return 0;
}
-static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
+ struct ionic_desc_info *desc_info)
{
struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc;
struct ionic_buf_info *buf_info = &desc_info->bufs[1];
}
stats->frags += skb_shinfo(skb)->nr_frags;
-
- return 0;
}
static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
{
struct ionic_desc_info *desc_info = &q->info[q->head_idx];
struct ionic_tx_stats *stats = q_to_tx_stats(q);
- int err;
if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
return -EIO;
/* set up the initial descriptor */
if (skb->ip_summed == CHECKSUM_PARTIAL)
- err = ionic_tx_calc_csum(q, skb, desc_info);
+ ionic_tx_calc_csum(q, skb, desc_info);
else
- err = ionic_tx_calc_no_csum(q, skb, desc_info);
- if (err)
- return err;
+ ionic_tx_calc_no_csum(q, skb, desc_info);
/* add frags */
- err = ionic_tx_skb_frags(q, skb, desc_info);
- if (err)
- return err;
+ ionic_tx_skb_frags(q, skb, desc_info);
skb_tx_timestamp(skb);
stats->pkts++;
if (enable && tp->aspm_manageable) {
RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
+
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ /* reset ephy tx/rx disable timer */
+ r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0);
+ /* chip can trigger L1.2 */
+ r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, BIT(2));
+ break;
+ default:
+ break;
+ }
} else {
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0);
+ break;
+ default:
+ break;
+ }
+
RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
}
rtl8169_down(tp);
}
-#ifdef CONFIG_PM
-
static int rtl8169_runtime_resume(struct device *dev)
{
struct rtl8169_private *tp = dev_get_drvdata(dev);
return 0;
}
-static int __maybe_unused rtl8169_suspend(struct device *device)
+static int rtl8169_suspend(struct device *device)
{
struct rtl8169_private *tp = dev_get_drvdata(device);
return 0;
}
-static int __maybe_unused rtl8169_resume(struct device *device)
+static int rtl8169_resume(struct device *device)
{
struct rtl8169_private *tp = dev_get_drvdata(device);
}
static const struct dev_pm_ops rtl8169_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
- SET_RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
- rtl8169_runtime_idle)
+ SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
+ RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
+ rtl8169_runtime_idle)
};
-#endif /* CONFIG_PM */
-
static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
{
/* WoL fails with 8168b when the receiver is disabled. */
rtl_rar_set(tp, mac_addr);
}
+/* register is set if system vendor successfully tested ASPM 1.2
+ *
+ * Returns true only when mac_version is at least RTL_GIGA_MAC_VER_60 and
+ * any of the low four bits of the value read with
+ * r8168_mac_ocp_read(tp, 0xc0b2) is set; false otherwise.
+ */
+static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
+{
+ if (tp->mac_version >= RTL_GIGA_MAC_VER_60 &&
+ r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
+ return true;
+
+ return false;
+}
+
static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct rtl8169_private *tp;
* Chips from RTL8168h partially have issues with L1.2, but seem
* to work fine with L1 and L1.1.
*/
- if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
+ if (rtl_aspm_is_safe(tp))
+ rc = 0;
+ else if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
else
rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
.probe = rtl_init_one,
.remove = rtl_remove_one,
.shutdown = rtl_shutdown,
-#ifdef CONFIG_PM
- .driver.pm = &rtl8169_pm_ops,
-#endif
+ .driver.pm = pm_ptr(&rtl8169_pm_ops),
};
module_pci_driver(rtl8169_pci_driver);
/* MAC core supports the EEE feature. */
if (priv->hw_cap.eee) {
/* Check if the PHY supports EEE */
- if (phy_init_eee(ndev->phydev, 1))
+ if (phy_init_eee(ndev->phydev, true))
return false;
priv->eee_active = 1;
priv->pause, tx_cnt);
}
-static void stmmac_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state)
+static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, };
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
- int tx_cnt = priv->plat->tx_queues_to_use;
- int max_speed = priv->plat->max_speed;
-
- phylink_set(mac_supported, 10baseT_Half);
- phylink_set(mac_supported, 10baseT_Full);
- phylink_set(mac_supported, 100baseT_Half);
- phylink_set(mac_supported, 100baseT_Full);
- phylink_set(mac_supported, 1000baseT_Half);
- phylink_set(mac_supported, 1000baseT_Full);
- phylink_set(mac_supported, 1000baseKX_Full);
-
- phylink_set(mac_supported, Autoneg);
- phylink_set(mac_supported, Pause);
- phylink_set(mac_supported, Asym_Pause);
- phylink_set_port_modes(mac_supported);
-
- /* Cut down 1G if asked to */
- if ((max_speed > 0) && (max_speed < 1000)) {
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
- } else if (priv->plat->has_gmac4) {
- if (!max_speed || max_speed >= 2500) {
- phylink_set(mac_supported, 2500baseT_Full);
- phylink_set(mac_supported, 2500baseX_Full);
- }
- } else if (priv->plat->has_xgmac) {
- if (!max_speed || (max_speed >= 2500)) {
- phylink_set(mac_supported, 2500baseT_Full);
- phylink_set(mac_supported, 2500baseX_Full);
- }
- if (!max_speed || (max_speed >= 5000)) {
- phylink_set(mac_supported, 5000baseT_Full);
- }
- if (!max_speed || (max_speed >= 10000)) {
- phylink_set(mac_supported, 10000baseSR_Full);
- phylink_set(mac_supported, 10000baseLR_Full);
- phylink_set(mac_supported, 10000baseER_Full);
- phylink_set(mac_supported, 10000baseLRM_Full);
- phylink_set(mac_supported, 10000baseT_Full);
- phylink_set(mac_supported, 10000baseKX4_Full);
- phylink_set(mac_supported, 10000baseKR_Full);
- }
- if (!max_speed || (max_speed >= 25000)) {
- phylink_set(mac_supported, 25000baseCR_Full);
- phylink_set(mac_supported, 25000baseKR_Full);
- phylink_set(mac_supported, 25000baseSR_Full);
- }
- if (!max_speed || (max_speed >= 40000)) {
- phylink_set(mac_supported, 40000baseKR4_Full);
- phylink_set(mac_supported, 40000baseCR4_Full);
- phylink_set(mac_supported, 40000baseSR4_Full);
- phylink_set(mac_supported, 40000baseLR4_Full);
- }
- if (!max_speed || (max_speed >= 50000)) {
- phylink_set(mac_supported, 50000baseCR2_Full);
- phylink_set(mac_supported, 50000baseKR2_Full);
- phylink_set(mac_supported, 50000baseSR2_Full);
- phylink_set(mac_supported, 50000baseKR_Full);
- phylink_set(mac_supported, 50000baseSR_Full);
- phylink_set(mac_supported, 50000baseCR_Full);
- phylink_set(mac_supported, 50000baseLR_ER_FR_Full);
- phylink_set(mac_supported, 50000baseDR_Full);
- }
- if (!max_speed || (max_speed >= 100000)) {
- phylink_set(mac_supported, 100000baseKR4_Full);
- phylink_set(mac_supported, 100000baseSR4_Full);
- phylink_set(mac_supported, 100000baseCR4_Full);
- phylink_set(mac_supported, 100000baseLR4_ER4_Full);
- phylink_set(mac_supported, 100000baseKR2_Full);
- phylink_set(mac_supported, 100000baseSR2_Full);
- phylink_set(mac_supported, 100000baseCR2_Full);
- phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full);
- phylink_set(mac_supported, 100000baseDR2_Full);
- }
- }
-
- /* Half-Duplex can only work with single queue */
- if (tx_cnt > 1) {
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 1000baseT_Half);
- }
-
- linkmode_and(supported, supported, mac_supported);
- linkmode_andnot(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mac_supported);
- linkmode_andnot(state->advertising, state->advertising, mask);
+ if (!priv->hw->xpcs)
+ return NULL;
- /* If PCS is supported, check which modes it supports. */
- if (priv->hw->xpcs)
- xpcs_validate(priv->hw->xpcs, supported, state);
+ return &priv->hw->xpcs->pcs;
}
static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
}
static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
- .validate = stmmac_validate,
+ .validate = phylink_generic_validate,
+ .mac_select_pcs = stmmac_mac_select_pcs,
.mac_config = stmmac_mac_config,
.mac_link_down = stmmac_mac_link_down,
.mac_link_up = stmmac_mac_link_up,
{
struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
+ int max_speed = priv->plat->max_speed;
int mode = priv->plat->phy_interface;
struct phylink *phylink;
priv->phylink_config.dev = &priv->dev->dev;
priv->phylink_config.type = PHYLINK_NETDEV;
- priv->phylink_config.pcs_poll = true;
if (priv->plat->mdio_bus_data)
priv->phylink_config.ovr_an_inband =
mdio_bus_data->xpcs_an_inband;
if (!fwnode)
fwnode = dev_fwnode(priv->device);
+ /* Set the platform/firmware specified interface mode */
+ __set_bit(mode, priv->phylink_config.supported_interfaces);
+
+ /* If we have an xpcs, it defines which PHY interfaces are supported. */
+ if (priv->hw->xpcs)
+ xpcs_get_interfaces(priv->hw->xpcs,
+ priv->phylink_config.supported_interfaces);
+
+ priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100;
+
+ if (!max_speed || max_speed >= 1000)
+ priv->phylink_config.mac_capabilities |= MAC_1000;
+
+ if (priv->plat->has_gmac4) {
+ if (!max_speed || max_speed >= 2500)
+ priv->phylink_config.mac_capabilities |= MAC_2500FD;
+ } else if (priv->plat->has_xgmac) {
+ if (!max_speed || max_speed >= 2500)
+ priv->phylink_config.mac_capabilities |= MAC_2500FD;
+ if (!max_speed || max_speed >= 5000)
+ priv->phylink_config.mac_capabilities |= MAC_5000FD;
+ if (!max_speed || max_speed >= 10000)
+ priv->phylink_config.mac_capabilities |= MAC_10000FD;
+ if (!max_speed || max_speed >= 25000)
+ priv->phylink_config.mac_capabilities |= MAC_25000FD;
+ if (!max_speed || max_speed >= 40000)
+ priv->phylink_config.mac_capabilities |= MAC_40000FD;
+ if (!max_speed || max_speed >= 50000)
+ priv->phylink_config.mac_capabilities |= MAC_50000FD;
+ if (!max_speed || max_speed >= 100000)
+ priv->phylink_config.mac_capabilities |= MAC_100000FD;
+ }
+
+ /* Half-Duplex can only work with single queue */
+ if (priv->plat->tx_queues_to_use > 1)
+ priv->phylink_config.mac_capabilities &=
+ ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+
phylink = phylink_create(&priv->phylink_config, fwnode,
mode, &stmmac_phylink_mac_ops);
if (IS_ERR(phylink))
return PTR_ERR(phylink);
- if (priv->hw->xpcs)
- phylink_set_pcs(phylink, &priv->hw->xpcs->pcs);
-
priv->phylink = phylink;
return 0;
}
* @phylink: Pointer to phylink instance
* @phylink_config: phylink configuration settings
* @pcs_phy: Reference to PCS/PMA PHY if used
+ * @pcs: phylink pcs structure for PCS PHY
* @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core
* @axi_clk: AXI4-Lite bus clock
* @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks)
struct phylink_config phylink_config;
struct mdio_device *pcs_phy;
+ struct phylink_pcs pcs;
bool switch_x_sgmii;
.nway_reset = axienet_ethtools_nway_reset,
};
-static void axienet_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
+static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs)
{
- struct net_device *ndev = to_net_dev(config->dev);
- struct axienet_local *lp = netdev_priv(ndev);
+ return container_of(pcs, struct axienet_local, pcs);
+}
- switch (state->interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- phylink_mii_c22_pcs_get_state(lp->pcs_phy, state);
- break;
- default:
- break;
- }
+static void axienet_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+
+ phylink_mii_c22_pcs_get_state(pcs_phy, state);
}
-static void axienet_mac_an_restart(struct phylink_config *config)
+static void axienet_pcs_an_restart(struct phylink_pcs *pcs)
{
- struct net_device *ndev = to_net_dev(config->dev);
- struct axienet_local *lp = netdev_priv(ndev);
+ struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
- phylink_mii_c22_pcs_an_restart(lp->pcs_phy);
+ phylink_mii_c22_pcs_an_restart(pcs_phy);
}
-static int axienet_mac_prepare(struct phylink_config *config, unsigned int mode,
- phy_interface_t iface)
+static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
{
- struct net_device *ndev = to_net_dev(config->dev);
+ struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+ struct net_device *ndev = pcs_to_axienet_local(pcs)->ndev;
struct axienet_local *lp = netdev_priv(ndev);
int ret;
- switch (iface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- if (!lp->switch_x_sgmii)
- return 0;
-
- ret = mdiobus_write(lp->pcs_phy->bus,
- lp->pcs_phy->addr,
- XLNX_MII_STD_SELECT_REG,
- iface == PHY_INTERFACE_MODE_SGMII ?
+ if (lp->switch_x_sgmii) {
+ ret = mdiodev_write(pcs_phy, XLNX_MII_STD_SELECT_REG,
+ interface == PHY_INTERFACE_MODE_SGMII ?
XLNX_MII_STD_SELECT_SGMII : 0);
- if (ret < 0)
- netdev_warn(ndev, "Failed to switch PHY interface: %d\n",
+ if (ret < 0) {
+ netdev_warn(ndev,
+ "Failed to switch PHY interface: %d\n",
ret);
- return ret;
- default:
- return 0;
+ return ret;
+ }
}
+
+ ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising);
+ if (ret < 0)
+ netdev_warn(ndev, "Failed to configure PCS: %d\n", ret);
+
+ return ret;
}
-static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
- const struct phylink_link_state *state)
+static const struct phylink_pcs_ops axienet_pcs_ops = {
+ .pcs_get_state = axienet_pcs_get_state,
+ .pcs_config = axienet_pcs_config,
+ .pcs_an_restart = axienet_pcs_an_restart,
+};
+
+static struct phylink_pcs *axienet_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
{
struct net_device *ndev = to_net_dev(config->dev);
struct axienet_local *lp = netdev_priv(ndev);
- int ret;
- switch (state->interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode,
- state->interface,
- state->advertising);
- if (ret < 0)
- netdev_warn(ndev, "Failed to configure PCS: %d\n",
- ret);
- break;
+ if (interface == PHY_INTERFACE_MODE_1000BASEX ||
+ interface == PHY_INTERFACE_MODE_SGMII)
+ return &lp->pcs;
- default:
- break;
- }
+ return NULL;
+}
+
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+ const struct phylink_link_state *state)
+{
+ /* nothing meaningful to do */
}
static void axienet_mac_link_down(struct phylink_config *config,
static const struct phylink_mac_ops axienet_phylink_ops = {
.validate = phylink_generic_validate,
- .mac_pcs_get_state = axienet_mac_pcs_get_state,
- .mac_an_restart = axienet_mac_an_restart,
- .mac_prepare = axienet_mac_prepare,
+ .mac_select_pcs = axienet_mac_select_pcs,
.mac_config = axienet_mac_config,
.mac_link_down = axienet_mac_link_down,
.mac_link_up = axienet_mac_link_up,
ret = -EPROBE_DEFER;
goto cleanup_mdio;
}
- lp->phylink_config.pcs_poll = true;
+ lp->pcs.ops = &axienet_pcs_ops;
+ lp->pcs.poll = true;
}
lp->phylink_config.dev = &ndev->dev;
lp->phylink_config.type = PHYLINK_NETDEV;
- lp->phylink_config.legacy_pre_march2020 = true;
lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
MAC_10FD | MAC_100FD | MAC_1000FD;
}
}
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
- struct phylink_link_state *state)
+static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
+ const struct phylink_link_state *state)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported) = { 0, };
const struct xpcs_compat *compat;
+ struct dw_xpcs *xpcs;
int i;
- /* phylink expects us to report all supported modes with
- * PHY_INTERFACE_MODE_NA, just don't limit the supported and
- * advertising masks and exit.
- */
- if (state->interface == PHY_INTERFACE_MODE_NA)
- return;
-
- linkmode_zero(xpcs_supported);
-
+ xpcs = phylink_pcs_to_xpcs(pcs);
compat = xpcs_find_compat(xpcs->id, state->interface);
- /* Populate the supported link modes for this
- * PHY interface type
+ /* Populate the supported link modes for this PHY interface type.
+ * FIXME: what about the port modes and autoneg bit? This masks
+ * all those away.
*/
if (compat)
for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
set_bit(compat->supported[i], xpcs_supported);
linkmode_and(supported, supported, xpcs_supported);
- linkmode_and(state->advertising, state->advertising, xpcs_supported);
+
+ return 0;
+}
+
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces) /* Set, in the caller's bitmap, a bit for every PHY interface listed in xpcs->id->compat[]; existing bits are left untouched. */
+{
+ int i, j;
+
+ for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
+ const struct xpcs_compat *compat = &xpcs->id->compat[i];
+
+ for (j = 0; j < compat->num_interfaces; j++)
+ if (compat->interface[j] < PHY_INTERFACE_MODE_MAX) /* skip values that would index past the interface bitmap */
+ __set_bit(compat->interface[j], interfaces);
+ }
}
-EXPORT_SYMBOL_GPL(xpcs_validate);
+EXPORT_SYMBOL_GPL(xpcs_get_interfaces);
int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
{
};
static const struct phylink_pcs_ops xpcs_phylink_ops = {
+ .pcs_validate = xpcs_validate,
.pcs_config = xpcs_config,
.pcs_get_state = xpcs_get_state,
.pcs_link_up = xpcs_link_up,
#include <linux/regulator/of_regulator.h>
#include <linux/regulator/driver.h>
#include <linux/regulator/consumer.h>
+#include <linux/phylink.h>
+#include <linux/sfp.h>
#include <dt-bindings/net/qca-ar803x.h>
#define AT803X_SPECIFIC_FUNCTION_CONTROL 0x10
#define AT803X_INTR_ENABLE_PAGE_RECEIVED BIT(12)
#define AT803X_INTR_ENABLE_LINK_FAIL BIT(11)
#define AT803X_INTR_ENABLE_LINK_SUCCESS BIT(10)
+#define AT803X_INTR_ENABLE_LINK_FAIL_BX BIT(8)
+#define AT803X_INTR_ENABLE_LINK_SUCCESS_BX BIT(7)
#define AT803X_INTR_ENABLE_WIRESPEED_DOWNGRADE BIT(5)
#define AT803X_INTR_ENABLE_POLARITY_CHANGED BIT(1)
#define AT803X_INTR_ENABLE_WOL BIT(0)
#define AT803X_DEBUG_DATA 0x1E
#define AT803X_MODE_CFG_MASK 0x0F
-#define AT803X_MODE_CFG_SGMII 0x01
+#define AT803X_MODE_CFG_BASET_RGMII 0x00
+#define AT803X_MODE_CFG_BASET_SGMII 0x01
+#define AT803X_MODE_CFG_BX1000_RGMII_50OHM 0x02
+#define AT803X_MODE_CFG_BX1000_RGMII_75OHM 0x03
+#define AT803X_MODE_CFG_BX1000_CONV_50OHM 0x04
+#define AT803X_MODE_CFG_BX1000_CONV_75OHM 0x05
+#define AT803X_MODE_CFG_FX100_RGMII_50OHM 0x06
+#define AT803X_MODE_CFG_FX100_CONV_50OHM 0x07
+#define AT803X_MODE_CFG_RGMII_AUTO_MDET 0x0B
+#define AT803X_MODE_CFG_FX100_RGMII_75OHM 0x0E
+#define AT803X_MODE_CFG_FX100_CONV_75OHM 0x0F
#define AT803X_PSSR 0x11 /*PHY-Specific Status Register*/
#define AT803X_PSSR_MR_AN_COMPLETE 0x0200
u16 clk_25m_mask;
u8 smarteee_lpi_tw_1g;
u8 smarteee_lpi_tw_100m;
+ bool is_fiber;
+ bool is_1000basex;
struct regulator_dev *vddio_rdev;
struct regulator_dev *vddh_rdev;
struct regulator *vddio;
return 0;
}
+static int at803x_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) /* SFP module_insert callback: returns 0 if the module is accepted, -EINVAL if it is rejected. */
+{
+ struct phy_device *phydev = upstream;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_support);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support);
+ phy_interface_t iface;
+
+ linkmode_zero(phy_support);
+ phylink_set(phy_support, 1000baseX_Full);
+ phylink_set(phy_support, 1000baseT_Full);
+ phylink_set(phy_support, Autoneg);
+ phylink_set(phy_support, Pause);
+ phylink_set(phy_support, Asym_Pause);
+
+ linkmode_zero(sfp_support);
+ sfp_parse_support(phydev->sfp_bus, id, sfp_support);
+ /* Some modules support 10G modes as well as others we support.
+ * Mask out non-supported modes so the correct interface is picked.
+ */
+ linkmode_and(sfp_support, phy_support, sfp_support);
+
+ if (linkmode_empty(sfp_support)) {
+ dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n");
+ return -EINVAL;
+ }
+
+ iface = sfp_select_interface(phydev->sfp_bus, sfp_support);
+
+ /* Only 1000Base-X is supported by AR8031/8033 as the downstream SerDes
+ * interface for use with SFP modules.
+ * However, some copper modules detected as having a preferred SGMII
+ * interface do default to and function in 1000Base-X mode, so just
+ * print a warning and allow such modules, as they may have some chance
+ * of working.
+ */
+ if (iface == PHY_INTERFACE_MODE_SGMII)
+ dev_warn(&phydev->mdio.dev, "module may not function if 1000Base-X not supported\n");
+ else if (iface != PHY_INTERFACE_MODE_1000BASEX)
+ return -EINVAL; /* any interface other than SGMII or 1000Base-X is rejected */
+
+ return 0;
+}
+
+static const struct sfp_upstream_ops at803x_sfp_ops = { /* SFP upstream callbacks handed to phy_sfp_probe() for this PHY */
+ .attach = phy_sfp_attach,
+ .detach = phy_sfp_detach,
+ .module_insert = at803x_sfp_insert, /* driver-specific module compatibility check */
+};
+
static int at803x_parse_dt(struct phy_device *phydev)
{
struct device_node *node = phydev->mdio.dev.of_node;
phydev_err(phydev, "failed to get VDDIO regulator\n");
return PTR_ERR(priv->vddio);
}
+
+ /* Only AR8031/8033 support 1000Base-X for SFP modules */
+ ret = phy_sfp_probe(phydev, &at803x_sfp_ops);
+ if (ret < 0)
+ return ret;
}
return 0;
return ret;
}
- /* Some bootloaders leave the fiber page selected.
- * Switch to the copper page, as otherwise we read
- * the PHY capabilities from the fiber side.
- */
if (phydev->drv->phy_id == ATH8031_PHY_ID) {
- phy_lock_mdio_bus(phydev);
- ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
- phy_unlock_mdio_bus(phydev);
- if (ret)
+ int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+ int mode_cfg;
+
+ if (ccr < 0)
goto err;
+ mode_cfg = ccr & AT803X_MODE_CFG_MASK;
+
+ switch (mode_cfg) {
+ case AT803X_MODE_CFG_BX1000_RGMII_50OHM:
+ case AT803X_MODE_CFG_BX1000_RGMII_75OHM:
+ priv->is_1000basex = true;
+ fallthrough;
+ case AT803X_MODE_CFG_FX100_RGMII_50OHM:
+ case AT803X_MODE_CFG_FX100_RGMII_75OHM:
+ priv->is_fiber = true;
+ break;
+ }
}
return 0;
static int at803x_get_features(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int err;
err = genphy_read_abilities(phydev);
* As a result of that, ESTATUS_1000_XFULL is set
* to 1 even when operating in copper TP mode.
*
- * Remove this mode from the supported link modes,
- * as this driver currently only supports copper
- * operation.
+ * Remove this mode from the supported link modes
+ * when not operating in 1000BaseX mode.
*/
- linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
- phydev->supported);
+ if (!priv->is_1000basex)
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+ phydev->supported);
+
return 0;
}
static int at803x_config_init(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int ret;
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+ /* Some bootloaders leave the fiber page selected.
+ * Switch to the appropriate page (fiber or copper), as otherwise we
+ * read the PHY capabilities from the wrong page.
+ */
+ phy_lock_mdio_bus(phydev);
+ ret = at803x_write_page(phydev,
+ priv->is_fiber ? AT803X_PAGE_FIBER :
+ AT803X_PAGE_COPPER);
+ phy_unlock_mdio_bus(phydev);
+ if (ret)
+ return ret;
+
+ ret = at8031_pll_config(phydev);
+ if (ret < 0)
+ return ret;
+ }
+
/* The RX and TX delay default is:
* after HW reset: RX delay enabled and TX delay disabled
* after SW reset: RX delay enabled, while TX delay retains the
if (ret < 0)
return ret;
- if (phydev->drv->phy_id == ATH8031_PHY_ID) {
- ret = at8031_pll_config(phydev);
- if (ret < 0)
- return ret;
- }
-
/* Ar803x extended next page bit is enabled by default. Cisco
* multigig switches read this bit and attempt to negotiate 10Gbps
* rates even if the next page bit is disabled. This is incorrect
static int at803x_config_intr(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int err;
int value;
value |= AT803X_INTR_ENABLE_DUPLEX_CHANGED;
value |= AT803X_INTR_ENABLE_LINK_FAIL;
value |= AT803X_INTR_ENABLE_LINK_SUCCESS;
+ if (priv->is_fiber) {
+ value |= AT803X_INTR_ENABLE_LINK_FAIL_BX;
+ value |= AT803X_INTR_ENABLE_LINK_SUCCESS_BX;
+ }
err = phy_write(phydev, AT803X_INTR_ENABLE, value);
} else {
static int at803x_read_status(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int err, old_link = phydev->link;
+ if (priv->is_1000basex)
+ return genphy_c37_read_status(phydev);
+
/* Update the link, but return if there was an error */
err = genphy_update_link(phydev);
if (err)
static int at803x_config_aneg(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int ret;
ret = at803x_config_mdix(phydev, phydev->mdix_ctrl);
return ret;
}
+ if (priv->is_1000basex)
+ return genphy_c37_config_aneg(phydev);
+
/* Do not restart auto-negotiation by setting ret to 0 defautly,
* when calling __genphy_config_aneg later.
*/
reg = asix_mdio_read(dev->net, dev->mii.phy_id,
MII_MARVELL_LED_CTRL);
netdev_dbg(dev->net, "MII_MARVELL_LED_CTRL (2) = 0x%04x\n", reg);
- reg &= 0xfc0f;
}
return 0;
int st_nci_vendor_cmds_init(struct nci_dev *ndev)
{
- return nfc_set_vendor_cmds(ndev->nfc_dev, st_nci_vendor_cmds,
+ return nci_set_vendor_cmds(ndev, st_nci_vendor_cmds,
sizeof(st_nci_vendor_cmds));
}
EXPORT_SYMBOL(st_nci_vendor_cmds_init);
struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
init_completion(&info->vendor_info.req_completion);
- return nfc_set_vendor_cmds(hdev->ndev, st21nfca_vendor_cmds,
- sizeof(st21nfca_vendor_cmds));
+ return nfc_hci_set_vendor_cmds(hdev, st21nfca_vendor_cmds,
+ sizeof(st21nfca_vendor_cmds));
}
EXPORT_SYMBOL(st21nfca_vendor_cmds_init);
struct device_attribute *attr, char *page)
{
struct ptp_clock *ptp = dev_get_drvdata(dev);
- return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
+ return sysfs_emit(page, "%s\n", ptp->info->name);
}
static DEVICE_ATTR_RO(clock_name);
mutex_unlock(&ptp->pincfg_mux);
- return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan);
+ return sysfs_emit(page, "%u %u\n", func, chan);
}
static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
struct work_struct work;
struct mutex freeze_mutex;
atomic64_t writecnt;
+ /* 'Ownership' of program-containing map is claimed by the first program
+ * that is going to use this map or by the first program whose FD is
+ * stored in the map to make sure that all callers and callees have the
+ * same prog type, JITed flag and xdp_has_frags flag.
+ */
+ struct {
+ spinlock_t lock;
+ enum bpf_prog_type type;
+ bool jited;
+ bool xdp_has_frags;
+ } owner;
};
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
u32 *next_btf_id);
- bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
};
struct bpf_prog_offload_ops {
bool func_proto_unreliable;
bool sleepable;
bool tail_call_reachable;
+ bool xdp_has_frags;
struct hlist_node tramp_hlist;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
};
struct bpf_array_aux {
- /* 'Ownership' of prog array is claimed by the first program that
- * is going to use this map or by the first program which FD is
- * stored in the map to make sure that all callers and callees have
- * the same prog type and JITed flag.
- */
- struct {
- spinlock_t lock;
- enum bpf_prog_type type;
- bool jited;
- } owner;
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
struct bpf_map *map;
struct rcu_head rcu;
};
-bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+static inline bool map_type_contains_progs(struct bpf_map *map)
+{
+ return map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+ map->map_type == BPF_MAP_TYPE_DEVMAP ||
+ map->map_type == BPF_MAP_TYPE_CPUMAP;
+}
+
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp);
int bpf_prog_calc_tag(struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
struct bpf_cg_run_ctx {
struct bpf_run_ctx run_ctx;
const struct bpf_prog_array_item *prog_item;
+ int retval;
};
struct bpf_trace_run_ctx {
typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
-static __always_inline u32
+static __always_inline int
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog,
- u32 *ret_flags)
+ int retval, u32 *ret_flags)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx;
- u32 ret = 1;
u32 func_ret;
+ run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
array = rcu_dereference(array_rcu);
while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item;
func_ret = run_prog(prog, ctx);
- ret &= (func_ret & 1);
+ if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+ run_ctx.retval = -EPERM;
*(ret_flags) |= (func_ret >> 1);
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
- return ret;
+ return run_ctx.retval;
}
-static __always_inline u32
+static __always_inline int
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
- const void *ctx, bpf_prog_run_fn run_prog)
+ const void *ctx, bpf_prog_run_fn run_prog,
+ int retval)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx;
- u32 ret = 1;
+ run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
array = rcu_dereference(array_rcu);
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item;
- ret &= run_prog(prog, ctx);
+ if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+ run_ctx.retval = -EPERM;
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
- return ret;
+ return run_ctx.retval;
}
static __always_inline u32
* 0: NET_XMIT_SUCCESS skb should be transmitted
* 1: NET_XMIT_DROP skb should be dropped and cn
* 2: NET_XMIT_CN skb should be transmitted and cn
- * 3: -EPERM skb should be dropped
+ * 3: -err skb should be dropped
*/
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \
u32 _flags = 0; \
bool _cn; \
u32 _ret; \
- _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+ _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
_cn = _flags & BPF_RET_SET_CN; \
- if (_ret) \
+ if (_ret && !IS_ERR_VALUE((long)_ret)) \
+ _ret = -EFAULT; \
+ if (!_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
- _ret = (_cn ? NET_XMIT_DROP : -EPERM); \
+ _ret = (_cn ? NET_XMIT_DROP : _ret); \
_ret; \
})
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
return -ENOTSUPP;
}
-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
- struct module *owner)
-{
- return false;
-}
-
static inline void bpf_map_put(struct bpf_map *map)
{
}
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
{
return -EOPNOTSUPP;
}
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
int check_ptr_off_reg(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno);
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
return type & ~BPF_BASE_TYPE_MASK;
}
+static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+ return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+}
+
#endif /* _LINUX_BPF_VERIFIER_H */
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
+enum btf_kfunc_type {
+ BTF_KFUNC_TYPE_CHECK,
+ BTF_KFUNC_TYPE_ACQUIRE,
+ BTF_KFUNC_TYPE_RELEASE,
+ BTF_KFUNC_TYPE_RET_NULL,
+ BTF_KFUNC_TYPE_MAX,
+};
+
struct btf;
struct btf_member;
struct btf_type;
union bpf_attr;
struct btf_show;
+struct btf_id_set;
+
+struct btf_kfunc_id_set {
+ struct module *owner;
+ union {
+ struct {
+ struct btf_id_set *check_set;
+ struct btf_id_set *acquire_set;
+ struct btf_id_set *release_set;
+ struct btf_id_set *ret_null_set;
+ };
+ struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
+ };
+};
extern const struct file_operations btf_fops;
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+ enum bpf_prog_type prog_type,
+ enum btf_kfunc_type type, u32 kfunc_btf_id);
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *s);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
{
return NULL;
}
-#endif
-
-struct kfunc_btf_id_set {
- struct list_head list;
- struct btf_id_set *set;
- struct module *owner;
-};
-
-struct kfunc_btf_id_list {
- struct list_head list;
- struct mutex mutex;
-};
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
- struct module *owner);
-
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-#else
-static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
-{
-}
-static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
+ enum bpf_prog_type prog_type,
+ enum btf_kfunc_type type,
+ u32 kfunc_btf_id)
{
+ return false;
}
-static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
- u32 kfunc_id, struct module *owner)
+static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *s)
{
- return false;
+ return 0;
}
-
-static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
-static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
#endif
-#define DEFINE_KFUNC_BTF_ID_SET(set, name) \
- struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \
- THIS_MODULE }
-
#endif
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
+#include <linux/compiler_attributes.h> /* for __maybe_unused */
/*
* Following macros help to define lists of BTF IDs placed
#else
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
#define BTF_SET_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
s32 level;
s32 optname;
s32 optlen;
- s32 retval;
+ /* for retval in struct bpf_cg_run_ctx */
+ struct task_struct *current_task;
+ /* Temporary "register" for indirect stores to ppos. */
+ u64 tmp_reg;
};
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
return NULL;
}
-static inline struct inet6_request_sock *
- inet6_rsk(const struct request_sock *rsk)
-{
- return NULL;
-}
-
static inline struct raw6_sock *raw6_sk(const struct sock *sk)
{
return NULL;
}
#define inet6_rcv_saddr(__sk) NULL
-#define tcp_twsk_ipv6only(__sk) 0
#define inet_v6_ipv6only(__sk) 0
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif /* _IPV6_H */
linkmode_clear_bit(nr, addr);
}
-static inline void linkmode_change_bit(int nr, volatile unsigned long *addr)
-{
- __change_bit(nr, addr);
-}
-
static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr)
{
return test_bit(nr, addr);
}
/**
- * mii_lpa_to_linkmode_adv_sgmii
- * @advertising: pointer to destination link mode.
- * @lpa: value of the MII_LPA register
- *
- * A small helper function that translates MII_ADVERTISE bits
- * to linkmode advertisement settings when in SGMII mode.
- * Clears the old value of advertising.
- */
-static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising,
- u32 lpa)
-{
- linkmode_zero(lp_advertising);
-
- mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa);
-}
-
-/**
* mii_adv_mod_linkmode_adv_t
* @advertising:pointer to destination link mode.
* @adv: value of the MII_ADVERTISE register
extack->cookie_len = sizeof(cookie);
}
-static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack,
- u32 cookie)
-{
- if (!extack)
- return;
- memcpy(extack->cookie, &cookie, sizeof(cookie));
- extack->cookie_len = sizeof(cookie);
-}
-
void netlink_kernel_release(struct sock *sk);
int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
int netlink_change_ngroups(struct sock *sk, unsigned int groups);
phy_interface_t interface, int speed, int duplex);
int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
unsigned int mode);
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
- struct phylink_link_state *state);
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
int enable);
struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
* Warning : all fields before dataref are cleared in __alloc_skb()
*/
atomic_t dataref;
+ unsigned int xdp_frags_size;
/* Intermediate layers must ensure that destructor_arg
* remains valid until skb destructor */
return ktime_sub(ktime_get_real(), t);
}
-static inline ktime_t net_invalid_timestamp(void)
-{
- return 0;
-}
-
static inline u8 skb_metadata_len(const struct sk_buff *skb)
{
return skb_shinfo(skb)->meta_len;
return (struct udphdr *)skb_transport_header(skb);
}
-static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
-{
- return (struct udphdr *)skb_inner_transport_header(skb);
-}
-
#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
typedef struct ax25_route {
struct ax25_route *next;
- refcount_t refcount;
ax25_address callsign;
struct net_device *dev;
ax25_digi *digipeat;
char ip_mode;
} ax25_route;
-static inline void ax25_hold_route(ax25_route *ax25_rt)
-{
- refcount_inc(&ax25_rt->refcount);
-}
-
void __ax25_put_route(ax25_route *ax25_rt);
extern rwlock_t ax25_route_lock;
read_unlock(&ax25_route_lock);
}
-static inline void ax25_put_route(ax25_route *ax25_rt)
-{
- if (refcount_dec_and_test(&ax25_rt->refcount))
- __ax25_put_route(ax25_rt);
-}
-
typedef struct {
char slave; /* slave_mode? */
struct timer_list slave_timer; /* timeout timer */
}
/* Caller must hold rcu_read_lock() for read */
-static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
- const u8 *mac)
-{
- struct list_head *iter;
- struct slave *tmp;
-
- bond_for_each_slave_rcu(bond, tmp, iter)
- if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
- return tmp;
-
- return NULL;
-}
-
-/* Caller must hold rcu_read_lock() for read */
static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
{
struct list_head *iter;
/* these three are in inet_sock */
__be16 tw_sport;
/* And these are ours. */
- unsigned int tw_kill : 1,
- tw_transparent : 1,
+ unsigned int tw_transparent : 1,
tw_flowlabel : 20,
- tw_pad : 2, /* 2 bits hole */
+ tw_pad : 3, /* 3 bits hole */
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
+ u32 tw_bslot; /* bind bucket slot */
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
};
void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
-
static inline
struct net *twsk_net(const struct inet_timewait_sock *twsk)
{
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_BPF_H
+#define _NF_CONNTRACK_BPF_H
+
+#include <linux/btf.h>
+#include <linux/kconfig.h>
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_conntrack_bpf(void);
+
+#else
+
+static inline int register_nf_conntrack_bpf(void)
+{
+ return 0;
+}
+
+#endif
+
+#endif /* _NF_CONNTRACK_BPF_H */
struct inet_hashinfo;
struct inet_timewait_death_row {
- atomic_t tw_count;
- char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)];
+ refcount_t tw_refcount;
- struct inet_hashinfo *hashinfo;
+ struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp;
int sysctl_max_tw_buckets;
};
struct tcp_fastopen_context;
struct netns_ipv4 {
- /* Please keep tcp_death_row at first field in netns_ipv4 */
- struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp;
+ struct inet_timewait_death_row *tcp_death_row;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
struct hlist_head *fib_table_hash;
struct sock *fibnl;
- struct sock * __percpu *icmp_sk;
struct sock *mc_autojoin_sk;
struct inet_peer_base *peers;
- struct sock * __percpu *tcp_sk;
struct fqdir *fqdir;
u8 sysctl_icmp_echo_ignore_all;
u32 ip_rt_min_pmtu;
int ip_rt_mtu_expires;
+ int ip_rt_min_advmss;
struct local_ports ip_local_ports;
struct fib6_table *fib6_local_tbl;
struct fib_rules_ops *fib6_rules_ops;
#endif
- struct sock * __percpu *icmp_sk;
struct sock *ndisc_sk;
struct sock *tcp_sk;
struct sock *igmp_sk;
return PSCHED_NS2TICKS(ktime_get_ns());
}
-static inline psched_tdiff_t
-psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
-{
- return min(tv1 - tv2, bound);
-}
-
struct qdisc_watchdog {
u64 last_expires;
struct hrtimer timer;
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
-static inline int qdisc_qlen_cpu(const struct Qdisc *q)
-{
- return this_cpu_ptr(q->cpu_qstats)->qlen;
-}
-
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
return 0;
}
-/* Slow-path computation of checksum. Socket is locked. */
-static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
-{
- const struct udp_sock *up = udp_sk(skb->sk);
- int cscov = up->len;
- __wsum csum = 0;
-
- if (up->pcflag & UDPLITE_SEND_CC) {
- /*
- * Sender has set `partial coverage' option on UDP-Lite socket.
- * The special case "up->pcslen == 0" signifies full coverage.
- */
- if (up->pcslen < up->len) {
- if (0 < up->pcslen)
- cscov = up->pcslen;
- udp_hdr(skb)->len = htons(up->pcslen);
- }
- /*
- * NOTE: Causes for the error case `up->pcslen > up->len':
- * (i) Application error (will not be penalized).
- * (ii) Payload too big for send buffer: data is split
- * into several packets, each with its own header.
- * In this case (e.g. last segment), coverage may
- * exceed packet length.
- * Since packets with coverage length > packet length are
- * illegal, we fall back to the defaults here.
- */
- }
-
- skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */
-
- skb_queue_walk(&sk->sk_write_queue, skb) {
- const int off = skb_transport_offset(skb);
- const int len = skb->len - off;
-
- csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
-
- if ((cscov -= len) <= 0)
- break;
- }
- return csum;
-}
-
/* Fast-path computation of checksum. Socket may not be locked. */
static inline __wsum udplite_csum(struct sk_buff *skb)
{
u32 reg_state;
struct xdp_mem_info mem;
unsigned int napi_id;
+ u32 frag_size;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
struct xdp_txq_info {
struct net_device *dev;
};
+enum xdp_buff_flags {
+ XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */
+ XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under
+ * pressure
+ */
+};
+
struct xdp_buff {
void *data;
void *data_end;
struct xdp_rxq_info *rxq;
struct xdp_txq_info *txq;
u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
+ u32 flags; /* supported values defined in xdp_buff_flags */
};
+static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+{
+ return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
+{
+ xdp->flags |= XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
+{
+ xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+ return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
+static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+ xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{
xdp->frame_sz = frame_sz;
xdp->rxq = rxq;
+ xdp->flags = 0;
}
static __always_inline void
return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}
+static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+{
+ unsigned int len = xdp->data_end - xdp->data;
+ struct skb_shared_info *sinfo;
+
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ len += sinfo->xdp_frags_size;
+out:
+ return len;
+}
+
struct xdp_frame {
void *data;
u16 len;
*/
struct xdp_mem_info mem;
struct net_device *dev_rx; /* used by cpumap */
+ u32 flags; /* supported values defined in xdp_buff_flags */
};
+static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+{
+ return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+{
+ return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
#define XDP_BULK_QUEUE_SIZE 16
struct xdp_frame_bulk {
int count;
frame->dev_rx = NULL;
}
+static inline void
+xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
+ unsigned int size, unsigned int truesize,
+ bool pfmemalloc)
+{
+ skb_shinfo(skb)->nr_frags = nr_frags;
+
+ skb->len += size;
+ skb->data_len += size;
+ skb->truesize += truesize;
+ skb->pfmemalloc |= pfmemalloc;
+}
+
/* Avoids inlining WARN macro in fast-path */
void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
xdp->data_end = frame->data + frame->len;
xdp->data_meta = frame->data - frame->metasize;
xdp->frame_sz = frame->frame_sz;
+ xdp->flags = frame->flags;
}
static inline
xdp_frame->headroom = headroom - sizeof(*xdp_frame);
xdp_frame->metasize = metasize;
xdp_frame->frame_sz = xdp->frame_sz;
+ xdp_frame->flags = xdp->flags;
return 0;
}
return xdp_frame;
}
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+ struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
static inline void xdp_release_frame(struct xdp_frame *xdpf)
{
struct xdp_mem_info *mem = &xdpf->mem;
+ struct skb_shared_info *sinfo;
+ int i;
/* Curr only page_pool needs this */
- if (mem->type == MEM_TYPE_PAGE_POOL)
- __xdp_release_frame(xdpf->data, mem);
+ if (mem->type != MEM_TYPE_PAGE_POOL)
+ return;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_release_frame(page_address(page), mem);
+ }
+out:
+ __xdp_release_frame(xdpf->data, mem);
+}
+
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id, u32 frag_size);
+static inline int
+xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id)
+{
+ return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
}
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
- struct net_device *dev, u32 queue_index, unsigned int napi_id);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
* *ctx_out*, *data_in* and *data_out* must be NULL.
* *repeat* must be zero.
*
+ * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*/
#define BPF_F_SLEEPABLE (1U << 4)
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully supports xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS (1U << 5)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_current_pid_tgid(void)
+ * Description
+ * Get the current pid and tgid.
* Return
* A 64-bit integer containing the current tgid and pid, and
* created as such:
* *current_task*\ **->pid**.
*
* u64 bpf_get_current_uid_gid(void)
+ * Description
+ * Get the current uid and gid.
* Return
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
* The 32-bit hash.
*
* u64 bpf_get_current_task(void)
+ * Description
+ * Get the current task.
* Return
* A pointer to the current task struct.
*
* indicate that the hash is outdated and to trigger a
* recalculation the next time the kernel tries to access this
* hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ * Return
+ * void.
*
* long bpf_get_numa_node_id(void)
* Description
* A 8-byte long unique number or 0 if *sk* is NULL.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Description
+ * Get the owner UID of the socket associated to *skb*.
* Return
* The owner UID of the socket associated to *skb*. If the socket
* is **NULL**, or if it is not a full socket (i.e. if it is a
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
+ * Description
+ * Get the current cgroup id based on the cgroup within which
+ * the current task is running.
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
*
* Return
* The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ * Description
+ * Get the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ * Description
+ * Set the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ * Description
+ * Get the total size of a given xdp buff (linear and paged area)
+ * Return
+ * The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * This helper is provided as an easy way to load data from an
+ * xdp buffer. It can be used to load *len* bytes from *offset* from
+ * the frame associated to *xdp_md*, into the buffer pointed by
+ * *buf*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * Store *len* bytes from buffer *buf* into the frame
+ * associated to *xdp_md*, at *offset*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(get_func_arg), \
FN(get_func_ret), \
FN(get_func_arg_cnt), \
+ FN(get_retval), \
+ FN(set_retval), \
+ FN(xdp_get_buff_len), \
+ FN(xdp_load_bytes), \
+ FN(xdp_store_bytes), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
- struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog = bpf_prog_get(fd);
if (IS_ERR(prog))
return prog;
- if (!bpf_prog_array_compatible(array, prog)) {
+ if (!bpf_prog_map_compatible(map, prog)) {
bpf_prog_put(prog);
return ERR_PTR(-EINVAL);
}
INIT_WORK(&aux->work, prog_array_map_clear_deferred);
INIT_LIST_HEAD(&aux->poke_progs);
mutex_init(&aux->poke_mutex);
- spin_lock_init(&aux->owner.lock);
map = array_map_alloc(attr);
if (IS_ERR(map)) {
DEFINE_IDR(btf_idr);
DEFINE_SPINLOCK(btf_idr_lock);
+/* Identifies the program-type "hook" a kfunc BTF ID set is registered for.
+ * Values are used as the first index of btf_kfunc_set_tab::sets.
+ */
+enum btf_kfunc_hook {
+ BTF_KFUNC_HOOK_XDP, /* chosen for BPF_PROG_TYPE_XDP */
+ BTF_KFUNC_HOOK_TC, /* chosen for BPF_PROG_TYPE_SCHED_CLS */
+ BTF_KFUNC_HOOK_STRUCT_OPS, /* chosen for BPF_PROG_TYPE_STRUCT_OPS */
+ BTF_KFUNC_HOOK_MAX, /* hook count; also returned for prog types with no hook */
+};
+
+enum {
+ /* Upper bound on the number of IDs in one merged vmlinux kfunc set;
+ * exceeding it makes __btf_populate_kfunc_set fail with -E2BIG.
+ */
+ BTF_KFUNC_SET_MAX_CNT = 32,
+};
+
+/* Per-BTF table of registered kfunc ID sets, indexed by hook and then by
+ * kfunc type. A NULL entry means no set is registered for that pair.
+ */
+struct btf_kfunc_set_tab {
+ struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+};
+
struct btf {
void *data;
struct btf_type **types;
refcount_t refcnt;
u32 id;
struct rcu_head rcu;
+ struct btf_kfunc_set_tab *kfunc_set_tab;
/* split BTF support */
struct btf *base_btf;
spin_unlock_irqrestore(&btf_idr_lock, flags);
}
+/* Release btf->kfunc_set_tab, if present, and reset the pointer to NULL.
+ *
+ * For module BTF the table entries are the very sets that were registered
+ * (assigned directly, not copied), so only the table itself is freed. For
+ * any other BTF each per-hook, per-type set is freed before the table.
+ */
+static void btf_free_kfunc_set_tab(struct btf *btf)
+{
+	struct btf_kfunc_set_tab *table = btf->kfunc_set_tab;
+	int h, t;
+
+	if (!table)
+		return;
+
+	if (!btf_is_module(btf)) {
+		for (h = 0; h < ARRAY_SIZE(table->sets); h++) {
+			for (t = 0; t < ARRAY_SIZE(table->sets[0]); t++)
+				kfree(table->sets[h][t]);
+		}
+	}
+
+	kfree(table);
+	btf->kfunc_set_tab = NULL;
+}
+
+
static void btf_free(struct btf *btf)
{
+ btf_free_kfunc_set_tab(btf);
kvfree(btf->types);
kvfree(btf->resolved_sizes);
kvfree(btf->resolved_ids);
return true;
}
+/* Tell whether @arg is the length half of a (mem, len) kfunc argument pair.
+ *
+ * That is the case when the argument's BTF type (modifiers skipped) is a
+ * scalar, @reg currently holds a SCALAR_VALUE, and the parameter name ends
+ * with the "__sz" suffix.
+ */
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+				  const struct btf_param *arg,
+				  const struct bpf_reg_state *reg)
+{
+	const int suffix_len = sizeof("__sz") - 1;
+	const struct btf_type *arg_type;
+	const char *name;
+	int name_len;
+
+	arg_type = btf_type_skip_modifiers(btf, arg->type, NULL);
+	if (!btf_type_is_scalar(arg_type) || reg->type != SCALAR_VALUE)
+		return false;
+
+	/* In the future, this can be ported to use BTF tagging */
+	name = btf_name_by_offset(btf, arg->name_off);
+	if (str_is_empty(name))
+		return false;
+
+	name_len = strlen(name);
+	if (name_len < suffix_len)
+		return false;
+
+	/* Only the trailing suffix_len characters of the name are compared */
+	return strncmp(name + name_len - suffix_len, "__sz", suffix_len) == 0;
+}
+
+
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
struct bpf_reg_state *regs,
bool ptr_to_mem_ok)
{
struct bpf_verifier_log *log = &env->log;
+ u32 i, nargs, ref_id, ref_obj_id = 0;
bool is_kfunc = btf_is_kernel(btf);
const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
const struct btf_param *args;
- u32 i, nargs, ref_id;
+ int ref_regno = 0;
+ bool rel = false;
t = btf_type_by_id(btf, func_id);
if (!t || !btf_type_is_func(t)) {
if (reg->type == PTR_TO_BTF_ID) {
reg_btf = reg->btf;
reg_ref_id = reg->btf_id;
+ /* Ensure only one argument is referenced PTR_TO_BTF_ID */
+ if (reg->ref_obj_id) {
+ if (ref_obj_id) {
+ bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ regno, reg->ref_obj_id, ref_obj_id);
+ return -EFAULT;
+ }
+ ref_regno = regno;
+ ref_obj_id = reg->ref_obj_id;
+ }
} else {
reg_btf = btf_vmlinux;
reg_ref_id = *reg2btf_ids[reg->type];
u32 type_size;
if (is_kfunc) {
+ bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+
/* Permit pointer to mem, but only when argument
* type is pointer to scalar, or struct composed
* (recursively) of scalars.
+ * When arg_mem_size is true, the pointer can be
+ * void *.
*/
if (!btf_type_is_scalar(ref_t) &&
- !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) {
+ !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
+ (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
bpf_log(log,
- "arg#%d pointer type %s %s must point to scalar or struct with scalar\n",
- i, btf_type_str(ref_t), ref_tname);
+ "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+ i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
return -EINVAL;
}
+
+ /* Check for mem, len pair */
+ if (arg_mem_size) {
+ if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
+ bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
+ i, i + 1);
+ return -EINVAL;
+ }
+ i++;
+ continue;
+ }
}
resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
}
}
- return 0;
+ /* Either both are set, or neither */
+ WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
+ if (is_kfunc) {
+ rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_RELEASE, func_id);
+ /* We already made sure ref_obj_id is set only for one argument */
+ if (rel && !ref_obj_id) {
+ bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+ func_name);
+ return -EINVAL;
+ }
+ /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to
+ * other kfuncs works
+ */
+ }
+ /* returns argument register number > 0 in case of reference release kfunc */
+ return rel ? ref_regno : 0;
}
/* Compare BTF of a function with given bpf_reg_state.
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}
+enum {
+ /* Set in btf_module::flags by the MODULE_STATE_LIVE notifier case;
+ * btf_try_get_module-style lookups only hand out modules carrying it.
+ */
+ BTF_MODULE_F_LIVE = (1 << 0),
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
struct btf_module {
struct list_head list;
struct module *module;
struct btf *btf;
struct bin_attribute *sysfs_attr;
+ int flags; /* BTF_MODULE_F_* bits, e.g. BTF_MODULE_F_LIVE */
};
static LIST_HEAD(btf_modules);
int err = 0;
if (mod->btf_data_size == 0 ||
- (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+ (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
+ op != MODULE_STATE_GOING))
goto out;
switch (op) {
}
break;
+ case MODULE_STATE_LIVE:
+ mutex_lock(&btf_module_mutex);
+ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+ if (btf_mod->module != module)
+ continue;
+
+ btf_mod->flags |= BTF_MODULE_F_LIVE;
+ break;
+ }
+ mutex_unlock(&btf_module_mutex);
+ break;
case MODULE_STATE_GOING:
mutex_lock(&btf_module_mutex);
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
if (btf_mod->btf != btf)
continue;
- if (try_module_get(btf_mod->module))
+ /* We must only consider module whose __init routine has
+ * finished, hence we must check for BTF_MODULE_F_LIVE flag,
+ * which is set from the notifier callback for
+ * MODULE_STATE_LIVE.
+ */
+ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
res = btf_mod->module;
break;
return res;
}
+/* Look up the struct btf that corresponds to @module.
+ *
+ * A NULL @module selects the vmlinux BTF via bpf_get_btf_vmlinux().
+ * Otherwise, when CONFIG_DEBUG_INFO_BTF_MODULES is enabled, btf_modules is
+ * walked under btf_module_mutex and a reference is taken on the BTF of the
+ * matching entry.
+ *
+ * The result may be NULL or an ERR_PTR. The caller must drop the reference
+ * on the returned struct btf iff btf_is_module is true for it.
+ */
+static struct btf *btf_get_module_btf(const struct module *module)
+{
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+	struct btf_module *entry, *next;
+#endif
+	struct btf *btf = NULL;
+
+	if (!module)
+		return bpf_get_btf_vmlinux();
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+	mutex_lock(&btf_module_mutex);
+	list_for_each_entry_safe(entry, next, &btf_modules, list) {
+		if (entry->module == module) {
+			btf = entry->btf;
+			btf_get(btf);
+			break;
+		}
+	}
+	mutex_unlock(&btf_module_mutex);
+#endif
+
+	return btf;
+}
+
+
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
{
struct btf *btf;
BTF_TRACING_TYPE_xxx
#undef BTF_TRACING_TYPE
-/* BTF ID set registration API for modules */
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+/* Kernel Function (kfunc) BTF ID set registration API */
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+/* Add @add_set to the (@hook, @type) slot of @btf's kfunc set table,
+ * allocating the table on first use.
+ *
+ * Module sets (@vmlinux_set false) are stored by pointer and only one is
+ * accepted per slot. vmlinux sets are copied, concatenated with any set
+ * already in the slot, and re-sorted.
+ *
+ * Returns 0 on success, including when @add_set is empty. Returns -EINVAL
+ * for an out-of-range @hook/@type or a second module set in the same slot,
+ * -EOVERFLOW or -E2BIG when the merged vmlinux set would be too large, and
+ * -ENOMEM on allocation failure. Every error path that jumps to 'end' frees
+ * the whole kfunc set table of @btf.
+ */
+static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+ enum btf_kfunc_type type,
+ struct btf_id_set *add_set, bool vmlinux_set)
{
- mutex_lock(&l->mutex);
- list_add(&s->list, &l->list);
- mutex_unlock(&l->mutex);
+ struct btf_kfunc_set_tab *tab;
+ struct btf_id_set *set;
+ u32 set_cnt;
+ int ret;
+
+ if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ if (!add_set->cnt)
+ return 0;
+
+ tab = btf->kfunc_set_tab;
+ if (!tab) {
+ tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
+ if (!tab)
+ return -ENOMEM;
+ btf->kfunc_set_tab = tab;
+ }
+
+ set = tab->sets[hook][type];
+ /* Warn when register_btf_kfunc_id_set is called twice for the same hook
+ * for module sets.
+ */
+ if (WARN_ON_ONCE(set && !vmlinux_set)) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* We don't need to allocate, concatenate, and sort module sets, because
+ * only one is allowed per hook. Hence, we can directly assign the
+ * pointer and return.
+ */
+ if (!vmlinux_set) {
+ tab->sets[hook][type] = add_set;
+ return 0;
+ }
+
+ /* In case of vmlinux sets, there may be more than one set being
+ * registered per hook. To create a unified set, we allocate a new set
+ * and concatenate all individual sets being registered. While each set
+ * is individually sorted, they may become unsorted when concatenated,
+ * hence re-sorting the final set again is required to make binary
+ * searching the set using btf_id_set_contains function work.
+ */
+ set_cnt = set ? set->cnt : 0;
+
+ if (set_cnt > U32_MAX - add_set->cnt) {
+ ret = -EOVERFLOW;
+ goto end;
+ }
+
+ if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
+ ret = -E2BIG;
+ goto end;
+ }
+
+ /* Grow set */
+ set = krealloc(tab->sets[hook][type],
+ offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!set) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ /* For newly allocated set, initialize set->cnt to 0 */
+ if (!tab->sets[hook][type])
+ set->cnt = 0;
+ tab->sets[hook][type] = set;
+
+ /* Concatenate the two sets */
+ memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+ set->cnt += add_set->cnt;
+
+ sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+
+ return 0;
+end:
+ btf_free_kfunc_set_tab(btf);
+ return ret;
}
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+/* Register every non-NULL set of @kset under @hook in @btf's kfunc set table.
+ *
+ * Sets are treated as vmlinux sets unless @btf is a module BTF. Processing
+ * stops at the first set that fails to register.
+ *
+ * Returns 0 on success, including when @kset carries no sets at all, or the
+ * negative error code from the first failing __btf_populate_kfunc_set call.
+ */
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+				  const struct btf_kfunc_id_set *kset)
{
- mutex_lock(&l->mutex);
- list_del_init(&s->list);
- mutex_unlock(&l->mutex);
+	bool vmlinux_set = !btf_is_module(btf);
+	/* ret must start at 0: if every kset->sets[] entry is NULL the loop
+	 * body never assigns it, and it is returned below.
+	 */
+	int type, ret = 0;
+
+	for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
+		if (!kset->sets[type])
+			continue;
+
+		ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
+		if (ret)
+			break;
+	}
+	return ret;
}
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
- struct module *owner)
+/* Report whether @kfunc_btf_id is in the set stored for (@hook, @type) in
+ * @btf's kfunc set table. Returns false when @hook or @type is out of range,
+ * when @btf has no table, or when no set is stored for that pair.
+ */
+static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+ enum btf_kfunc_hook hook,
+ enum btf_kfunc_type type,
+ u32 kfunc_btf_id)
{
- struct kfunc_btf_id_set *s;
+ struct btf_id_set *set;
- mutex_lock(&klist->mutex);
- list_for_each_entry(s, &klist->list, list) {
- if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
- mutex_unlock(&klist->mutex);
- return true;
- }
+ if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
+ return false;
+ if (!btf->kfunc_set_tab)
+ return false;
+ set = btf->kfunc_set_tab->sets[hook][type];
+ if (!set)
+ return false;
+ return btf_id_set_contains(set, kfunc_btf_id);
+}
+
+/* Map a BPF program type to the kfunc hook its sets are stored under.
+ * Program types without a hook yield BTF_KFUNC_HOOK_MAX, which
+ * __btf_kfunc_id_set_contains and __btf_populate_kfunc_set reject as out of
+ * range.
+ */
+static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
+{
+ switch (prog_type) {
+ case BPF_PROG_TYPE_XDP:
+ return BTF_KFUNC_HOOK_XDP;
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return BTF_KFUNC_HOOK_TC;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ return BTF_KFUNC_HOOK_STRUCT_OPS;
+ default:
+ return BTF_KFUNC_HOOK_MAX;
}
- mutex_unlock(&klist->mutex);
- return false;
}
-#define DEFINE_KFUNC_BTF_ID_LIST(name) \
- struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \
- __MUTEX_INITIALIZER(name.mutex) }; \
- EXPORT_SYMBOL_GPL(name)
+/* Check whether @kfunc_btf_id belongs to the @type kfunc set that @btf holds
+ * for the hook corresponding to @prog_type. Returns false when the program
+ * type has no hook or no matching set is registered.
+ *
+ * Caution:
+ * Reference to the module (obtained using btf_try_get_module) corresponding to
+ * the struct btf *MUST* be held when calling this function from verifier
+ * context. This is usually true as we stash references in prog's kfunc_btf_tab;
+ * keeping the reference for the duration of the call provides the necessary
+ * protection for looking up a well-formed btf->kfunc_set_tab.
+ */
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+ enum bpf_prog_type prog_type,
+ enum btf_kfunc_type type, u32 kfunc_btf_id)
+{
+ enum btf_kfunc_hook hook;
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
+}
-#endif
+/* Register the kfunc BTF ID sets in @kset for programs of type @prog_type,
+ * storing them in the BTF that belongs to @kset->owner.
+ *
+ * This function must be invoked only from initcalls/module init functions.
+ *
+ * Returns -ENOENT when no BTF is found for the owner, the encoded error when
+ * the lookup yields an ERR_PTR, and otherwise the result of populating the
+ * sets.
+ */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+			      const struct btf_kfunc_id_set *kset)
+{
+	struct btf *owner_btf = btf_get_module_btf(kset->owner);
+	int err;
+
+	if (!owner_btf)
+		return -ENOENT;
+	if (IS_ERR(owner_btf))
+		return PTR_ERR(owner_btf);
+
+	err = btf_populate_kfunc_set(owner_btf,
+				     bpf_prog_type_to_kfunc_hook(prog_type),
+				     kset);
+	/* reference is only taken for module BTF */
+	if (btf_is_module(owner_btf))
+		btf_put(owner_btf);
+	return err;
+}
+EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id)
* NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
* NET_XMIT_CN (2) - continue with packet output and notify TCP
* to call cwr
- * -EPERM - drop packet
+ * -err - drop packet
*
* For ingress packets, this function will return -EPERM if any
* attached program was found and if it returned != 1 during execution.
cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
} else {
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
- __bpf_prog_run_save_cb);
- ret = (ret == 1 ? 0 : -EPERM);
+ __bpf_prog_run_save_cb, 0);
+ if (ret && !IS_ERR_VALUE((long)ret))
+ ret = -EFAULT;
}
bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- int ret;
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
- return ret == 1 ? 0 : -EPERM;
+ return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
+ bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
};
struct sockaddr_storage unspec;
struct cgroup *cgrp;
- int ret;
/* Check socket family since not all sockets represent network
* endpoint (e.g. AF_UNIX).
}
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run, flags);
-
- return ret == 1 ? 0 : -EPERM;
+ return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run, 0, flags);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- int ret;
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
- bpf_prog_run);
- return ret == 1 ? 0 : -EPERM;
+ return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+ bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
.major = major,
.minor = minor,
};
- int allow;
+ int ret;
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run, 0);
rcu_read_unlock();
- return !allow;
+ return ret;
}
+/* BPF helper: return the retval stored in the bpf_cg_run_ctx that embeds the
+ * current task's bpf_ctx.
+ */
+BPF_CALL_0(bpf_get_retval)
+{
+	struct bpf_cg_run_ctx *cg_ctx;
+
+	cg_ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+	return cg_ctx->retval;
+}
+
+/* Helper prototype for bpf_get_retval: no arguments, integer return value,
+ * gpl_only is false. Handed out by cgroup_base_func_proto for
+ * BPF_FUNC_get_retval.
+ */
+static const struct bpf_func_proto bpf_get_retval_proto = {
+ .func = bpf_get_retval,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
+/* BPF helper: store @retval in the bpf_cg_run_ctx that embeds the current
+ * task's bpf_ctx. Always returns 0.
+ */
+BPF_CALL_1(bpf_set_retval, int, retval)
+{
+	struct bpf_cg_run_ctx *cg_ctx;
+
+	cg_ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+	cg_ctx->retval = retval;
+
+	return 0;
+}
+
+/* Helper prototype for bpf_set_retval: one argument of type ARG_ANYTHING,
+ * integer return value, gpl_only is false. Handed out by
+ * cgroup_base_func_proto for BPF_FUNC_set_retval.
+ */
+static const struct bpf_func_proto bpf_set_retval_proto = {
+ .func = bpf_set_retval,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
return &bpf_get_current_cgroup_id_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
+ case BPF_FUNC_get_retval:
+ return &bpf_get_retval_proto;
+ case BPF_FUNC_set_retval:
+ return &bpf_set_retval_proto;
default:
return bpf_base_func_proto(func_id);
}
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run, 0);
rcu_read_unlock();
kfree(ctx.cur_val);
kfree(ctx.new_val);
}
- return ret == 1 ? 0 : -EPERM;
+ return ret;
}
#ifdef CONFIG_NET
lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
- &ctx, bpf_prog_run);
+ &ctx, bpf_prog_run, 0);
release_sock(sk);
- if (!ret) {
- ret = -EPERM;
+ if (ret)
goto out;
- }
if (ctx.optlen == -1) {
/* optlen set to -1, bypass kernel */
.sk = sk,
.level = level,
.optname = optname,
- .retval = retval,
+ .current_task = current,
};
int ret;
lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
- &ctx, bpf_prog_run);
+ &ctx, bpf_prog_run, retval);
release_sock(sk);
- if (!ret) {
- ret = -EPERM;
+ if (ret < 0)
goto out;
- }
if (ctx.optlen > max_optlen || ctx.optlen < 0) {
ret = -EFAULT;
goto out;
}
- /* BPF programs only allowed to set retval to 0, not some
- * arbitrary value.
- */
- if (ctx.retval != 0 && ctx.retval != retval) {
- ret = -EFAULT;
- goto out;
- }
-
if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) {
}
}
- ret = ctx.retval;
-
out:
sockopt_free_buf(&ctx, &buf);
return ret;
.sk = sk,
.level = level,
.optname = optname,
- .retval = retval,
.optlen = *optlen,
.optval = optval,
.optval_end = optval + *optlen,
+ .current_task = current,
};
int ret;
*/
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
- &ctx, bpf_prog_run);
- if (!ret)
- return -EPERM;
+ &ctx, bpf_prog_run, retval);
+ if (ret < 0)
+ return ret;
if (ctx.optlen > *optlen)
return -EFAULT;
- /* BPF programs only allowed to set retval to 0, not some
- * arbitrary value.
- */
- if (ctx.retval != 0 && ctx.retval != retval)
- return -EFAULT;
-
/* BPF programs can shrink the buffer, export the modifications.
*/
if (ctx.optlen != 0)
*optlen = ctx.optlen;
- return ctx.retval;
+ return ret;
}
#endif
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
break;
case offsetof(struct bpf_sockopt, retval):
- if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
- else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+ BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+ if (type == BPF_WRITE) {
+ int treg = BPF_REG_9;
+
+ if (si->src_reg == treg || si->dst_reg == treg)
+ --treg;
+ if (si->src_reg == treg || si->dst_reg == treg)
+ --treg;
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+ offsetof(struct bpf_sockopt_kern, tmp_reg));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+ treg, si->dst_reg,
+ offsetof(struct bpf_sockopt_kern, current_task));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+ treg, treg,
+ offsetof(struct task_struct, bpf_ctx));
+ *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+ treg, si->src_reg,
+ offsetof(struct bpf_cg_run_ctx, retval));
+ *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+ offsetof(struct bpf_sockopt_kern, tmp_reg));
+ } else {
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sockopt_kern, current_task));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct task_struct, bpf_ctx));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct bpf_cg_run_ctx, retval));
+ }
break;
case offsetof(struct bpf_sockopt, optval):
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
}
#endif
-bool bpf_prog_array_compatible(struct bpf_array *array,
- const struct bpf_prog *fp)
+/* Check whether program @fp may be used with @map, which holds programs.
+ *
+ * Programs with kprobe_override set are always rejected. The first program
+ * checked against a map with no owner recorded becomes its owner: its type,
+ * jited state and xdp_has_frags flag are stored in map->owner. Every later
+ * program must match all three. The owner fields are read and written under
+ * map->owner.lock.
+ */
+bool bpf_prog_map_compatible(struct bpf_map *map,
+ const struct bpf_prog *fp)
{
bool ret;
if (fp->kprobe_override)
return false;
- spin_lock(&array->aux->owner.lock);
-
- if (!array->aux->owner.type) {
+ spin_lock(&map->owner.lock);
+ if (!map->owner.type) {
/* There's no owner yet where we could check for
* compatibility.
*/
- array->aux->owner.type = fp->type;
- array->aux->owner.jited = fp->jited;
+ map->owner.type = fp->type;
+ map->owner.jited = fp->jited;
+ map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
ret = true;
} else {
- ret = array->aux->owner.type == fp->type &&
- array->aux->owner.jited == fp->jited;
+ ret = map->owner.type == fp->type &&
+ map->owner.jited == fp->jited &&
+ map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
}
- spin_unlock(&array->aux->owner.lock);
+ spin_unlock(&map->owner.lock);
+
return ret;
}
mutex_lock(&aux->used_maps_mutex);
for (i = 0; i < aux->used_map_cnt; i++) {
struct bpf_map *map = aux->used_maps[i];
- struct bpf_array *array;
- if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+ if (!map_type_contains_progs(map))
continue;
- array = container_of(map, struct bpf_array, map);
- if (!bpf_prog_array_compatible(array, fp)) {
+ if (!bpf_prog_map_compatible(map, fp)) {
ret = -EINVAL;
goto out;
}
return 0;
}
-static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu,
+ struct bpf_map *map, int fd)
{
struct bpf_prog *prog;
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+ if (prog->expected_attach_type != BPF_XDP_CPUMAP ||
+ !bpf_prog_map_compatible(map, prog)) {
bpf_prog_put(prog);
return -EINVAL;
}
rcpu->map_id = map->id;
rcpu->value.qsize = value->qsize;
- if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+ if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
goto free_ptr_ring;
/* Setup kthread */
BPF_PROG_TYPE_XDP, false);
if (IS_ERR(prog))
goto err_put_dev;
- if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+ if (prog->expected_attach_type != BPF_XDP_DEVMAP ||
+ !bpf_prog_map_compatible(&dtab->map, prog))
goto err_put_prog;
}
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
- const struct bpf_map *map = filp->private_data;
- const struct bpf_array *array;
+ struct bpf_map *map = filp->private_data;
u32 type = 0, jited = 0;
- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
- array = container_of(map, struct bpf_array, map);
- spin_lock(&array->aux->owner.lock);
- type = array->aux->owner.type;
- jited = array->aux->owner.jited;
- spin_unlock(&array->aux->owner.lock);
+ if (map_type_contains_progs(map)) {
+ spin_lock(&map->owner.lock);
+ type = map->owner.type;
+ jited = map->owner.jited;
+ spin_unlock(&map->owner.lock);
}
seq_printf(m,
atomic64_set(&map->refcnt, 1);
atomic64_set(&map->usercnt, 1);
mutex_init(&map->freeze_mutex);
+ spin_lock_init(&map->owner.lock);
map->spin_lock_off = -EINVAL;
map->timer_off = -EINVAL;
BPF_F_ANY_ALIGNMENT |
BPF_F_TEST_STATE_FREQ |
BPF_F_SLEEPABLE |
- BPF_F_TEST_RND_HI32))
+ BPF_F_TEST_RND_HI32 |
+ BPF_F_XDP_HAS_FRAGS))
return -EINVAL;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
prog->aux->dst_prog = dst_prog;
prog->aux->offload_requested = !!attr->prog_ifindex;
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
+ prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
err = security_bpf_prog_alloc(prog->aux);
if (err)
case BPF_FLOW_DISSECTOR:
case BPF_SK_LOOKUP:
return netns_bpf_prog_query(attr, uattr);
+ case BPF_SK_SKB_STREAM_PARSER:
+ case BPF_SK_SKB_STREAM_VERDICT:
+ case BPF_SK_MSG_VERDICT:
+ case BPF_SK_SKB_VERDICT:
+ return sock_map_bpf_prog_query(attr, uattr);
default:
return -EINVAL;
}
{
return base_type(type) == PTR_TO_SOCKET ||
base_type(type) == PTR_TO_TCP_SOCK ||
- base_type(type) == PTR_TO_MEM;
+ base_type(type) == PTR_TO_MEM ||
+ base_type(type) == PTR_TO_BTF_ID;
}
static bool type_is_rdonly_mem(u32 type)
}
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
- s16 offset, struct module **btf_modp)
+ s16 offset)
{
struct bpf_kfunc_btf kf_btf = { .offset = offset };
struct bpf_kfunc_btf_tab *tab;
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
kfunc_btf_cmp_by_off, NULL);
}
- if (btf_modp)
- *btf_modp = b->module;
return b->btf;
}
}
static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
- u32 func_id, s16 offset,
- struct module **btf_modp)
+ u32 func_id, s16 offset)
{
if (offset) {
if (offset < 0) {
return ERR_PTR(-EINVAL);
}
- return __find_kfunc_desc_btf(env, offset, btf_modp);
+ return __find_kfunc_desc_btf(env, offset);
}
return btf_vmlinux ?: ERR_PTR(-ENOENT);
}
prog_aux->kfunc_btf_tab = btf_tab;
}
- desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+ desc_btf = find_kfunc_desc_btf(env, func_id, offset);
if (IS_ERR(desc_btf)) {
verbose(env, "failed to find BTF for kernel function\n");
return PTR_ERR(desc_btf);
if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
return NULL;
- desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+ desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
if (IS_ERR(desc_btf))
return "<error>";
#define MAX_PACKET_OFF 0xffff
-static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
-{
- return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
-}
-
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_access_type t)
}
}
+/* Validate a memory-size argument held in @reg (register number @regno)
+ * against the memory pointer in the preceding register (@regno - 1).
+ *
+ * @zero_size_allowed is forwarded to check_helper_mem_access. @meta may be
+ * NULL; when given, it receives the size register's umax_value in
+ * msize_max_value.
+ *
+ * Returns 0 on success, -EACCES when the size may be negative or is
+ * unbounded, or the error from check_helper_mem_access /
+ * mark_chain_precision.
+ */
+static int check_mem_size_reg(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ bool zero_size_allowed,
+ struct bpf_call_arg_meta *meta)
+{
+ int err;
+
+ /* This is used to refine r0 return value bounds for helpers
+ * that enforce this value as an upper bound on return values.
+ * See do_refine_retval_range() for helpers that can refine
+ * the return value. C type of helper is u32 so we pull register
+ * bound from umax_value however, if negative verifier errors
+ * out. Only upper bounds can be learned because retval is an
+ * int type and negative retvals are allowed.
+ */
+ if (meta)
+ meta->msize_max_value = reg->umax_value;
+
+ /* The register is SCALAR_VALUE; the access check
+ * happens using its boundaries.
+ */
+ if (!tnum_is_const(reg->var_off))
+ /* For unprivileged variable accesses, disable raw
+ * mode so that the program is required to
+ * initialize all the memory that the helper could
+ * just partially fill up.
+ */
+ meta = NULL;
+
+ if (reg->smin_value < 0) {
+ verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
+ regno);
+ return -EACCES;
+ }
+
+ /* When the size may be zero, also check a zero-length access */
+ if (reg->umin_value == 0) {
+ err = check_helper_mem_access(env, regno - 1, 0,
+ zero_size_allowed,
+ meta);
+ if (err)
+ return err;
+ }
+
+ if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+ verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+ regno);
+ return -EACCES;
+ }
+ /* Check the access at the largest size the register may hold */
+ err = check_helper_mem_access(env, regno - 1,
+ reg->umax_value,
+ zero_size_allowed, meta);
+ if (!err)
+ err = mark_chain_precision(env, regno);
+ return err;
+}
+
+
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size)
{
return check_helper_mem_access(env, regno, mem_size, true, NULL);
}
+/* Check a kfunc (mem, len) argument pair, where @reg / @regno is the length
+ * register and the memory pointer is in register @regno - 1.
+ *
+ * If the memory register's type may be NULL, it is temporarily marked
+ * not-NULL for the duration of the check and restored afterwards. The check
+ * itself is check_mem_size_reg with zero size allowed and no call meta.
+ *
+ * Returns the result of check_mem_size_reg.
+ */
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno)
+{
+ struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
+ bool may_be_null = type_may_be_null(mem_reg->type);
+ struct bpf_reg_state saved_reg;
+ int err;
+
+ WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+
+ if (may_be_null) {
+ /* Keep a copy so the original register state can be restored */
+ saved_reg = *mem_reg;
+ mark_ptr_not_null_reg(mem_reg);
+ }
+
+ err = check_mem_size_reg(env, reg, regno, true, NULL);
+
+ if (may_be_null)
+ *mem_reg = saved_reg;
+ return err;
+}
+
+
/* Implementation details:
* bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
* Two bpf_map_lookups (even with the same key) will have different reg->id.
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
- /* This is used to refine r0 return value bounds for helpers
- * that enforce this value as an upper bound on return values.
- * See do_refine_retval_range() for helpers that can refine
- * the return value. C type of helper is u32 so we pull register
- * bound from umax_value however, if negative verifier errors
- * out. Only upper bounds can be learned because retval is an
- * int type and negative retvals are allowed.
- */
- meta->msize_max_value = reg->umax_value;
-
- /* The register is SCALAR_VALUE; the access check
- * happens using its boundaries.
- */
- if (!tnum_is_const(reg->var_off))
- /* For unprivileged variable accesses, disable raw
- * mode so that the program is required to
- * initialize all the memory that the helper could
- * just partially fill up.
- */
- meta = NULL;
-
- if (reg->smin_value < 0) {
- verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
- regno);
- return -EACCES;
- }
-
- if (reg->umin_value == 0) {
- err = check_helper_mem_access(env, regno - 1, 0,
- zero_size_allowed,
- meta);
- if (err)
- return err;
- }
-
- if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
- verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
- regno);
- return -EACCES;
- }
- err = check_helper_mem_access(env, regno - 1,
- reg->umax_value,
- zero_size_allowed, meta);
- if (!err)
- err = mark_chain_precision(env, regno);
+ err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
} else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) {
verbose(env, "R%d is not a known constant'\n",
}
}
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int *insn_idx_p)
{
const struct btf_type *t, *func, *func_proto, *ptr_type;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
u32 i, nargs, func_id, ptr_type_id;
- struct module *btf_mod = NULL;
+ int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
struct btf *desc_btf;
- int err;
+ bool acq;
/* skip for now, but return error when we find this in fixup_kfunc_call */
if (!insn->imm)
return 0;
- desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+ desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
if (IS_ERR(desc_btf))
return PTR_ERR(desc_btf);
func_name = btf_name_by_offset(desc_btf, func->name_off);
func_proto = btf_type_by_id(desc_btf, func->type);
- if (!env->ops->check_kfunc_call ||
- !env->ops->check_kfunc_call(func_id, btf_mod)) {
+ if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_CHECK, func_id)) {
verbose(env, "calling kernel function %s is not allowed\n",
func_name);
return -EACCES;
}
+ acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_ACQUIRE, func_id);
+
/* Check the arguments */
err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
- if (err)
+ if (err < 0)
return err;
+ /* In case of release function, we get register number of refcounted
+ * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
+ */
+ if (err) {
+ err = release_reference(env, regs[err].ref_obj_id);
+ if (err) {
+ verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+ func_name, func_id);
+ return err;
+ }
+ }
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
/* Check return type */
t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+
+ if (acq && !btf_type_is_ptr(t)) {
+ verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+ return -EINVAL;
+ }
+
if (btf_type_is_scalar(t)) {
mark_reg_unknown(env, regs, BPF_REG_0);
mark_btf_func_reg_size(env, BPF_REG_0, t->size);
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
+ if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+ regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
+ /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
+ regs[BPF_REG_0].id = ++env->id_gen;
+ }
mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+ if (acq) {
+ int id = acquire_reference_state(env, insn_idx);
+
+ if (id < 0)
+ return id;
+ regs[BPF_REG_0].id = id;
+ regs[BPF_REG_0].ref_obj_id = id;
+ }
} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
nargs = btf_type_vlen(func_proto);
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
- err = check_kfunc_call(env, insn);
+ err = check_kfunc_call(env, insn, &env->insn_idx);
else
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
+extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
return &bpf_sock_from_file_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_ptr_cookie_proto;
+ case BPF_FUNC_xdp_get_buff_len:
+ return &bpf_xdp_get_buff_len_trace_proto;
#endif
case BPF_FUNC_seq_printf:
return prog->expected_attach_type == BPF_TRACE_ITER ?
return -ENOMEM;
}
- refcount_set(&ax25_rt->refcount, 1);
ax25_rt->callsign = route->dest_addr;
ax25_rt->dev = ax25_dev->dev;
ax25_rt->digipeat = NULL;
ax25cmp(&route->dest_addr, &s->callsign) == 0) {
if (ax25_route_list == s) {
ax25_route_list = s->next;
- ax25_put_route(s);
+ __ax25_put_route(s);
} else {
for (t = ax25_route_list; t != NULL; t = t->next) {
if (t->next == s) {
t->next = s->next;
- ax25_put_route(s);
+ __ax25_put_route(s);
break;
}
}
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/slab.h>
+#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
static int bpf_test_finish(const union bpf_attr *kattr,
union bpf_attr __user *uattr, const void *data,
- u32 size, u32 retval, u32 duration)
+ struct skb_shared_info *sinfo, u32 size,
+ u32 retval, u32 duration)
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
int err = -EFAULT;
err = -ENOSPC;
}
- if (data_out && copy_to_user(data_out, data, copy_size))
- goto out;
+ if (data_out) {
+ int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+
+ if (copy_to_user(data_out, data, len))
+ goto out;
+
+ if (sinfo) {
+ int i, offset = len, data_len;
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ if (offset >= copy_size) {
+ err = -ENOSPC;
+ break;
+ }
+
+ data_len = min_t(int, copy_size - offset,
+ skb_frag_size(frag));
+
+ if (copy_to_user(data_out + offset,
+ skb_frag_address(frag),
+ data_len))
+ goto out;
+
+ offset += data_len;
+ }
+ }
+ }
+
if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
goto out;
if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
{
return a + 1;
}
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
int noinline bpf_fentry_test2(int a, u64 b)
{
return sk;
}
+struct prog_test_ref_kfunc {
+ int a;
+ int b;
+ struct prog_test_ref_kfunc *next;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+ .a = 42,
+ .b = 108,
+ .next = &prog_test_struct,
+};
+
+/* Test-only acquire kfunc: returns a pointer to the static prog_test_struct,
+ * or NULL when the current 64-bit jiffies value is odd. @scalar_ptr is not
+ * used by the body.
+ */
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+	/* Odd jiffies -> NULL, which exercises the NULL-return path. */
+	return (get_jiffies_64() % 2) ? NULL : &prog_test_struct;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+}
+
+struct prog_test_pass1 {
+ int x0;
+ struct {
+ int x1;
+ struct {
+ int x2;
+ struct {
+ int x3;
+ };
+ };
+ };
+};
+
+struct prog_test_pass2 {
+ int len;
+ short arr1[4];
+ struct {
+ char arr2[4];
+ unsigned long arr3[8];
+ } x;
+};
+
+struct prog_test_fail1 {
+ void *p;
+ int x;
+};
+
+struct prog_test_fail2 {
+ int x8;
+ struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+ int len;
+ char arr1[2];
+ char arr2[];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
__diag_pop();
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
-BTF_SET_START(test_sk_kfunc_ids)
+BTF_SET_START(test_sk_check_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test1)
BTF_ID(func, bpf_kfunc_call_test2)
BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
-
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
-{
- if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
- return true;
- return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
-}
-
-static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
- u32 headroom, u32 tailroom)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID(func, bpf_kfunc_call_test_pass1)
+BTF_ID(func, bpf_kfunc_call_test_pass2)
+BTF_ID(func, bpf_kfunc_call_test_fail1)
+BTF_ID(func, bpf_kfunc_call_test_fail2)
+BTF_ID(func, bpf_kfunc_call_test_fail3)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_SET_END(test_sk_check_kfunc_ids)
+
+BTF_SET_START(test_sk_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_acquire_kfunc_ids)
+
+BTF_SET_START(test_sk_release_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_SET_END(test_sk_release_kfunc_ids)
+
+BTF_SET_START(test_sk_ret_null_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_ret_null_kfunc_ids)
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+ u32 size, u32 headroom, u32 tailroom)
{
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
- u32 user_size = kattr->test.data_size_in;
void *data;
if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
if (kattr->test.flags || kattr->test.cpu)
return -EINVAL;
- data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+ data = bpf_test_init(kattr, kattr->test.data_size_in,
+ size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
return PTR_ERR(data);
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
size = skb_headlen(skb);
- ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
+ duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
union bpf_attr __user *uattr)
{
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- u32 headroom = XDP_PACKET_HEADROOM;
u32 size = kattr->test.data_size_in;
+ u32 headroom = XDP_PACKET_HEADROOM;
+ u32 retval, duration, max_data_sz;
u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue;
+ struct skb_shared_info *sinfo;
struct xdp_buff xdp = {};
- u32 retval, duration;
+ int i, ret = -EINVAL;
struct xdp_md *ctx;
- u32 max_data_sz;
void *data;
- int ret = -EINVAL;
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
prog->expected_attach_type == BPF_XDP_CPUMAP)
headroom -= ctx->data;
}
- /* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
+ size = min_t(u32, size, max_data_sz);
- data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
+ data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto free_ctx;
}
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
- xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
- &rxqueue->xdp_rxq);
+ rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+ xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
xdp_prepare_buff(&xdp, data, headroom, size, true);
+ sinfo = xdp_get_shared_info_from_buff(&xdp);
ret = xdp_convert_md_to_buff(ctx, &xdp);
if (ret)
goto free_data;
+ if (unlikely(kattr->test.data_size_in > size)) {
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+
+ while (size < kattr->test.data_size_in) {
+ struct page *page;
+ skb_frag_t *frag;
+ int data_len;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, page);
+
+ data_len = min_t(int, kattr->test.data_size_in - size,
+ PAGE_SIZE);
+ skb_frag_size_set(frag, data_len);
+
+ if (copy_from_user(page_address(page), data_in + size,
+ data_len)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ sinfo->xdp_frags_size += data_len;
+ size += data_len;
+ }
+ xdp_buff_set_frags_flag(&xdp);
+ }
+
if (repeat > 1)
bpf_prog_change_xdp(NULL, prog);
+
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
/* We convert the xdp_buff back to an xdp_md before checking the return
* code so the reference count of any held netdevice will be decremented
if (ret)
goto out;
- if (xdp.data_meta != data + headroom ||
- xdp.data_end != xdp.data_meta + size)
- size = xdp.data_end - xdp.data_meta;
-
- ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
- duration);
+ size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+ retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct xdp_md));
if (repeat > 1)
bpf_prog_change_xdp(prog, NULL);
free_data:
+ for (i = 0; i < sinfo->nr_frags; i++)
+ __free_page(skb_frag_page(&sinfo->frags[i]));
kfree(data);
free_ctx:
kfree(ctx);
if (size < ETH_HLEN)
return -EINVAL;
- data = bpf_test_init(kattr, size, 0, 0);
+ data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
if (IS_ERR(data))
return PTR_ERR(data);
if (ret < 0)
goto out;
- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
- retval, duration);
+ ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
+ sizeof(flow_keys), retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
sizeof(struct bpf_flow_keys));
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
}
- ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
kfree(ctx);
return err;
}
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &test_sk_check_kfunc_ids,
+ .acquire_set = &test_sk_acquire_kfunc_ids,
+ .release_set = &test_sk_release_kfunc_ids,
+ .ret_null_set = &test_sk_ret_null_kfunc_ids,
+};
+
+/* Initcall: register bpf_prog_test_kfunc_set for BPF_PROG_TYPE_SCHED_CLS
+ * programs and return the result of register_btf_kfunc_id_set().
+ */
+static int __init bpf_prog_test_run_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+}
+late_initcall(bpf_prog_test_run_init);
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
+
+/* BPF helper: return xdp_get_buff_len() for the given @xdp buffer. */
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
+{
+ return xdp_get_buff_len(xdp);
+}
+
+static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
.arg2_type = ARG_ANYTHING,
};
+/* Copy @len bytes between @buf and the packet data of @xdp, starting at byte
+ * offset @off from xdp->data. With @flush false, data is read from the packet
+ * into @buf; with @flush true, @buf is written into the packet.
+ *
+ * The copy covers the linear area first and then each entry of sinfo->frags[]
+ * in order. It stops when @len reaches zero or the fragments run out; no check
+ * against the total buffer length is performed here.
+ */
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush)
+{
+ unsigned long ptr_len, ptr_off = 0;
+ skb_frag_t *next_frag, *end_frag;
+ struct skb_shared_info *sinfo;
+ void *src, *dst;
+ u8 *ptr_buf;
+
+ /* Fast path: the whole range lies inside the linear area. */
+ if (likely(xdp->data_end - xdp->data >= off + len)) {
+ src = flush ? buf : xdp->data + off;
+ dst = flush ? xdp->data + off : buf;
+ memcpy(dst, src, len);
+ return;
+ }
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ end_frag = &sinfo->frags[sinfo->nr_frags];
+ next_frag = &sinfo->frags[0];
+
+ /* The linear area is the first chunk to be examined. */
+ ptr_len = xdp->data_end - xdp->data;
+ ptr_buf = xdp->data;
+
+ while (true) {
+ /* The current chunk covers offsets [ptr_off, ptr_off + ptr_len);
+  * copy whatever part of the request falls inside it.
+  */
+ if (off < ptr_off + ptr_len) {
+ unsigned long copy_off = off - ptr_off;
+ unsigned long copy_len = min(len, ptr_len - copy_off);
+
+ src = flush ? buf : ptr_buf + copy_off;
+ dst = flush ? ptr_buf + copy_off : buf;
+ memcpy(dst, src, copy_len);
+
+ off += copy_len;
+ len -= copy_len;
+ buf += copy_len;
+ }
+
+ if (!len || next_frag == end_frag)
+ break;
+
+ /* Move on to the next fragment as the current chunk. */
+ ptr_off += ptr_len;
+ ptr_buf = skb_frag_address(next_frag);
+ ptr_len = skb_frag_size(next_frag);
+ next_frag++;
+ }
+}
+
+/* Return a pointer to @len contiguous bytes at byte offset @offset from
+ * xdp->data, where offsets run through the linear area and then through the
+ * fragments in order.
+ *
+ * Returns:
+ *   ERR_PTR(-EFAULT) if @offset or @len is larger than 0xffff,
+ *   ERR_PTR(-EINVAL) if the range extends past xdp_get_buff_len(xdp),
+ *   NULL if the range is valid but not contained in a single area (the
+ *        load/store helpers then fall back to bpf_xdp_copy_buf()),
+ *   otherwise a direct pointer into the linear area or a fragment.
+ */
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	u32 size = xdp->data_end - xdp->data;
+	void *addr = xdp->data;
+	int i;
+
+	/* Both values are capped at 0xffff, so offset + len cannot wrap. */
+	if (unlikely(offset > 0xffff || len > 0xffff))
+		return ERR_PTR(-EFAULT);
+
+	if (offset + len > xdp_get_buff_len(xdp))
+		return ERR_PTR(-EINVAL);
+
+	if (offset < size) /* linear area */
+		goto out;
+
+	offset -= size;
+	for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+		u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+		if (offset < frag_size) {
+			addr = skb_frag_address(&sinfo->frags[i]);
+			size = frag_size;
+			break;
+		}
+		offset -= frag_size;
+	}
+out:
+	/* A range that ends exactly at the end of the area is still fully
+	 * contiguous, so "<=" (not "<") is the right bound for handing out a
+	 * direct pointer instead of forcing the piecewise copy path.
+	 */
+	return offset + len <= size ? addr + offset : NULL;
+}
+
+/* BPF helper: copy @len bytes starting at @offset of the @xdp buffer into
+ * @buf. Returns 0 on success or the negative error encoded in the result of
+ * bpf_xdp_pointer().
+ */
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+	   void *, buf, u32, len)
+{
+	void *src = bpf_xdp_pointer(xdp, offset, len);
+
+	if (IS_ERR(src))
+		return PTR_ERR(src);
+
+	if (src) {
+		/* Contiguous range: copy straight out of the packet. */
+		memcpy(buf, src, len);
+		return 0;
+	}
+
+	/* No single contiguous pointer available: gather piecewise. */
+	bpf_xdp_copy_buf(xdp, offset, buf, len, false);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+ .func = bpf_xdp_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
+/* BPF helper: copy @len bytes from @buf into the @xdp buffer starting at
+ * @offset. Returns 0 on success or the negative error encoded in the result
+ * of bpf_xdp_pointer().
+ */
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+	   void *, buf, u32, len)
+{
+	void *dst = bpf_xdp_pointer(xdp, offset, len);
+
+	if (IS_ERR(dst))
+		return PTR_ERR(dst);
+
+	if (dst) {
+		/* Contiguous range: write straight into the packet. */
+		memcpy(dst, buf, len);
+		return 0;
+	}
+
+	/* No single contiguous pointer available: scatter piecewise. */
+	bpf_xdp_copy_buf(xdp, offset, buf, len, true);
+	return 0;
+}
+
+/* Verifier prototype for bpf_xdp_store_bytes(). The helper only reads from
+ * its third argument (it is the source of the copy into the packet), so the
+ * argument is declared as initialized memory. ARG_PTR_TO_UNINIT_MEM would
+ * tell the verifier the helper fills the buffer, allowing uninitialized
+ * bytes to be stored into the packet.
+ */
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+	.func		= bpf_xdp_store_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+};
+
+/* Grow the last fragment of a multi-frag @xdp buffer by @offset bytes.
+ *
+ * Returns -EOPNOTSUPP when rxq->frag_size is zero or larger than
+ * xdp->frame_sz, -EINVAL when @offset exceeds the tailroom computed for the
+ * last fragment, and 0 on success. The newly exposed bytes are zeroed.
+ */
+static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+ struct xdp_rxq_info *rxq = xdp->rxq;
+ unsigned int tailroom;
+
+ if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+ return -EOPNOTSUPP;
+
+ /* Space left in the fragment after its current offset and size. */
+ tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+ if (unlikely(offset > tailroom))
+ return -EINVAL;
+
+ /* Zero the added region before accounting for it. */
+ memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+ skb_frag_size_add(frag, offset);
+ sinfo->xdp_frags_size += offset;
+
+ return 0;
+}
+
+/* Remove @offset bytes from the tail of a multi-frag @xdp buffer.
+ *
+ * Returns -EINVAL when @offset is larger than the total buffer length minus
+ * ETH_HLEN, 0 otherwise.
+ */
+static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ int i, n_frags_free = 0, len_free = 0;
+
+ if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
+ return -EINVAL;
+
+ /* Walk the fragments from last to first while bytes remain to trim. */
+ for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ int shrink = min_t(int, offset, skb_frag_size(frag));
+
+ len_free += shrink;
+ offset -= shrink;
+
+ if (skb_frag_size(frag) == shrink) {
+ /* Whole fragment consumed: hand its page back. */
+ struct page *page = skb_frag_page(frag);
+
+ __xdp_return(page_address(page), &xdp->rxq->mem,
+ false, NULL);
+ n_frags_free++;
+ } else {
+ /* Partially consumed: shorten it and stop. */
+ skb_frag_size_sub(frag, shrink);
+ break;
+ }
+ }
+ sinfo->nr_frags -= n_frags_free;
+ sinfo->xdp_frags_size -= len_free;
+
+ /* No fragments left: clear the frags flag and take whatever is still
+  * left of @offset off the end of the linear area.
+  */
+ if (unlikely(!sinfo->nr_frags)) {
+ xdp_buff_clear_frags_flag(xdp);
+ xdp->data_end -= offset;
+ }
+
+ return 0;
+}
+
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
void *data_end = xdp->data_end + offset;
+ if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
+ if (offset < 0)
+ return bpf_xdp_frags_shrink_tail(xdp, -offset);
+
+ return bpf_xdp_frags_increase_tail(xdp, offset);
+ }
+
/* Notice that xdp_data_hard_end have reserved some tailroom */
if (unlikely(data_end > data_hard_end))
return -EINVAL;
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
+ /* XDP_REDIRECT is not fully supported yet for xdp frags since
+ * not all XDP capable drivers can map non-linear xdp_frame in
+ * ndo_xdp_xmit.
+ */
+ if (unlikely(xdp_buff_has_frags(xdp) &&
+ map_type != BPF_MAP_TYPE_CPUMAP))
+ return -EOPNOTSUPP;
+
if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
};
#endif
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
unsigned long off, unsigned long len)
{
- memcpy(dst_buff, src_buff + off, len);
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+
+ bpf_xdp_copy_buf(xdp, off, dst, len, false);
return 0;
}
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(!xdp ||
- xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+ if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
return -EFAULT;
- return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+ return bpf_event_output(map, flags, meta, meta_size, xdp,
xdp_size, bpf_xdp_copy);
}
return &bpf_xdp_redirect_map_proto;
case BPF_FUNC_xdp_adjust_tail:
return &bpf_xdp_adjust_tail_proto;
+ case BPF_FUNC_xdp_get_buff_len:
+ return &bpf_xdp_get_buff_len_proto;
+ case BPF_FUNC_xdp_load_bytes:
+ return &bpf_xdp_load_bytes_proto;
+ case BPF_FUNC_xdp_store_bytes:
+ return &bpf_xdp_store_bytes_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
case BPF_FUNC_check_mtu:
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.gen_ld_abs = bpf_gen_ld_abs,
- .check_kfunc_call = bpf_prog_test_check_kfunc_call,
};
const struct bpf_prog_ops tc_cls_act_prog_ops = {
return peer;
}
+EXPORT_SYMBOL_GPL(get_net_ns_by_id);
/*
* setup_net runs the initializers for the network namespace object.
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+ sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
}
return NULL;
}
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+ u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
- struct bpf_prog **pprog;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- pprog = &progs->msg_parser;
+ *pprog = &progs->msg_parser;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
- pprog = &progs->stream_parser;
+ *pprog = &progs->stream_parser;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
if (progs->skb_verdict)
return -EBUSY;
- pprog = &progs->stream_verdict;
+ *pprog = &progs->stream_verdict;
break;
case BPF_SK_SKB_VERDICT:
if (progs->stream_verdict)
return -EBUSY;
- pprog = &progs->skb_verdict;
+ *pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}
+ return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+ struct bpf_prog *old, u32 which)
+{
+ struct bpf_prog **pprog;
+ int ret;
+
+ ret = sock_map_prog_lookup(map, &pprog, which);
+ if (ret)
+ return ret;
+
if (old)
return psock_replace_prog(pprog, prog, old);
return 0;
}
+/* Report the program attached to a sockmap for attr->query.attach_type.
+ *
+ * The map is looked up from attr->target_fd. attach_flags (always 0) and
+ * prog_cnt (0 or 1) are written to @uattr, and the program id is stored at
+ * attr->query.prog_ids when a non-zero id was read.
+ *
+ * Returns -EINVAL for non-zero query_flags, the error from the map lookup or
+ * from sock_map_prog_lookup(), -EFAULT if a copy to user space fails, and 0
+ * otherwise.
+ */
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+ u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+ struct bpf_prog **pprog;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ u32 id = 0;
+ int ret;
+
+ if (attr->query.query_flags)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ rcu_read_lock();
+
+ ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+ if (ret)
+ goto end;
+
+ prog = *pprog;
+ prog_cnt = !prog ? 0 : 1;
+
+ /* Nothing to report as an id: the caller gave no room for ids, or no
+  * program is attached.
+  */
+ if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+ goto end;
+
+ /* we do not hold the refcnt, the bpf prog may be released
+ * asynchronously and the id would be set to 0.
+ */
+ id = data_race(prog->aux->id);
+ if (id == 0)
+ prog_cnt = 0;
+
+end:
+ rcu_read_unlock();
+
+ /* The results are copied out even when the lookup above failed; a
+  * failed copy replaces ret with -EFAULT.
+  */
+ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+ (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+ copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+ ret = -EFAULT;
+
+ fdput(f);
+ return ret;
+}
+
static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
{
switch (link->map->map_type) {
}
/* Returns 0 on success, negative on failure */
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
- struct net_device *dev, u32 queue_index, unsigned int napi_id)
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id, u32 frag_size)
{
if (!dev) {
WARN(1, "Missing net_device from driver");
xdp_rxq->dev = dev;
xdp_rxq->queue_index = queue_index;
xdp_rxq->napi_id = napi_id;
+ xdp_rxq->frag_size = frag_size;
xdp_rxq->reg_state = REG_STATE_REGISTERED;
return 0;
}
-EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
- struct xdp_buff *xdp)
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+ struct xdp_buff *xdp)
{
struct xdp_mem_allocator *xa;
struct page *page;
+/* Release an xdp_frame. When the frame carries fragments, every fragment page
+ * is passed to __xdp_return() first; the frame's own data is released last.
+ * napi_direct is false for all of these calls.
+ */
void xdp_return_frame(struct xdp_frame *xdpf)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ /* Common case: no fragments, only the frame data to release. */
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdpf->mem, false, NULL);
+ }
+out:
__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
+/* Same as xdp_return_frame(), but every __xdp_return() call is made with
+ * napi_direct set to true. Fragment pages, if any, are released before the
+ * frame's own data.
+ */
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ /* Common case: no fragments, only the frame data to release. */
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdpf->mem, true, NULL);
+ }
+out:
__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
struct xdp_mem_allocator *xa;
if (mem->type != MEM_TYPE_PAGE_POOL) {
- __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+ xdp_return_frame(xdpf);
return;
}
bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
}
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ struct skb_shared_info *sinfo;
+ int i;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ bq->q[bq->count++] = skb_frag_address(frag);
+ if (bq->count == XDP_BULK_QUEUE_SIZE)
+ xdp_flush_frame_bulk(bq);
+ }
+ }
bq->q[bq->count++] = xdpf->data;
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
+/* Release an xdp_buff. When the buffer carries fragments, every fragment page
+ * is passed to __xdp_return() first; xdp->data is released last. All calls
+ * use the memory info of xdp->rxq, napi_direct set to true, and pass @xdp.
+ */
void xdp_return_buff(struct xdp_buff *xdp)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ /* Common case: no fragments, only the buffer data to release. */
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
+ }
+out:
__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
}
struct sk_buff *skb,
struct net_device *dev)
{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
unsigned int headroom, frame_size;
void *hard_start;
+ u8 nr_frags;
+
+ /* xdp frags frame */
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ nr_frags = sinfo->nr_frags;
/* Part of headroom was reserved to xdpf */
headroom = sizeof(*xdpf) + xdpf->headroom;
if (xdpf->metasize)
skb_metadata_set(skb, xdpf->metasize);
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdpf->frame_sz,
+ xdp_frame_is_frag_pfmemalloc(xdpf));
+
/* Essential SKB info: protocol and skb->dev */
skb->protocol = eth_type_trans(skb, dev);
return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
}
-static inline u64 max48(const u64 seq1, const u64 seq2)
-{
- return after48(seq1, seq2) ? seq1 : seq2;
-}
-
/**
* dccp_loss_count - Approximate the number of lost data packets in a burst loss
* @s1: last known sequence number before the loss ('hole')
inet_ctl_sock_destroy(pn->v4_ctl_sk);
}
-static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&dccp_hashinfo, AF_INET);
-}
-
static struct pernet_operations dccp_v4_ops = {
.init = dccp_v4_init_net,
.exit = dccp_v4_exit_net,
- .exit_batch = dccp_v4_exit_batch,
.id = &dccp_v4_pernet_id,
.size = sizeof(struct dccp_v4_pernet),
};
inet_ctl_sock_destroy(pn->v6_ctl_sk);
}
-static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&dccp_hashinfo, AF_INET6);
-}
-
static struct pernet_operations dccp_v6_ops = {
.init = dccp_v6_init_net,
.exit = dccp_v6_exit_net,
- .exit_batch = dccp_v6_exit_batch,
.id = &dccp_v6_pernet_id,
.size = sizeof(struct dccp_v6_pernet),
};
#include "feat.h"
struct inet_timewait_death_row dccp_death_row = {
+ .tw_refcount = REFCOUNT_INIT(1),
.sysctl_max_tw_buckets = NR_FILE * 2,
.hashinfo = &dccp_hashinfo,
};
return dsa_tag_8021q_bridge_join(ds, info);
}
-static int dsa_switch_bridge_leave(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info)
+static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info)
{
- struct dsa_switch_tree *dst = ds->dst;
struct netlink_ext_ack extack = {0};
bool change_vlan_filtering = false;
bool vlan_filtering;
struct dsa_port *dp;
int err;
- if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_leave)
- ds->ops->port_bridge_leave(ds, info->port, info->bridge);
-
- if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_leave)
- ds->ops->crosschip_bridge_leave(ds, info->tree_index,
- info->sw_index, info->port,
- info->bridge);
-
if (ds->needs_standalone_vlan_filtering &&
!br_vlan_enabled(info->bridge.dev)) {
change_vlan_filtering = true;
return err;
}
+ return 0;
+}
+
+/* Handle a bridge-leave notification on switch @ds.
+ *
+ * When the notification targets this switch (same tree and switch index as
+ * in @info), the port_bridge_leave op is invoked if present and VLAN
+ * filtering is re-synchronised; an error from that sync is returned
+ * immediately. Otherwise the crosschip_bridge_leave op is invoked if present.
+ * In all non-error cases the result of dsa_tag_8021q_bridge_leave() is
+ * returned.
+ */
+static int dsa_switch_bridge_leave(struct dsa_switch *ds,
+				   struct dsa_notifier_bridge_info *info)
+{
+	struct dsa_switch_tree *dst = ds->dst;
+	bool targets_this_switch = dst->index == info->tree_index &&
+				   ds->index == info->sw_index;
+	int err;
+
+	if (targets_this_switch) {
+		if (ds->ops->port_bridge_leave)
+			ds->ops->port_bridge_leave(ds, info->port,
+						   info->bridge);
+	} else if (ds->ops->crosschip_bridge_leave) {
+		ds->ops->crosschip_bridge_leave(ds, info->tree_index,
+						info->sw_index, info->port,
+						info->bridge);
+	}
+
+	if (targets_this_switch) {
+		err = dsa_switch_sync_vlan_filtering(ds, info);
+		if (err)
+			return err;
+	}
+
return dsa_tag_8021q_bridge_leave(ds, info);
}
return ntohs(rct->sequence_nr);
}
-static inline u16 get_prp_lan_id(struct prp_rct *rct)
-{
- return ntohs(rct->lan_id_and_LSDU_size) >> 12;
-}
-
/* assume there is a valid rct */
static inline bool prp_check_lsdu_size(struct sk_buff *skb,
struct prp_rct *rct,
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
+#include <linux/init.h>
#include <linux/types.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
}
}
-BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
BTF_ID(func, tcp_reno_ssthresh)
BTF_ID(func, tcp_reno_cong_avoid)
BTF_ID(func, tcp_reno_undo_cwnd)
BTF_ID(func, tcp_slow_start)
BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
-{
- if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
- return true;
- return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
-}
+static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &bpf_tcp_ca_check_kfunc_ids,
+};
static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
.get_func_proto = bpf_tcp_ca_get_func_proto,
.is_valid_access = bpf_tcp_ca_is_valid_access,
.btf_struct_access = bpf_tcp_ca_btf_struct_access,
- .check_kfunc_call = bpf_tcp_ca_check_kfunc_call,
};
static int bpf_tcp_ca_init_member(const struct btf_type *t,
.init = bpf_tcp_ca_init,
.name = "tcp_congestion_ops",
};
+/* Register bpf_tcp_ca_kfunc_set for BPF_PROG_TYPE_STRUCT_OPS programs.
+ * Runs once as a late_initcall and returns whatever
+ * register_btf_kfunc_id_set() returns.
+ */
+static int __init bpf_tcp_ca_kfunc_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+}
+late_initcall(bpf_tcp_ca_kfunc_init);
return &fib_info_laddrhash[slot];
}
-static struct hlist_head *fib_info_hash_alloc(int bytes)
-{
- if (bytes <= PAGE_SIZE)
- return kzalloc(bytes, GFP_KERNEL);
- else
- return (struct hlist_head *)
- __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(bytes));
-}
-
-static void fib_info_hash_free(struct hlist_head *hash, int bytes)
-{
- if (!hash)
- return;
-
- if (bytes <= PAGE_SIZE)
- kfree(hash);
- else
- free_pages((unsigned long) hash, get_order(bytes));
-}
-
static void fib_info_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *new_laddrhash,
unsigned int new_size)
{
struct hlist_head *old_info_hash, *old_laddrhash;
unsigned int old_size = fib_info_hash_size;
- unsigned int i, bytes;
+ unsigned int i;
spin_lock_bh(&fib_info_lock);
old_info_hash = fib_info_hash;
spin_unlock_bh(&fib_info_lock);
- bytes = old_size * sizeof(struct hlist_head *);
- fib_info_hash_free(old_info_hash, bytes);
- fib_info_hash_free(old_laddrhash, bytes);
+ kvfree(old_info_hash);
+ kvfree(old_laddrhash);
}
__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
- unsigned int bytes;
+ size_t bytes;
if (!new_size)
new_size = 16;
- bytes = new_size * sizeof(struct hlist_head *);
- new_info_hash = fib_info_hash_alloc(bytes);
- new_laddrhash = fib_info_hash_alloc(bytes);
+ bytes = (size_t)new_size * sizeof(struct hlist_head *);
+ new_info_hash = kvzalloc(bytes, GFP_KERNEL);
+ new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
if (!new_info_hash || !new_laddrhash) {
- fib_info_hash_free(new_info_hash, bytes);
- fib_info_hash_free(new_laddrhash, bytes);
- } else
+ kvfree(new_info_hash);
+ kvfree(new_laddrhash);
+ } else {
fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
-
+ }
if (!fib_info_hash_size)
goto failure;
}
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
-/*
- * The ICMP socket(s). This is the most convenient way to flow control
- * our ICMP output as well as maintain a clean interface throughout
- * all layers. All Socketless IP sends will soon be gone.
- *
- * On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmp_sk(struct net *net)
-{
- return this_cpu_read(*net->ipv4.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
/* Called with BH disabled */
static inline struct sock *icmp_xmit_lock(struct net *net)
{
struct sock *sk;
- sk = icmp_sk(net);
+ sk = this_cpu_read(ipv4_icmp_sk);
if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path signals a
*/
return NULL;
}
+ sock_net_set(sk, net);
return sk;
}
static inline void icmp_xmit_unlock(struct sock *sk)
{
+ sock_net_set(sk, &init_net);
spin_unlock(&sk->sk_lock.slock);
}
return 0;
}
-static void icmp_push_reply(struct icmp_bxm *icmp_param,
+static void icmp_push_reply(struct sock *sk,
+ struct icmp_bxm *icmp_param,
struct flowi4 *fl4,
struct ipcm_cookie *ipc, struct rtable **rt)
{
- struct sock *sk;
struct sk_buff *skb;
- sk = icmp_sk(dev_net((*rt)->dst.dev));
if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len,
if (IS_ERR(rt))
goto out_unlock;
if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
- icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
+ icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
icmp_xmit_unlock(sk);
if (!fl4.saddr)
fl4.saddr = htonl(INADDR_DUMMY);
- icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+ icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
out_unlock:
},
};
-static void __net_exit icmp_sk_exit(struct net *net)
-{
- int i;
-
- for_each_possible_cpu(i)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
- free_percpu(net->ipv4.icmp_sk);
- net->ipv4.icmp_sk = NULL;
-}
-
static int __net_init icmp_sk_init(struct net *net)
{
- int i, err;
-
- net->ipv4.icmp_sk = alloc_percpu(struct sock *);
- if (!net->ipv4.icmp_sk)
- return -ENOMEM;
-
- for_each_possible_cpu(i) {
- struct sock *sk;
-
- err = inet_ctl_sock_create(&sk, PF_INET,
- SOCK_RAW, IPPROTO_ICMP, net);
- if (err < 0)
- goto fail;
-
- *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
-
- /* Enough space for 2 64K ICMP packets, including
- * sk_buff/skb_shared_info struct overhead.
- */
- sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
-
- /*
- * Speedup sock_wfree()
- */
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
- inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
- }
-
/* Control parameters for ECHO replies. */
net->ipv4.sysctl_icmp_echo_ignore_all = 0;
net->ipv4.sysctl_icmp_echo_enable_probe = 0;
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
return 0;
-
-fail:
- icmp_sk_exit(net);
- return err;
}
static struct pernet_operations __net_initdata icmp_sk_ops = {
.init = icmp_sk_init,
- .exit = icmp_sk_exit,
};
int __init icmp_init(void)
{
+ int err, i;
+
+ for_each_possible_cpu(i) {
+ struct sock *sk;
+
+ err = inet_ctl_sock_create(&sk, PF_INET,
+ SOCK_RAW, IPPROTO_ICMP, &init_net);
+ if (err < 0)
+ return err;
+
+ per_cpu(ipv4_icmp_sk, i) = sk;
+
+ /* Enough space for 2 64K ICMP packets, including
+ * sk_buff/skb_shared_info struct overhead.
+ */
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+
+ /*
+ * Speedup sock_wfree()
+ */
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
+ }
return register_pernet_subsys(&icmp_sk_ops);
}
spin_unlock(lock);
/* Disassociate with bind bucket. */
- bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
- hashinfo->bhash_size)];
+ bhead = &hashinfo->bhash[tw->tw_bslot];
spin_lock(&bhead->lock);
inet_twsk_bind_unhash(tw, hashinfo);
spin_unlock(&bhead->lock);
- atomic_dec(&tw->tw_dr->tw_count);
+ if (refcount_dec_and_test(&tw->tw_dr->tw_refcount))
+ kfree(tw->tw_dr);
+
inet_twsk_put(tw);
}
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
- bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
- hashinfo->bhash_size)];
+ /* Cache inet_bhashfn(), because 'struct net' might no longer be
+ * available later in inet_twsk_kill().
+ */
+ tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
+ hashinfo->bhash_size);
+ bhead = &hashinfo->bhash[tw->tw_bslot];
spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
{
struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
- if (tw->tw_kill)
- __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
- else
- __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
inet_twsk_kill(tw);
}
{
struct inet_timewait_sock *tw;
- if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+ if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets)
return NULL;
tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
* of PAWS.
*/
- tw->tw_kill = timeo <= 4*HZ;
if (!rearm) {
+ bool kill = timeo <= 4*HZ;
+
+ __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
+ LINUX_MIB_TIMEWAITED);
BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
- atomic_inc(&tw->tw_dr->tw_count);
+ refcount_inc(&tw->tw_dr->tw_refcount);
} else {
mod_timer_pending(&tw->tw_timer, jiffies + timeo);
}
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
-
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
-{
- struct inet_timewait_sock *tw;
- struct sock *sk;
- struct hlist_nulls_node *node;
- unsigned int slot;
-
- for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
- struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
-restart_rcu:
- cond_resched();
- rcu_read_lock();
-restart:
- sk_nulls_for_each_rcu(sk, node, &head->chain) {
- if (sk->sk_state != TCP_TIME_WAIT)
- continue;
- tw = inet_twsk(sk);
- if ((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->ns.count))
- continue;
-
- if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
- continue;
-
- if (unlikely((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->ns.count))) {
- inet_twsk_put(tw);
- goto restart;
- }
-
- rcu_read_unlock();
- local_bh_disable();
- inet_twsk_deschedule_put(tw);
- local_bh_enable();
- goto restart_rcu;
- }
- /* If the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto restart;
- rcu_read_unlock();
- }
-}
-EXPORT_SYMBOL_GPL(inet_twsk_purge);
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
- atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
- proto_memory_allocated(&tcp_prot));
+ refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1,
+ sockets, proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
proto_memory_allocated(&udp_prot));
#define DEFAULT_MIN_PMTU (512 + 20 + 20)
#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
-
+#define DEFAULT_MIN_ADVMSS 256
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
-static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
+ struct net *net = dev_net(dst->dev);
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
- ip_rt_min_advmss);
+ net->ipv4.ip_rt_min_advmss);
return min(advmss, IPV4_MAX_PMTU - header_size);
}
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "min_adv_mss",
- .data = &ip_rt_min_advmss,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "min_adv_mss",
+ .data = &init_net.ipv4.ip_rt_min_advmss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ },
};
/* Set default value for namespaceified sysctls */
net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
+ net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS;
return 0;
}
};
static struct ctl_table ipv4_net_table[] = {
+ /* tcp_max_tw_buckets must be first in this table. */
+ {
+ .procname = "tcp_max_tw_buckets",
+/* .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{
.procname = "icmp_echo_ignore_all",
.data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
.extra2 = &two,
},
{
- .procname = "tcp_max_tw_buckets",
- .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_syn_backlog",
.data = &init_net.ipv4.sysctl_max_syn_backlog,
.maxlen = sizeof(int),
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+ /* skip first entry (sysctl_max_tw_buckets) */
+ for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
if (table[i].data) {
/* Update the variables to point into
* the current struct net
}
}
+ table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets;
+
net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
if (!net->ipv4.ipv4_hdr)
goto err_reg;
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
- new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
- new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+ new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size);
/* We try hard to avoid divides here */
size_goal = tp->gso_segs * mss_now;
.set_state = bbr_set_state,
};
-BTF_SET_START(tcp_bbr_kfunc_ids)
+BTF_SET_START(tcp_bbr_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, bbr_init)
BTF_ID(func, bbr_set_state)
#endif
#endif
-BTF_SET_END(tcp_bbr_kfunc_ids)
+BTF_SET_END(tcp_bbr_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &tcp_bbr_check_kfunc_ids,
+};
static int __init bbr_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
- ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
- if (ret)
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}
static void __exit bbr_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}
.name = "cubic",
};
-BTF_SET_START(tcp_cubic_kfunc_ids)
+BTF_SET_START(tcp_cubic_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, cubictcp_init)
BTF_ID(func, cubictcp_acked)
#endif
#endif
-BTF_SET_END(tcp_cubic_kfunc_ids)
+BTF_SET_END(tcp_cubic_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &tcp_cubic_check_kfunc_ids,
+};
static int __init cubictcp_register(void)
{
/* divide by bic_scale and by constant Srtt (100ms) */
do_div(cube_factor, bic_scale * 10);
- ret = tcp_register_congestion_control(&cubictcp);
- if (ret)
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&cubictcp);
}
static void __exit cubictcp_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
tcp_unregister_congestion_control(&cubictcp);
}
.name = "dctcp-reno",
};
-BTF_SET_START(tcp_dctcp_kfunc_ids)
+BTF_SET_START(tcp_dctcp_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, dctcp_init)
BTF_ID(func, dctcp_state)
#endif
#endif
-BTF_SET_END(tcp_dctcp_kfunc_ids)
+BTF_SET_END(tcp_dctcp_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &tcp_dctcp_check_kfunc_ids,
+};
static int __init dctcp_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
- ret = tcp_register_congestion_control(&dctcp);
- if (ret)
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&dctcp);
}
static void __exit dctcp_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
tcp_unregister_congestion_control(&dctcp);
}
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
+static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
return secure_tcp_seq(ip_hdr(skb)->daddr,
struct rtable *rt;
int err;
struct ip_options_rcu *inet_opt;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+ struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
arg.tos = ip_hdr(skb)->tos;
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+ ctl_sk = this_cpu_read(ipv4_tcp_sk);
+ sock_net_set(ctl_sk, net);
if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
transmit_time);
ctl_sk->sk_mark = 0;
+ sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
local_bh_enable();
arg.tos = tos;
arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+ ctl_sk = this_cpu_read(ipv4_tcp_sk);
+ sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
transmit_time);
ctl_sk->sk_mark = 0;
+ sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
local_bh_enable();
}
static void __net_exit tcp_sk_exit(struct net *net)
{
- int cpu;
+ struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row;
if (net->ipv4.tcp_congestion_control)
bpf_module_put(net->ipv4.tcp_congestion_control,
net->ipv4.tcp_congestion_control->owner);
-
- for_each_possible_cpu(cpu)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
- free_percpu(net->ipv4.tcp_sk);
+ if (refcount_dec_and_test(&tcp_death_row->tw_refcount))
+ kfree(tcp_death_row);
}
static int __net_init tcp_sk_init(struct net *net)
{
- int res, cpu, cnt;
-
- net->ipv4.tcp_sk = alloc_percpu(struct sock *);
- if (!net->ipv4.tcp_sk)
- return -ENOMEM;
-
- for_each_possible_cpu(cpu) {
- struct sock *sk;
-
- res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
- IPPROTO_TCP, net);
- if (res)
- goto fail;
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-
- /* Please enforce IP_DF and IPID==0 for RST and
- * ACK sent in SYN-RECV and TIME-WAIT state.
- */
- inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
-
- *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
- }
+ int cnt;
net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_tw_reuse = 2;
net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
+ net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL);
+ if (!net->ipv4.tcp_death_row)
+ return -ENOMEM;
+ refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1);
cnt = tcp_hashinfo.ehash_mask + 1;
- net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
- net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
+ net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2;
+ net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo;
net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.tcp_congestion_control = &tcp_reno;
return 0;
-fail:
- tcp_sk_exit(net);
-
- return res;
}
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
- inet_twsk_purge(&tcp_hashinfo, AF_INET);
-
list_for_each_entry(net, net_exit_list, exit_list)
tcp_fastopen_ctx_destroy(net);
}
void __init tcp_v4_init(void)
{
+ int cpu, res;
+
+ for_each_possible_cpu(cpu) {
+ struct sock *sk;
+
+ res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+ IPPROTO_TCP, &init_net);
+ if (res)
+ panic("Failed to create the TCP control socket.\n");
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+ /* Please enforce IP_DF and IPID==0 for RST and
+ * ACK sent in SYN-RECV and TIME-WAIT state.
+ */
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
+ per_cpu(ipv4_tcp_sk, cpu) = sk;
+ }
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct inet_timewait_sock *tw;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+ struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
tw = inet_twsk_alloc(sk, tcp_death_row, state);
bytes = min_t(unsigned long,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+ sk->sk_gso_max_size);
/* Goal is to send at least one packet per ms,
* not one big TSO packet every 100 ms.
#include <linux/uaccess.h>
-/*
- * The ICMP socket(s). This is the most convenient way to flow control
- * our ICMP output as well as maintain a clean interface throughout
- * all layers. All Socketless IP sends will soon be gone.
- *
- * On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
- return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
};
/* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
{
struct sock *sk;
- sk = icmpv6_sk(net);
+ sk = this_cpu_read(ipv6_icmp_sk);
if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path (f.e. SIT or
* ip6ip6 tunnel) signals dst_link_failure() for an
*/
return NULL;
}
+ sock_net_set(sk, net);
return sk;
}
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
{
+ sock_net_set(sk, &init_net);
spin_unlock(&sk->sk_lock.slock);
}
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
- int i;
-
- for_each_possible_cpu(i)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
- free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
{
struct sock *sk;
int err, i;
- net->ipv6.icmp_sk = alloc_percpu(struct sock *);
- if (!net->ipv6.icmp_sk)
- return -ENOMEM;
-
for_each_possible_cpu(i) {
err = inet_ctl_sock_create(&sk, PF_INET6,
- SOCK_RAW, IPPROTO_ICMPV6, net);
+ SOCK_RAW, IPPROTO_ICMPV6, &init_net);
if (err < 0) {
pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
err);
- goto fail;
+ return err;
}
- *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+ per_cpu(ipv6_icmp_sk, i) = sk;
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
}
- return 0;
-
- fail:
- icmpv6_sk_exit(net);
- return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
- .init = icmpv6_sk_init,
- .exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
- int err;
-
- err = register_pernet_subsys(&icmpv6_sk_ops);
- if (err < 0)
- return err;
err = -EAGAIN;
if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
pr_err("Failed to register ICMP6 protocol\n");
- unregister_pernet_subsys(&icmpv6_sk_ops);
return err;
}
void icmpv6_cleanup(void)
{
inet6_unregister_icmp_sender(icmp6_send);
- unregister_pernet_subsys(&icmpv6_sk_ops);
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
if ((first_word & htonl(0xF00FFFFF)) ||
!ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
!ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
- *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+ iph->nexthdr != iph2->nexthdr) {
not_same_flow:
NAPI_GRO_CB(p)->same_flow = 0;
continue;
goto not_same_flow;
}
/* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+ NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
+ (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
NAPI_GRO_CB(p)->flush |= flush;
/* If the previous IP ID value was based on an atomic
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
+ } else if (skb->protocol == htons(ETH_P_IP)) {
+ struct rtable *rt = skb_rtable(skb);
+
+ if (rt->rt_gw_family == AF_INET6)
+ memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr));
}
} else if (t->parms.proto != 0 && !(t->parms.flags &
(IP6_TNL_F_USE_ORIG_TCLASS |
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_timewait_death_row *tcp_death_row;
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
struct dst_entry *dst;
int addr_type;
int err;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
inet->inet_dport = usin->sin6_port;
tcp_set_state(sk, TCP_SYN_SENT);
+ tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
err = inet6_hash_connect(tcp_death_row, sk);
if (err)
goto late_failure;
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&tcp_hashinfo, AF_INET6);
-}
-
static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
- .exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+ifeq ($(CONFIG_NF_CONNTRACK),m)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
+else ifeq ($(CONFIG_NF_CONNTRACK),y)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
+endif
obj-$(CONFIG_NETFILTER) = netfilter.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Conntrack Helpers for XDP and TC-BPF hook
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+#include <linux/btf_ids.h>
+#include <linux/net_namespace.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx (xdp_md, __sk_buff)
+ * [0, S32_MAX]
+ * Network Namespace ID
+ * @error - Out parameter, set for any errors encountered
+ * Written only on failure; the lookup helpers in this file
+ * never reset it on success.
+ * Values:
+ * -EINVAL - Passed NULL for bpf_tuple pointer
+ * -EINVAL - opts->reserved is not 0
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - Conntrack lookup could not find entry for tuple
+ * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ * or sizeof(tuple->ipv6)
+ * @l4proto - Layer 4 protocol
+ * Values:
+ * IPPROTO_TCP, IPPROTO_UDP
+ * @reserved - Reserved member, will be reused for more options in future
+ * Values:
+ * 0
+ *
+ * The structure is exactly NF_BPF_CT_OPTS_SZ bytes; the lookup helpers
+ * assert this at build time with BUILD_BUG_ON().
+ */
+struct bpf_ct_opts {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 reserved[3];
+};
+
+enum {
+ NF_BPF_CT_OPTS_SZ = 12,
+};
+/* __bpf_nf_ct_lookup - Common conntrack lookup behind the XDP and TC kfuncs
+ *
+ * Builds an nf_conntrack_tuple from @bpf_tuple (IPv4 or IPv6 layout, selected
+ * by @tuple_len) and looks it up in nf_ct_zone_dflt with
+ * nf_conntrack_find_get(). Ports are stored through the u.tcp member for
+ * both TCP and UDP.
+ *
+ * When @netns_id is >= 0 the lookup runs in the namespace returned by
+ * get_net_ns_by_id(net, netns_id), and put_net() is called on it before
+ * returning; otherwise the caller-supplied @net is used as is.
+ *
+ * Returns the nf_conn of the matching entry, or an ERR_PTR() carrying
+ * -EPROTO, -EINVAL, -EAFNOSUPPORT, -ENONET or -ENOENT.
+ */
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, u8 protonum,
+ s32 netns_id)
+{
+ struct nf_conntrack_tuple_hash *hash;
+ struct nf_conntrack_tuple tuple;
+
+ if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+ return ERR_PTR(-EPROTO);
+ if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ memset(&tuple, 0, sizeof(tuple));
+ switch (tuple_len) { /* the tuple size selects the address family */
+ case sizeof(bpf_tuple->ipv4):
+ tuple.src.l3num = AF_INET;
+ tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
+ tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
+ tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
+ tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
+ break;
+ case sizeof(bpf_tuple->ipv6):
+ tuple.src.l3num = AF_INET6;
+ memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+ tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
+ memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+ tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
+ break;
+ default:
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
+ tuple.dst.protonum = protonum;
+
+ if (netns_id >= 0) { /* explicit namespace requested */
+ net = get_net_ns_by_id(net, netns_id);
+ if (unlikely(!net))
+ return ERR_PTR(-ENONET);
+ }
+
+ hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+ if (netns_id >= 0)
+ put_net(net); /* pairs with get_net_ns_by_id() above */
+ if (!hash)
+ return ERR_PTR(-ENOENT);
+ return nf_ct_tuplehash_to_ctrack(hash);
+}
+
+__diag_push();
+__diag_ignore(GCC, 8, "-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ *
+ * Return: the nf_conn on success. NULL on failure, with the reason stored
+ * in opts->error (nothing is stored when @opts itself is NULL).
+ *
+ * The caller's namespace is taken from the device of the XDP receive
+ * queue (ctx->rxq->dev).
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+ if (!opts)
+ return NULL;
+ if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+ caller_net = dev_net(ctx->rxq->dev);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+ opts->netns_id);
+ if (IS_ERR(nfct)) {
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ *
+ * Return: the nf_conn on success. NULL on failure, with the reason stored
+ * in opts->error (nothing is stored when @opts itself is NULL).
+ *
+ * The caller's namespace is taken from skb->dev when it is set, and from
+ * skb->sk otherwise.
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+ if (!opts)
+ return NULL;
+ if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+ caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+ opts->netns_id);
+ if (IS_ERR(nfct)) {
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ * A NULL pointer is accepted and ignored.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+ if (!nfct)
+ return;
+ nf_ct_put(nfct);
+}
+
+__diag_pop()
+/* BTF ID sets referenced by the kfunc set definitions in this file: one
+ * "check" set per program type (XDP, TC), plus acquire and release sets
+ * shared by both.
+ */
+BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_tc_check_kfunc_ids)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_tc_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_acquire_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_SET_END(nf_ct_acquire_kfunc_ids)
+
+BTF_SET_START(nf_ct_release_kfunc_ids)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_release_kfunc_ids)
+
+/* The ret_null set is identical to the acquire set: both consist of the
+ * two lookup kfuncs, each of which returns NULL on failure.
+ */
+#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
+/* kfunc set registered for BPF_PROG_TYPE_XDP programs. */
+static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &nf_ct_xdp_check_kfunc_ids,
+ .acquire_set = &nf_ct_acquire_kfunc_ids,
+ .release_set = &nf_ct_release_kfunc_ids,
+ .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+/* kfunc set registered for BPF_PROG_TYPE_SCHED_CLS (TC) programs. */
+static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &nf_ct_tc_check_kfunc_ids,
+ .acquire_set = &nf_ct_acquire_kfunc_ids,
+ .release_set = &nf_ct_release_kfunc_ids,
+ .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+/* register_nf_conntrack_bpf - Register the conntrack kfunc sets
+ *
+ * Registers the XDP set first and, only if that returned 0, the TC
+ * (SCHED_CLS) set. Returns the first non-zero result, or the result of the
+ * second registration.
+ */
+int register_nf_conntrack_bpf(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
+}
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
+ ret = register_nf_conntrack_bpf();
+ if (ret < 0)
+ goto err_kfunc;
+
return 0;
+err_kfunc:
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+ nf_conntrack_proto_fini();
err_proto:
nf_conntrack_seqadj_fini();
err_seqadj:
m->hdr[w] |= htonl(val);
}
-static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
-{
- u32 temp = msg->hdr[a];
-
- msg->hdr[a] = msg->hdr[b];
- msg->hdr[b] = temp;
-}
-
/*
* Word 0
*/
msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
}
-static inline void msg_reset_reroute_cnt(struct tipc_msg *m)
-{
- msg_set_bits(m, 1, 21, 0xf, 0);
-}
-
static inline u32 msg_lookup_scope(struct tipc_msg *m)
{
return msg_bits(m, 1, 19, 0x3);
msg_set_word(m, 2, n);
}
-static inline u32 msg_bcgap_after(struct tipc_msg *m)
-{
- return msg_bits(m, 2, 16, 0xffff);
-}
-
static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 2, 16, 0xffff, n);
msg_set_bits(m, 4, 0, 0xffff, n);
}
-static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
-{
- msg_set_bits(m, 4, 0, 0xffff, n);
-}
-
static inline u32 msg_bc_netid(struct tipc_msg *m)
{
return msg_word(m, 4);
return sk;
}
-static struct sock *unix_next_socket(struct seq_file *seq,
- struct sock *sk,
- loff_t *pos)
+static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
unsigned long bucket = get_bucket(*pos);
+ struct sock *sk;
- while (sk > (struct sock *)SEQ_START_TOKEN) {
- sk = sk_next(sk);
- if (!sk)
- goto next_bucket;
- if (sock_net(sk) == seq_file_net(seq))
- return sk;
- }
-
- do {
+ while (bucket < ARRAY_SIZE(unix_socket_table)) {
spin_lock(&unix_table_locks[bucket]);
+
sk = unix_from_bucket(seq, pos);
if (sk)
return sk;
-next_bucket:
- spin_unlock(&unix_table_locks[bucket++]);
- *pos = set_bucket_offset(bucket, 1);
- } while (bucket < ARRAY_SIZE(unix_socket_table));
+ spin_unlock(&unix_table_locks[bucket]);
+
+ *pos = set_bucket_offset(++bucket, 1);
+ }
return NULL;
}
+/* Return the socket that follows @sk and belongs to the seq file's netns.
+ *
+ * The remainder of the current bucket's chain is searched first.  If it
+ * holds no match, the lock of the current bucket is dropped, *pos is moved
+ * to offset 1 of the following bucket and the search is handed over to
+ * unix_get_first().
+ */
+static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
+				  loff_t *pos)
+{
+	unsigned long bucket = get_bucket(*pos);
+
+	while ((sk = sk_next(sk)) != NULL) {
+		if (sock_net(sk) == seq_file_net(seq))
+			return sk;
+	}
+
+	spin_unlock(&unix_table_locks[bucket]);
+
+	bucket++;
+	*pos = set_bucket_offset(bucket, 1);
+
+	return unix_get_first(seq, pos);
+}
+
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
if (!*pos)
return SEQ_START_TOKEN;
- if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
- return NULL;
-
- return unix_next_socket(seq, NULL, pos);
+ return unix_get_first(seq, pos);
}
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
- return unix_next_socket(seq, v, pos);
+
+ if (v == SEQ_START_TOKEN)
+ return unix_get_first(seq, pos);
+
+ return unix_get_next(seq, v, pos);
}
static void unix_seq_stop(struct seq_file *seq, void *v)
};
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_unix_iter_state { /* seq_file private data of the unix bpf iterator (sized via unix_seq_info.seq_priv_size) */
+ struct seq_net_private p;
+ unsigned int cur_sk; /* index into batch[] of the socket currently handed to seq_show() */
+ unsigned int end_sk; /* number of sockets stored (and held) in batch[] */
+ unsigned int max_sk; /* capacity of batch[], in entries */
+ struct sock **batch; /* sockets on which sock_hold() was taken for the current batch */
+ bool st_bucket_done; /* set once a whole bucket fit into batch[]; makes the next batch start at the following bucket */
+};
+
struct bpf_iter__unix {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct unix_sock *, unix_sk);
return bpf_iter_run_prog(prog, &ctx);
}
+/* Take a reference on @start_sk and on every later socket of the same
+ * bucket chain that belongs to the seq file's netns, storing them in
+ * iter->batch[] while there is room, then drop the bucket lock.
+ *
+ * Returns the number of matching sockets in the chain, @start_sk included,
+ * whether or not they all fit; the caller compares it with iter->end_sk to
+ * find out if the batch was large enough.
+ */
+static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
+{
+	struct bpf_unix_iter_state *iter = seq->private;
+	unsigned int nr_matched = 0;
+	struct sock *sk = start_sk;
+
+	/* @start_sk is stored without checking it against iter->max_sk. */
+	sock_hold(sk);
+	iter->batch[iter->end_sk++] = sk;
+	nr_matched++;
+
+	while ((sk = sk_next(sk)) != NULL) {
+		if (sock_net(sk) != seq_file_net(seq))
+			continue;
+
+		nr_matched++;
+
+		/* Batch is full: keep counting, but hold nothing more. */
+		if (iter->end_sk >= iter->max_sk)
+			continue;
+
+		sock_hold(sk);
+		iter->batch[iter->end_sk++] = sk;
+	}
+
+	spin_unlock(&unix_table_locks[start_sk->sk_hash]);
+
+	return nr_matched;
+}
+
+/* Drop the references on the batch entries from cur_sk up to, but not
+ * including, end_sk, advancing cur_sk past each entry that is released.
+ */
+static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
+{
+	for (; iter->cur_sk < iter->end_sk; iter->cur_sk++)
+		sock_put(iter->batch[iter->cur_sk]);
+}
+
+/* Replace iter->batch with a newly allocated array of @new_batch_sz socket
+ * pointers.
+ *
+ * On success the references still held through the old batch are dropped,
+ * the old array is freed, iter->max_sk becomes @new_batch_sz and 0 is
+ * returned.  If the allocation fails, -ENOMEM is returned and the iterator
+ * state is left as it was.
+ */
+static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
+				       unsigned int new_batch_sz)
+{
+	struct sock **new_batch;
+
+	/* kvmalloc_array() checks the count * size product for overflow,
+	 * which the open-coded multiplication did not.
+	 */
+	new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch),
+				   GFP_USER | __GFP_NOWARN);
+	if (!new_batch)
+		return -ENOMEM;
+
+	bpf_iter_unix_put_batch(iter);
+	kvfree(iter->batch);
+	iter->batch = new_batch;
+	iter->max_sk = new_batch_sz;
+
+	return 0;
+}
+
+/* Build the next batch of sockets and return its first socket, or NULL
+ * once unix_get_first() finds nothing more.
+ *
+ * If st_bucket_done is set, *pos is first moved to offset 1 of the
+ * following bucket.  When a bucket holds more matching sockets than the
+ * batch can store, the batch is reallocated once to 1.5 times the expected
+ * count and the bucket is walked again; if it still does not fit, or the
+ * reallocation fails, the partial batch is returned as is.
+ *
+ * NOTE(review): st_bucket_done is only ever set to true in the code visible
+ * here and is not cleared when a partial batch is returned -- confirm that
+ * a stale value cannot make a later call skip the rest of a bucket.
+ */
+static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
+					loff_t *pos)
+{
+	struct bpf_unix_iter_state *iter = seq->private;
+	unsigned int expected;
+	bool resized = false;
+	struct sock *first;
+
+	if (iter->st_bucket_done)
+		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
+
+	for (;;) {
+		/* Get a new batch */
+		iter->cur_sk = 0;
+		iter->end_sk = 0;
+
+		first = unix_get_first(seq, pos);
+		if (!first)
+			return NULL; /* Done */
+
+		expected = bpf_iter_unix_hold_batch(seq, first);
+
+		if (iter->end_sk == expected) {
+			iter->st_bucket_done = true;
+			return first;
+		}
+
+		/* Only one resize attempt; settle for the partial batch
+		 * if it was already used up or if it fails now.
+		 */
+		if (resized || bpf_iter_unix_realloc_batch(iter, expected * 3 / 2))
+			return first;
+
+		resized = true;
+	}
+}
+
+/* seq_file ->start(): hand out SEQ_START_TOKEN at position 0, otherwise
+ * fetch a new batch for the current position.
+ */
+static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	/* bpf iter does not support lseek, so it always
+	 * continues from where it was stop()-ped.
+	 */
+	if (*pos)
+		return bpf_iter_unix_batch(seq, pos);
+
+	return SEQ_START_TOKEN;
+}
+
+/* seq_file ->next(): release the batch entry that was just shown, bump
+ * *pos and return the next socket, fetching a new batch when the current
+ * one is used up.
+ */
+static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_unix_iter_state *iter = seq->private;
+
+	/* By the time seq_next() runs, seq_show() is finished with
+	 * iter->cur_sk, so its reference can go and the cursor can
+	 * move on.
+	 */
+	if (iter->cur_sk < iter->end_sk) {
+		sock_put(iter->batch[iter->cur_sk]);
+		iter->cur_sk++;
+	}
+
+	++*pos;
+
+	if (iter->cur_sk >= iter->end_sk)
+		return bpf_iter_unix_batch(seq, pos);
+
+	return iter->batch[iter->cur_sk];
+}
+
static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
struct bpf_iter_meta meta;
struct bpf_prog *prog;
struct sock *sk = v;
uid_t uid;
+ bool slow;
+ int ret;
if (v == SEQ_START_TOKEN)
return 0;
+ slow = lock_sock_fast(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+ goto unlock;
+ }
+
uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
- return unix_prog_seq_show(prog, &meta, v, uid);
+ ret = unix_prog_seq_show(prog, &meta, v, uid);
+unlock:
+ unlock_sock_fast(sk, slow);
+ return ret;
}
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
+ struct bpf_unix_iter_state *iter = seq->private;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
(void)unix_prog_seq_show(prog, &meta, v, 0);
}
- unix_seq_stop(seq, v);
+ if (iter->cur_sk < iter->end_sk)
+ bpf_iter_unix_put_batch(iter);
}
static const struct seq_operations bpf_iter_unix_seq_ops = {
- .start = unix_seq_start,
- .next = unix_seq_next,
+ .start = bpf_iter_unix_seq_start,
+ .next = bpf_iter_unix_seq_next,
.stop = bpf_iter_unix_seq_stop,
.show = bpf_iter_unix_seq_show,
};
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
struct unix_sock *unix_sk, uid_t uid)
+#define INIT_BATCH_SZ 16
+
+/* init_seq_private callback: set up the netns part of the private data,
+ * then allocate the initial batch of INIT_BATCH_SZ entries.  The netns
+ * setup is undone again if that allocation fails.
+ *
+ * Returns 0 on success or the error of the step that failed.
+ */
+static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+	struct bpf_unix_iter_state *iter = priv_data;
+	int ret;
+
+	ret = bpf_iter_init_seq_net(priv_data, aux);
+	if (ret)
+		return ret;
+
+	ret = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
+	if (ret)
+		bpf_iter_fini_seq_net(priv_data);
+
+	return ret;
+}
+
+/* fini_seq_private callback: tear down the netns part of the private data
+ * and free the socket batch array.
+ */
+static void bpf_iter_fini_unix(void *priv_data)
+{
+	struct bpf_unix_iter_state *state = priv_data;
+	struct sock **batch = state->batch;
+
+	bpf_iter_fini_seq_net(priv_data);
+	kvfree(batch);
+}
+
static const struct bpf_iter_seq_info unix_seq_info = {
.seq_ops = &bpf_iter_unix_seq_ops,
- .init_seq_private = bpf_iter_init_seq_net,
- .fini_seq_private = bpf_iter_fini_seq_net,
- .seq_priv_size = sizeof(struct seq_net_private),
+ .init_seq_private = bpf_iter_init_unix,
+ .fini_seq_private = bpf_iter_fini_unix,
+ .seq_priv_size = sizeof(struct bpf_unix_iter_state),
};
+/* get_func_proto callback of the unix iterator: resolve the setsockopt and
+ * getsockopt helper IDs to their bpf_sk_*sockopt protos and return NULL for
+ * every other helper ID.
+ */
+static const struct bpf_func_proto *
+bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
+			     const struct bpf_prog *prog)
+{
+	if (func_id == BPF_FUNC_setsockopt)
+		return &bpf_sk_setsockopt_proto;
+
+	if (func_id == BPF_FUNC_getsockopt)
+		return &bpf_sk_getsockopt_proto;
+
+	return NULL;
+}
+
static struct bpf_iter_reg unix_reg_info = {
.target = "unix",
.ctx_arg_info_size = 1,
{ offsetof(struct bpf_iter__unix, unix_sk),
PTR_TO_BTF_ID_OR_NULL },
},
+ .get_func_proto = bpf_iter_unix_get_func_proto,
.seq_info = &unix_seq_info,
};
{
__u32 curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given interface\n");
else
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given iface\n");
else
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
{
int err;
- err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
+ err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
{
int err;
- err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+ err = bpf_xdp_detach(idx, xdp_flags, NULL);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
int i = 0;
for (i = 0; i < total_ifindex; i++) {
- if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
- printf("bpf_get_link_xdp_id on iface %d failed\n",
+ if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) {
+ printf("bpf_xdp_query_id on iface %d failed\n",
ifindex_list[i]);
exit(1);
}
if (prog_id_list[i] == prog_id)
- bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_xdp_detach(ifindex_list[i], flags, NULL);
else if (!prog_id)
printf("couldn't find a prog id on iface %d\n",
ifindex_list[i]);
}
prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
for (i = 0; i < total_ifindex; i++) {
- if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
+ if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) {
printf("link set xdp fd failed\n");
int recovery_index = i;
for (i = 0; i < recovery_index; i++)
- bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_xdp_detach(ifindex_list[i], flags, NULL);
return 1;
}
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(EXIT_FAIL);
}
if (prog_id == curr_prog_id) {
fprintf(stderr,
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
} else if (!curr_prog_id) {
printf("couldn't find a prog id on a given iface\n");
} else {
static struct record *alloc_record_per_rxq(void)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
struct record *array;
array = calloc(nr_rxqs, sizeof(struct record));
static struct stats_record *alloc_stats_record(void)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
struct stats_record *rec;
int i;
static void free_stats_record(struct stats_record *r)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
int i;
for (i = 0; i < nr_rxqs; i++)
map_collect_percpu(fd, 0, &rec->stats);
fd = bpf_map__fd(rx_queue_index_map);
- max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ max_rxqs = bpf_map__max_entries(rx_queue_index_map);
for (i = 0; i < max_rxqs; i++)
map_collect_percpu(fd, i, &rec->rxq[i]);
}
struct stats_record *stats_prev,
int action, __u32 cfg_opt)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
unsigned int nr_cpus = bpf_num_possible_cpus();
double pps = 0, err = 0;
struct record *rec, *prev;
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
}
__u32 info_len = sizeof(info);
int err;
- err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
+ err = bpf_xdp_attach(idx, fd, xdp_flags, NULL);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
__u32 curr_prog_id = 0;
int err = 0;
- err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
+ err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id);
if (err) {
- printf("bpf_get_link_xdp_id failed\n");
+ printf("bpf_xdp_query_id failed\n");
return err;
}
if (prog_id == curr_prog_id) {
- err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+ err = bpf_xdp_detach(idx, xdp_flags, NULL);
if (err < 0)
printf("ERROR: failed to detach prog from %s\n", name);
} else if (!curr_prog_id) {
int ret;
if (prog_id) {
- ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags);
+ ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id);
if (ret < 0)
return -errno;
}
}
- return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ return bpf_xdp_detach(ifindex, xdp_flags, NULL);
}
int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
- ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
- xdp_flags);
+ ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL);
if (ret < 0) {
ret = -errno;
fprintf(stderr,
return ret;
}
- ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+ ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
if (ret < 0) {
ret = -errno;
fprintf(stderr,
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given iface\n");
else
}
}
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
poll_stats(kill_after_s);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
return 0;
}
unlink(SOCKET_NAME);
/* Unset fd for given ifindex */
- err = bpf_set_link_xdp_fd(ifindex, -1, 0);
+ err = bpf_xdp_detach(ifindex, 0, NULL);
if (err) {
fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
return err;
{
u32 curr_prog_id = 0;
- if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(EXIT_FAILURE);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
+ bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given interface\n");
else
if (ret)
exit_with_error(-ret);
- ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
+ ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id);
if (ret)
exit_with_error(-ret);
exit(EXIT_FAILURE);
}
- if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
+ if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) {
fprintf(stderr, "ERROR: link set xdp fd failed\n");
exit(EXIT_FAILURE);
}
int i;
for (i = 0 ; i < n_ports; i++)
- bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1,
- port_params[i].xsk_cfg.xdp_flags);
+ bpf_xdp_detach(if_nametoindex(port_params[i].iface),
+ port_params[i].xsk_cfg.xdp_flags, NULL);
}
int main(int argc, char **argv)
self.line = ''
self.helpers = []
self.commands = []
+ self.desc_unique_helpers = set()
+ self.define_unique_helpers = []
+ self.desc_syscalls = []
+ self.enum_syscalls = []
def parse_element(self):
proto = self.parse_symbol()
- desc = self.parse_desc()
- ret = self.parse_ret()
+ desc = self.parse_desc(proto)
+ ret = self.parse_ret(proto)
return APIElement(proto=proto, desc=desc, ret=ret)
def parse_helper(self):
proto = self.parse_proto()
- desc = self.parse_desc()
- ret = self.parse_ret()
+ desc = self.parse_desc(proto)
+ ret = self.parse_ret(proto)
return Helper(proto=proto, desc=desc, ret=ret)
def parse_symbol(self):
- p = re.compile(' \* ?(.+)$')
+ p = re.compile(' \* ?(BPF\w+)$')
capture = p.match(self.line)
if not capture:
raise NoSyscallCommandFound
self.line = self.reader.readline()
return capture.group(1)
- def parse_desc(self):
+ def parse_desc(self, proto):
p = re.compile(' \* ?(?:\t| {5,8})Description$')
capture = p.match(self.line)
if not capture:
- # Helper can have empty description and we might be parsing another
- # attribute: return but do not consume.
- return ''
+ raise Exception("No description section found for " + proto)
# Description can be several lines, some of them possibly empty, and it
# stops when another subsection title is met.
desc = ''
+ desc_present = False
while True:
self.line = self.reader.readline()
if self.line == ' *\n':
p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
+ desc_present = True
desc += capture.group(1) + '\n'
else:
break
+
+ if not desc_present:
+ raise Exception("No description found for " + proto)
return desc
- def parse_ret(self):
+ def parse_ret(self, proto):
p = re.compile(' \* ?(?:\t| {5,8})Return$')
capture = p.match(self.line)
if not capture:
- # Helper can have empty retval and we might be parsing another
- # attribute: return but do not consume.
- return ''
+ raise Exception("No return section found for " + proto)
# Return value description can be several lines, some of them possibly
# empty, and it stops when another subsection title is met.
ret = ''
+ ret_present = False
while True:
self.line = self.reader.readline()
if self.line == ' *\n':
p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
+ ret_present = True
ret += capture.group(1) + '\n'
else:
break
+
+ if not ret_present:
+ raise Exception("No return found for " + proto)
return ret
- def seek_to(self, target, help_message):
+ def seek_to(self, target, help_message, discard_lines = 1):
self.reader.seek(0)
offset = self.reader.read().find(target)
if offset == -1:
raise Exception(help_message)
self.reader.seek(offset)
self.reader.readline()
- self.reader.readline()
+ for _ in range(discard_lines):
+ self.reader.readline()
self.line = self.reader.readline()
- def parse_syscall(self):
+ def parse_desc_syscall(self):
self.seek_to('* DOC: eBPF Syscall Commands',
'Could not find start of eBPF syscall descriptions list')
while True:
try:
command = self.parse_element()
self.commands.append(command)
+ self.desc_syscalls.append(command.proto)
+
except NoSyscallCommandFound:
break
- def parse_helpers(self):
+ def parse_enum_syscall(self):
+ self.seek_to('enum bpf_cmd {',
+ 'Could not find start of bpf_cmd enum', 0)
+ # Searches for either one or more BPF\w+ enums
+ bpf_p = re.compile('\s*(BPF\w+)+')
+ # Searches for an enum entry assigned to another entry,
+ # e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is not
+ # documented and hence should be skipped when checking
+ # that the right number of syscalls are documented
+ assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+ bpf_cmd_str = ''
+ while True:
+ capture = assign_p.match(self.line)
+ if capture:
+ # Skip line if an enum entry is assigned to another entry
+ self.line = self.reader.readline()
+ continue
+ capture = bpf_p.match(self.line)
+ if capture:
+ bpf_cmd_str += self.line
+ else:
+ break
+ self.line = self.reader.readline()
+ # Collect every occurrence of BPF\w+ in the accumulated enum lines
+ self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+
+ def parse_desc_helpers(self):
self.seek_to('* Start of BPF helper function descriptions:',
'Could not find start of eBPF helper descriptions list')
while True:
try:
helper = self.parse_helper()
self.helpers.append(helper)
+ proto = helper.proto_break_down()
+ self.desc_unique_helpers.add(proto['name'])
except NoHelperFound:
break
+ def parse_define_helpers(self):
+ # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare
+ # later with the number of unique function names present in description.
+ # Note: seek_to(..) discards the first line below the target search text,
+ # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers.
+ self.seek_to('#define __BPF_FUNC_MAPPER(FN)',
+ 'Could not find start of eBPF helper definition list')
+ # Searches for either one or more FN(\w+) defines or a backslash for newline
+ p = re.compile('\s*(FN\(\w+\))+|\\\\')
+ fn_defines_str = ''
+ while True:
+ capture = p.match(self.line)
+ if capture:
+ fn_defines_str += self.line
+ else:
+ break
+ self.line = self.reader.readline()
+ # Collect every occurrence of FN(\w+) in the accumulated macro lines
+ self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str)
+
def run(self):
- self.parse_syscall()
- self.parse_helpers()
+ self.parse_desc_syscall()
+ self.parse_enum_syscall()
+ self.parse_desc_helpers()
+ self.parse_define_helpers()
self.reader.close()
###############################################################################
self.print_one(elem)
self.print_footer()
+ def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance):
+ """
+ Compares the number of helpers/syscalls documented in the header file
+ description with the number defined as part of the enum/macro, and
+ raises an Exception if the two counts don't match.
+ """
+ nr_desc_unique_elem = len(desc_unique_elem)
+ nr_define_unique_elem = len(define_unique_elem)
+ if nr_desc_unique_elem != nr_define_unique_elem:
+ exception_msg = '''
+The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d)
+''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem)
+ if nr_desc_unique_elem < nr_define_unique_elem:
+ # Function description is parsed until no helper is found (which can be due to
+ # misformatting). Hence, only print the first missing/misformatted helper/enum.
+ exception_msg += '''
+The description for %s is not present or formatted correctly.
+''' % (define_unique_elem[nr_desc_unique_elem])
+ raise Exception(exception_msg)
class PrinterRST(Printer):
"""
print('')
-
class PrinterHelpersRST(PrinterRST):
"""
A printer for dumping collected information about helpers as a ReStructured
"""
def __init__(self, parser):
self.elements = parser.helpers
+ self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
def print_header(self):
header = '''\
"""
def __init__(self, parser):
self.elements = parser.commands
+ self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd')
def print_header(self):
header = '''\
"""
def __init__(self, parser):
self.elements = parser.helpers
+ self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
type_fwds = [
'struct bpf_fib_lookup',
int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
if (rc)
- return -EPERM;
+ return rc;
#ifdef CONFIG_CGROUP_DEVICE
return devcgroup_legacy_check_permission(type, major, minor, access);
equal_fn_for_key_as_id, NULL);
btf_map_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
- if (!btf_prog_table || !btf_map_table) {
+ if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) {
hashmap__free(btf_prog_table);
hashmap__free(btf_map_table);
if (fd >= 0)
const char *attach_flags_str,
int level)
{
+ char prog_name[MAX_PROG_FULL_NAME];
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
int prog_fd;
return -1;
}
+ get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info.id);
jsonw_uint_field(json_wtr, "attach_type", attach_type);
jsonw_string_field(json_wtr, "attach_flags",
attach_flags_str);
- jsonw_string_field(json_wtr, "name", info.name);
+ jsonw_string_field(json_wtr, "name", prog_name);
jsonw_end_object(json_wtr);
} else {
printf("%s%-8u ", level ? " " : "", info.id);
printf("%-15s", attach_type_name[attach_type]);
else
printf("type %-10u", attach_type);
- printf(" %-15s %-15s\n", attach_flags_str, info.name);
+ printf(" %-15s %-15s\n", attach_flags_str, prog_name);
}
close(prog_fd);
#include <bpf/bpf.h>
#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/btf.h>
#include "main.h"
return names[type];
}
+/* Write the full name of a program into @name_buff (at most @buff_len bytes,
+ * always NUL-terminated by snprintf() when @buff_len is non-zero).
+ *
+ * prog_info->name holds at most BPF_OBJ_NAME_LEN - 1 characters.  When the
+ * name uses all of them, @name_buff is larger than BPF_OBJ_NAME_LEN and the
+ * program has a BTF ID and func info, the first func info record is queried
+ * through @prog_fd and the name of that function is taken from the
+ * program's BTF instead.  If any of these steps fails, the name from
+ * @prog_info is copied unchanged.
+ */
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+			char *name_buff, size_t buff_len)
+{
+	const char *prog_name = prog_info->name;
+	const struct btf_type *func_type;
+	/* Written by the kernel through info.func_info and read back below,
+	 * so it must not be const; zero-initialised so that it is never
+	 * read uninitialised.
+	 */
+	struct bpf_func_info finfo = {};
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	struct btf *prog_btf = NULL;
+	const char *btf_name;
+
+	if (buff_len <= BPF_OBJ_NAME_LEN ||
+	    strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1)
+		goto copy_name;
+
+	if (!prog_info->btf_id || prog_info->nr_func_info == 0)
+		goto copy_name;
+
+	/* Ask for the first func info record only. */
+	info.nr_func_info = 1;
+	info.func_info_rec_size = prog_info->func_info_rec_size;
+	if (info.func_info_rec_size > sizeof(finfo))
+		info.func_info_rec_size = sizeof(finfo);
+	info.func_info = ptr_to_u64(&finfo);
+
+	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
+		goto copy_name;
+
+	prog_btf = btf__load_from_kernel_by_id(info.btf_id);
+	if (!prog_btf)
+		goto copy_name;
+
+	func_type = btf__type_by_id(prog_btf, finfo.type_id);
+	if (!func_type || !btf_is_func(func_type))
+		goto copy_name;
+
+	/* Keep the kernel-provided name if the lookup yields no string. */
+	btf_name = btf__name_by_offset(prog_btf, func_type->name_off);
+	if (btf_name)
+		prog_name = btf_name;
+
+copy_name:
+	snprintf(name_buff, buff_len, "%s", prog_name);
+
+	if (prog_btf)
+		btf__free(prog_btf);
+}
+
int get_fd_type(int fd)
{
char path[PATH_MAX];
/* only generate definitions for memory-mapped internal maps */
if (!bpf_map__is_internal(map))
continue;
- if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
continue;
if (!get_map_ident(map, map_ident, sizeof(map_ident)))
if (!get_map_ident(map, ident, sizeof(ident)))
continue;
if (bpf_map__is_internal(map) &&
- (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ (bpf_map__map_flags(map) & BPF_F_MMAPABLE))
printf("\tmunmap(skel->%1$s, %2$zd);\n",
ident, bpf_map_mmap_sz(map));
codegen("\
continue;
if (!bpf_map__is_internal(map) ||
- !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
continue;
codegen("\
continue;
if (!bpf_map__is_internal(map) ||
- !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
continue;
- if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+ if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
mmap_flags = "PROT_READ";
else
mmap_flags = "PROT_READ | PROT_WRITE";
s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
if (!s) \n\
goto err; \n\
- obj->skeleton = s; \n\
\n\
s->sz = sizeof(*s); \n\
s->name = \"%1$s\"; \n\
i, bpf_map__name(map), i, ident);
/* memory-mapped internal maps */
if (bpf_map__is_internal(map) &&
- (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) {
+ (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
i, ident);
}
\n\
s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\
\n\
+ obj->skeleton = s; \n\
return 0; \n\
err: \n\
bpf_object__destroy_skeleton(s); \n\
/* Copyright (C) 2020 Facebook */
#include <errno.h>
+#include <linux/err.h>
#include <net/if.h>
#include <stdio.h>
#include <unistd.h>
if (show_pinned) {
link_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
- if (!link_table) {
+ if (IS_ERR(link_table)) {
p_err("failed to create hashmap for pinned paths");
return -1;
}
}
if (!legacy_libbpf) {
- ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ enum libbpf_strict_mode mode;
+
+ /* Allow legacy map definitions for skeleton generation.
+ * It will still be rejected if users use LIBBPF_STRICT_ALL
+ * mode for loading generated skeleton.
+ */
+ mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS;
+ ret = libbpf_set_strict_mode(mode);
if (ret)
p_err("failed to enable libbpf strict mode: %d", ret);
}
int cmd_select(const struct cmd *cmds, int argc, char **argv,
int (*help)(int argc, char **argv));
+#define MAX_PROG_FULL_NAME 128
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+ char *name_buff, size_t buff_len);
+
int get_fd_type(int fd);
const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
if (show_pinned) {
map_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
- if (!map_table) {
+ if (IS_ERR(map_table)) {
p_err("failed to create hashmap for pinned paths");
return -1;
}
if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD)
flags |= XDP_FLAGS_HW_MODE;
- return bpf_set_link_xdp_fd(ifindex, progfd, flags);
+ return bpf_xdp_attach(ifindex, progfd, flags, NULL);
}
static int do_attach(int argc, char **argv)
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2020 Facebook */
#include <errno.h>
+#include <linux/err.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
libbpf_print_fn_t default_print;
*map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL);
- if (!*map) {
+ if (IS_ERR(*map)) {
p_err("failed to create hashmap for PID references");
return -1;
}
free(value);
}
-static void print_prog_header_json(struct bpf_prog_info *info)
+static void print_prog_header_json(struct bpf_prog_info *info, int fd)
{
+ char prog_name[MAX_PROG_FULL_NAME];
+
jsonw_uint_field(json_wtr, "id", info->id);
if (info->type < ARRAY_SIZE(prog_type_name))
jsonw_string_field(json_wtr, "type",
else
jsonw_uint_field(json_wtr, "type", info->type);
- if (*info->name)
- jsonw_string_field(json_wtr, "name", info->name);
+ if (*info->name) {
+ get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+ jsonw_string_field(json_wtr, "name", prog_name);
+ }
jsonw_name(json_wtr, "tag");
jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"",
char *memlock;
jsonw_start_object(json_wtr);
- print_prog_header_json(info);
+ print_prog_header_json(info, fd);
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
if (info->load_time) {
jsonw_end_object(json_wtr);
}
-static void print_prog_header_plain(struct bpf_prog_info *info)
+static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
{
+ char prog_name[MAX_PROG_FULL_NAME];
+
printf("%u: ", info->id);
if (info->type < ARRAY_SIZE(prog_type_name))
printf("%s ", prog_type_name[info->type]);
else
printf("type %u ", info->type);
- if (*info->name)
- printf("name %s ", info->name);
+ if (*info->name) {
+ get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+ printf("name %s ", prog_name);
+ }
printf("tag ");
fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
{
char *memlock;
- print_prog_header_plain(info);
+ print_prog_header_plain(info, fd);
if (info->load_time) {
char buf[32];
if (show_pinned) {
prog_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
- if (!prog_table) {
+ if (IS_ERR(prog_table)) {
p_err("failed to create hashmap for pinned paths");
return -1;
}
if (json_output && nb_fds > 1) {
jsonw_start_object(json_wtr); /* prog object */
- print_prog_header_json(&info);
+ print_prog_header_json(&info, fds[i]);
jsonw_name(json_wtr, "insns");
} else if (nb_fds > 1) {
- print_prog_header_plain(&info);
+ print_prog_header_plain(&info, fds[i]);
}
err = prog_dump(&info, mode, filepath, opcodes, visual, linum);
static int do_register(int argc, char **argv)
{
LIBBPF_OPTS(bpf_object_open_opts, open_opts);
- const struct bpf_map_def *def;
struct bpf_map_info info = {};
__u32 info_len = sizeof(info);
int nr_errs = 0, nr_maps = 0;
}
bpf_object__for_each_map(map, obj) {
- def = bpf_map__def(map);
- if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+ if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
continue;
link = bpf_map__attach_struct_ops(map);
ARCH = $(HOSTARCH)
RM ?= rm
CROSS_COMPILE =
+CFLAGS := $(KBUILD_HOSTCFLAGS)
+LDFLAGS := $(KBUILD_HOSTLDFLAGS)
OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT)
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \
- DESTDIR=$(LIBBPF_DESTDIR) prefix= \
+ DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \
$(abspath $@) install_headers
-CFLAGS := -g \
+CFLAGS += -g \
-I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
-I$(LIBBPF_INCLUDE) \
* *ctx_out*, *data_in* and *data_out* must be NULL.
* *repeat* must be zero.
*
+ * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*/
#define BPF_F_SLEEPABLE (1U << 4)
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully supports XDP frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS (1U << 5)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_current_pid_tgid(void)
+ * Description
+ * Get the current pid and tgid.
* Return
* A 64-bit integer containing the current tgid and pid, and
* created as such:
* *current_task*\ **->pid**.
*
* u64 bpf_get_current_uid_gid(void)
+ * Description
+ * Get the current uid and gid.
* Return
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
* The 32-bit hash.
*
* u64 bpf_get_current_task(void)
+ * Description
+ * Get the current task.
* Return
* A pointer to the current task struct.
*
* indicate that the hash is outdated and to trigger a
* recalculation the next time the kernel tries to access this
* hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ * Return
+ * void.
*
* long bpf_get_numa_node_id(void)
* Description
* A 8-byte long unique number or 0 if *sk* is NULL.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Description
+ * Get the owner UID of the socket associated to *skb*.
* Return
* The owner UID of the socket associated to *skb*. If the socket
* is **NULL**, or if it is not a full socket (i.e. if it is a
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
+ * Description
+ * Get the current cgroup id based on the cgroup within which
+ * the current task is running.
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
*
* Return
* The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ * Description
+ * Get the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ * Description
+ * Set the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ * Description
+ * Get the total size of a given xdp buff (linear and paged area).
+ * Return
+ * The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * This helper is provided as an easy way to load data from a
+ * xdp buffer. It can be used to load *len* bytes from *offset* from
+ * the frame associated to *xdp_md*, into the buffer pointed by
+ * *buf*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * Store *len* bytes from buffer *buf* into the frame
+ * associated to *xdp_md*, at *offset*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(get_func_arg), \
FN(get_func_ret), \
FN(get_func_arg_cnt), \
+ FN(get_retval), \
+ FN(set_retval), \
+ FN(xdp_get_buff_len), \
+ FN(xdp_load_bytes), \
+ FN(xdp_store_bytes), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
.flags = flags,
);
- return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+ return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
}
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+int bpf_prog_attach_opts(int prog_fd, int target_fd,
enum bpf_attach_type type,
const struct bpf_prog_attach_opts *opts)
{
return libbpf_err_errno(ret);
}
+__attribute__((alias("bpf_prog_attach_opts")))
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+ enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts);
+
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
+ enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
enum bpf_attach_type type,
const struct bpf_prog_attach_opts *opts);
unsigned int value_size;
unsigned int max_entries;
unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));
enum libbpf_pin_type {
LIBBPF_PIN_NONE,
struct btf_pipe {
const struct btf *src;
struct btf *dst;
+ struct hashmap *str_off_map; /* map string offsets from src to dst */
};
static int btf_rewrite_str(__u32 *str_off, void *ctx)
{
struct btf_pipe *p = ctx;
- int off;
+ void *mapped_off;
+ int off, err;
if (!*str_off) /* nothing to do for empty strings */
return 0;
+ if (p->str_off_map &&
+ hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
+ *str_off = (__u32)(long)mapped_off;
+ return 0;
+ }
+
off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
if (off < 0)
return off;
+ /* Remember string mapping from src to dst. It avoids
+ * performing expensive string comparisons.
+ */
+ if (p->str_off_map) {
+ err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+ if (err)
+ return err;
+ }
+
*str_off = off;
return 0;
}
return 0;
}
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+
int btf__add_btf(struct btf *btf, const struct btf *src_btf)
{
struct btf_pipe p = { .src = src_btf, .dst = btf };
if (!off)
return libbpf_err(-ENOMEM);
+ /* Map the string offsets from src_btf to the offsets from btf to improve performance */
+ p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+ if (IS_ERR(p.str_off_map))
+ return libbpf_err(-ENOMEM);
+
/* bulk copy types data for all types from src_btf */
memcpy(t, src_btf->types_data, data_sz);
btf->hdr->str_off += data_sz;
btf->nr_types += cnt;
+ hashmap__free(p.str_off_map);
+
/* return type ID of the first added BTF type */
return btf->start_id + btf->nr_types - cnt;
err_out:
* wasn't modified, so doesn't need restoring, see big comment above */
btf->hdr->str_len = old_strs_len;
+ hashmap__free(p.str_off_map);
+
return libbpf_err(err);
}
const struct btf_dump_type_data_opts *opts);
/*
- * A set of helpers for easier BTF types handling
+ * A set of helpers for easier BTF types handling.
+ *
+ * The inline functions below rely on constants from the kernel headers which
+ * may not be available for applications including this header file. To avoid
+ * compilation errors, we define all the constants here that were added after
+ * the initial introduction of the BTF_KIND* constants.
*/
+#ifndef BTF_KIND_FUNC
+#define BTF_KIND_FUNC 12 /* Function */
+#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+#endif
+#ifndef BTF_KIND_VAR
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#endif
+#ifndef BTF_KIND_FLOAT
+#define BTF_KIND_FLOAT 16 /* Floating point */
+#endif
+/* The kernel header switched to enums, so these two were never #defined */
+#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
+#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
+
static inline __u16 btf_kind(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info);
void hashmap__free(struct hashmap *map)
{
- if (!map)
+ if (IS_ERR_OR_NULL(map))
return;
hashmap__clear(map);
return true;
}
-
SEC_SLEEPABLE = 8,
/* allow non-strict prefix matching */
SEC_SLOPPY_PFX = 16,
+ /* BPF program supports non-linear XDP buffers */
+ SEC_XDP_FRAGS = 32,
};
struct bpf_sec_def {
if (obj->efile.maps_shndx < 0)
return 0;
+ if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
+ pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
if (!symbols)
return -EINVAL;
return -LIBBPF_ERRNO__FORMAT;
}
+ pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
+
if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
return -ENOTSUP;
return 0;
if (!bpf_map__is_internal(map)) {
+ pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
def->value_size, &key_type_id,
&value_type_id);
if (def & SEC_SLEEPABLE)
opts->prog_flags |= BPF_F_SLEEPABLE;
+ if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
+ opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+
if ((prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_LSM ||
prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
+ SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
+ SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
+ SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
+ if (!s)
+ return;
+
if (s->progs)
bpf_object__detach_skeleton(s);
if (s->obj)
LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
/* get map definition */
-LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
/* get map name */
LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
/* get/set map type */
};
#define bpf_xdp_set_link_opts__last_field old_fd
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
const struct bpf_xdp_set_link_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags);
+struct bpf_xdp_attach_opts { /* optional arguments accepted by bpf_xdp_attach() and bpf_xdp_detach() */
+ size_t sz; /* NOTE(review): presumably the struct size that OPTS_VALID() inspects - confirm */
+ int old_prog_fd; /* when non-zero, bpf_xdp_attach() sets XDP_FLAGS_REPLACE and passes this fd on */
+ size_t :0;
+};
+#define bpf_xdp_attach_opts__last_field old_prog_fd
+
+struct bpf_xdp_query_opts { /* result fields are stored by bpf_xdp_query() through OPTS_SET() */
+ size_t sz; /* NOTE(review): presumably the struct size that OPTS_VALID() inspects - confirm */
+ __u32 prog_id; /* output */
+ __u32 drv_prog_id; /* output */
+ __u32 hw_prog_id; /* output */
+ __u32 skb_prog_id; /* output */
+ __u8 attach_mode; /* output */
+ size_t :0;
+};
+#define bpf_xdp_query_opts__last_field attach_mode
+
+LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
+ const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags,
+ const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts);
+LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id);
+
/* TC related API */
enum bpf_tc_attach_point {
BPF_TC_INGRESS = 1 << 0,
bpf_link_create;
bpf_link_update;
bpf_map__set_initial_value;
+ bpf_prog_attach_opts;
bpf_program__attach_cgroup;
bpf_program__attach_lsm;
bpf_program__is_lsm;
bpf_program__log_level;
bpf_program__set_log_buf;
bpf_program__set_log_level;
+ bpf_xdp_attach;
+ bpf_xdp_detach;
+ bpf_xdp_query;
+ bpf_xdp_query_id;
libbpf_probe_bpf_helper;
libbpf_probe_bpf_map_type;
libbpf_probe_bpf_prog_type;
* operation.
*/
LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+ /*
+ * Error out on any SEC("maps") map definition; such legacy definitions
+ * are deprecated in favor of BTF-defined map definitions in SEC(".maps").
+ */
+ LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,
__LIBBPF_STRICT_LAST,
};
return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
}
+/*
+ * Attach the program referred to by prog_fd to interface ifindex.
+ *
+ * opts is validated with OPTS_VALID(); an invalid opts yields -EINVAL.
+ * A non-zero opts->old_prog_fd adds XDP_FLAGS_REPLACE to flags and is handed
+ * to __bpf_set_link_xdp_fd_replace(); otherwise -1 is handed over instead.
+ * The result of that call is returned through libbpf_err().
+ */
+int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+	int expected_fd;
+	int err;
+
+	if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
+		return libbpf_err(-EINVAL);
+
+	expected_fd = OPTS_GET(opts, old_prog_fd, 0);
+	if (!expected_fd)
+		expected_fd = -1;
+	else
+		flags |= XDP_FLAGS_REPLACE;
+
+	err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, expected_fd, flags);
+	return libbpf_err(err);
+}
+
+/* Forwards to bpf_xdp_attach() with a program fd of -1 and the same flags/opts. */
+int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+	const int no_prog_fd = -1;
+
+	return bpf_xdp_attach(ifindex, no_prog_fd, flags, opts);
+}
+
int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
const struct bpf_xdp_set_link_opts *opts)
{
return 0;
}
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
- size_t info_size, __u32 flags)
+int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
{
- struct xdp_id_md xdp_id = {};
- __u32 mask;
- int ret;
struct libbpf_nla_req req = {
.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.nh.nlmsg_type = RTM_GETLINK,
.nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.ifinfo.ifi_family = AF_PACKET,
};
+ struct xdp_id_md xdp_id = {};
+ int err;
- if (flags & ~XDP_FLAGS_MASK || !info_size)
+ if (!OPTS_VALID(opts, bpf_xdp_query_opts))
+ return libbpf_err(-EINVAL);
+
+ if (xdp_flags & ~XDP_FLAGS_MASK)
return libbpf_err(-EINVAL);
/* Check whether the single {HW,DRV,SKB} mode is set */
- flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
- mask = flags - 1;
- if (flags && flags & mask)
+ xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
+ if (xdp_flags & (xdp_flags - 1))
return libbpf_err(-EINVAL);
xdp_id.ifindex = ifindex;
- xdp_id.flags = flags;
+ xdp_id.flags = xdp_flags;
- ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+ err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
get_xdp_info, &xdp_id);
- if (!ret) {
- size_t sz = min(info_size, sizeof(xdp_id.info));
+ if (err)
+ return libbpf_err(err);
- memcpy(info, &xdp_id.info, sz);
- memset((void *) info + sz, 0, info_size - sz);
- }
+ OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
+ OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
+ OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
+ OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
+ OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
- return libbpf_err(ret);
+ return 0;
}
-static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+ size_t info_size, __u32 flags)
{
- flags &= XDP_FLAGS_MODES;
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ size_t sz;
+ int err;
+
+ if (!info_size)
+ return libbpf_err(-EINVAL);
- if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
- return info->prog_id;
- if (flags & XDP_FLAGS_DRV_MODE)
- return info->drv_prog_id;
- if (flags & XDP_FLAGS_HW_MODE)
- return info->hw_prog_id;
- if (flags & XDP_FLAGS_SKB_MODE)
- return info->skb_prog_id;
+ err = bpf_xdp_query(ifindex, flags, &opts);
+ if (err)
+ return libbpf_err(err);
+
+ /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
+ * layout after sz field
+ */
+ sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
+ memcpy(info, &opts.prog_id, sz);
+ memset((void *)info + sz, 0, info_size - sz);
return 0;
}
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
{
- struct xdp_link_info info;
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
int ret;
- ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
- if (!ret)
- *prog_id = get_xdp_id(&info, flags);
+ ret = bpf_xdp_query(ifindex, flags, &opts);
+ if (ret)
+ return libbpf_err(ret);
+
+ flags &= XDP_FLAGS_MODES;
- return libbpf_err(ret);
+ if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
+ *prog_id = opts.prog_id;
+ else if (flags & XDP_FLAGS_DRV_MODE)
+ *prog_id = opts.drv_prog_id;
+ else if (flags & XDP_FLAGS_HW_MODE)
+ *prog_id = opts.hw_prog_id;
+ else if (flags & XDP_FLAGS_SKB_MODE)
+ *prog_id = opts.skb_prog_id;
+ else
+ *prog_id = 0;
+
+ return 0;
+}
+
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+ return bpf_xdp_query_id(ifindex, flags, prog_id);
}
typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
{
struct bpf_map_op *op;
const char *map_name = bpf_map__name(map);
- const struct bpf_map_def *def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("Unable to get map definition from '%s'\n",
- map_name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
- if (def->type != BPF_MAP_TYPE_ARRAY) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
- if (def->key_size < sizeof(unsigned int)) {
+ if (bpf_map__key_size(map) < sizeof(unsigned int)) {
pr_debug("Map %s has incorrect key size\n", map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
}
- switch (def->value_size) {
+ switch (bpf_map__value_size(map)) {
case 1:
case 2:
case 4:
struct parse_events_term *term,
struct evlist *evlist)
{
- const struct bpf_map_def *def;
struct bpf_map_op *op;
const char *map_name = bpf_map__name(map);
struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("Unable to get map definition from '%s'\n",
- map_name);
- return PTR_ERR(def);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
+ return PTR_ERR(map);
}
/*
* No need to check key_size and value_size:
* kernel has already checked them.
*/
- if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
const char *map_name)
{
struct parse_events_array *array = &term->array;
- const struct bpf_map_def *def;
unsigned int i;
if (!array->nr_ranges)
return -BPF_LOADER_ERRNO__INTERNAL;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("ERROR: Unable to get map definition from '%s'\n",
- map_name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
size_t length = array->ranges[i].length;
unsigned int idx = start + length - 1;
- if (idx >= def->max_entries) {
+ if (idx >= bpf_map__max_entries(map)) {
pr_debug("ERROR: index %d too large\n", idx);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
}
}
typedef int (*map_config_func_t)(const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op,
void *pkey, void *arg);
static int
foreach_key_array_all(map_config_func_t func,
void *arg, const char *name,
- int map_fd, const struct bpf_map_def *pdef,
+ int map_fd, const struct bpf_map *map,
struct bpf_map_op *op)
{
unsigned int i;
int err;
- for (i = 0; i < pdef->max_entries; i++) {
- err = func(name, map_fd, pdef, op, &i, arg);
+ for (i = 0; i < bpf_map__max_entries(map); i++) {
+ err = func(name, map_fd, map, op, &i, arg);
if (err) {
pr_debug("ERROR: failed to insert value to %s[%u]\n",
name, i);
static int
foreach_key_array_ranges(map_config_func_t func, void *arg,
const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op)
{
unsigned int i, j;
for (j = 0; j < length; j++) {
unsigned int idx = start + j;
- err = func(name, map_fd, pdef, op, &idx, arg);
+ err = func(name, map_fd, map, op, &idx, arg);
if (err) {
pr_debug("ERROR: failed to insert value to %s[%u]\n",
name, idx);
map_config_func_t func,
void *arg)
{
- int err, map_fd;
+ int err, map_fd, type;
struct bpf_map_op *op;
- const struct bpf_map_def *def;
const char *name = bpf_map__name(map);
struct bpf_map_priv *priv = bpf_map__priv(map);
return 0;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("ERROR: failed to get definition from map %s\n", name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
map_fd = bpf_map__fd(map);
return map_fd;
}
+ type = bpf_map__type(map);
list_for_each_entry(op, &priv->ops_list, list) {
- switch (def->type) {
+ switch (type) {
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
err = foreach_key_array_all(func, arg, name,
- map_fd, def, op);
+ map_fd, map, op);
break;
case BPF_MAP_KEY_RANGES:
err = foreach_key_array_ranges(func, arg, name,
- map_fd, def,
- op);
+ map_fd, map, op);
break;
default:
pr_debug("ERROR: keytype for map '%s' invalid\n",
static int
apply_obj_config_map_for_key(const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op,
void *pkey, void *arg __maybe_unused)
{
switch (op->op_type) {
case BPF_MAP_OP_SET_VALUE:
err = apply_config_value_for_key(map_fd, pkey,
- pdef->value_size,
+ bpf_map__value_size(map),
op->v.value);
break;
case BPF_MAP_OP_SET_EVSEL:
#include <stdlib.h>
#include <unistd.h>
-static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
{
- return def->type == BPF_MAP_TYPE_PERCPU_HASH ||
- def->type == BPF_MAP_TYPE_PERCPU_ARRAY ||
- def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
- def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+ return type == BPF_MAP_TYPE_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
}
-static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
+static void *bpf_map__alloc_value(const struct bpf_map *map)
{
- if (bpf_map_def__is_per_cpu(def))
- return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF));
+ if (bpf_map__is_per_cpu(bpf_map__type(map)))
+ return malloc(round_up(bpf_map__value_size(map), 8) *
+ sysconf(_SC_NPROCESSORS_CONF));
- return malloc(def->value_size);
+ return malloc(bpf_map__value_size(map));
}
int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
{
- const struct bpf_map_def *def = bpf_map__def(map);
void *prev_key = NULL, *key, *value;
int fd = bpf_map__fd(map), err;
int printed = 0;
if (fd < 0)
return fd;
- if (IS_ERR(def))
- return PTR_ERR(def);
+ if (!map)
+ return PTR_ERR(map);
err = -ENOMEM;
- key = malloc(def->key_size);
+ key = malloc(bpf_map__key_size(map));
if (key == NULL)
goto out;
- value = bpf_map_def__alloc_value(def);
+ value = bpf_map__alloc_value(map);
if (value == NULL)
goto out_free_key;
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
-CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
+CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
LDFLAGS += $(SAN_CFLAGS)
MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
-I$(abspath $(OUTPUT)/../usr/include)
.write = bpf_testmod_test_write,
};
-BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_SET_START(bpf_testmod_check_kfunc_ids)
BTF_ID(func, bpf_testmod_test_mod_kfunc)
-BTF_SET_END(bpf_testmod_kfunc_ids)
+BTF_SET_END(bpf_testmod_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &bpf_testmod_check_kfunc_ids,
+};
+
+extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
{
int ret;
- ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
- if (ret)
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
- return 0;
+ if (bpf_fentry_test1(0) < 0)
+ return -EINVAL;
+ return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
static void bpf_testmod_exit(void)
{
- unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
CONFIG_BLK_DEV_LOOP=y
CONFIG_FUNCTION_TRACER=y
CONFIG_DYNAMIC_FTRACE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_USERFAULTFD=y
// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/capability.h>
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
static int duration;
+/*
+ * Call unshare(CLONE_NEWNET) and check the result with ASSERT_OK().
+ * Returns 0 when the assertion holds, -1 otherwise.
+ */
+static int create_netns(void)
+{
+	return ASSERT_OK(unshare(CLONE_NEWNET), "create netns") ? 0 : -1;
+}
+
void try_bind(int family, int port, int expected_errno)
{
struct sockaddr_storage addr = {};
struct bind_perm *skel;
int cgroup_fd;
+ if (create_netns())
+ return;
+
cgroup_fd = test__join_cgroup("/bind_perm");
if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
return;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+/* Create an AF_UNIX stream socket, bind it, and publish its bound name.
+ *
+ * bind() is given only the sun_family part of the address
+ * (len == offsetof(struct sockaddr_un, sun_path)); on Linux this presumably
+ * requests an autobound abstract name (see unix(7)). The name the socket
+ * actually received is read back with getsockname() and copied into
+ * skel->bss->sun_path.
+ *
+ * Returns the socket fd on success, or -1 after recording a test failure.
+ * The fd is closed on every failure path, so the caller owns it only on
+ * success.
+ */
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+	struct sockaddr_un addr = {
+		.sun_family = AF_UNIX,
+		.sun_path = "",
+	};
+	socklen_t len;
+	int fd, err;
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (!ASSERT_NEQ(fd, -1, "socket"))
+		return -1;
+
+	len = offsetof(struct sockaddr_un, sun_path);
+	err = bind(fd, (struct sockaddr *)&addr, len);
+	if (!ASSERT_OK(err, "bind"))
+		goto err_close;
+
+	len = sizeof(addr);
+	err = getsockname(fd, (struct sockaddr *)&addr, &len);
+	if (!ASSERT_OK(err, "getsockname"))
+		goto err_close;
+
+	/* Copy only the bytes of sun_path that getsockname() reported. */
+	memcpy(&skel->bss->sun_path, &addr.sun_path,
+	       len - offsetof(struct sockaddr_un, sun_path));
+
+	return fd;
+
+err_close:
+	close(fd);
+	return -1;
+}
+
+/* For each of the NR_CASES slots: require that skel->data->sndbuf_getsockopt
+ * is not -1 (presumably filled in by the BPF iterator program), replay the
+ * matching SO_SNDBUF value from user space with setsockopt(), read it back
+ * with getsockopt() into skel->bss->sndbuf_getsockopt_expected, and require
+ * the two readings to be equal. The first failed assertion ends the test.
+ */
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+	socklen_t optlen;
+	int idx;
+
+	for (idx = 0; idx < NR_CASES; idx++) {
+		if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[idx], -1,
+				"bpf_(get|set)sockopt"))
+			break;
+
+		if (!ASSERT_OK(setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+					  &(skel->data->sndbuf_setsockopt[idx]),
+					  sizeof(skel->data->sndbuf_setsockopt[idx])),
+			       "setsockopt"))
+			break;
+
+		optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[idx]);
+		if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+					  &(skel->bss->sndbuf_getsockopt_expected[idx]),
+					  &optlen),
+			       "getsockopt"))
+			break;
+
+		if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[idx],
+			       skel->bss->sndbuf_getsockopt_expected[idx],
+			       "bpf_(get|set)sockopt"))
+			break;
+	}
+}
+
+/* Test entry point: load the skeleton, create a bound AF_UNIX socket, attach
+ * the change_sndbuf iterator program and run it by reading the iterator fd,
+ * then compare the values it recorded against user-space get/setsockopt().
+ *
+ * Both file descriptors created here (the unix socket and the iterator fd)
+ * are closed on every exit path.
+ */
+void test_bpf_iter_setsockopt_unix(void)
+{
+	struct bpf_iter_setsockopt_unix *skel;
+	int err, unix_fd = -1, iter_fd = -1;
+	char buf;
+
+	skel = bpf_iter_setsockopt_unix__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	unix_fd = create_unix_socket(skel);
+	if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+		goto destroy;
+
+	skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+	if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+		goto destroy;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+	if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+		goto destroy;
+
+	/* Retry while the read reports EAGAIN; a final result of 0 is required. */
+	while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+	       errno == EAGAIN)
+		;
+	if (!ASSERT_OK(err, "read iter error"))
+		goto destroy;
+
+	test_sndbuf(skel, unix_fd);
+destroy:
+	if (iter_fd >= 0)
+		close(iter_fd);
+	if (unix_fd >= 0)
+		close(unix_fd);
+	bpf_iter_setsockopt_unix__destroy(skel);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program, and make it
+ * sleep, then the BPF program is loaded and the return value from verifier is
+ * inspected. After this, the userfaultfd is closed so that the module loading
+ * thread makes forward progress, and fmod_ret injects an error so that the
+ * module load fails and it is freed.
+ *
+ * If the verifier succeeded in loading the supplied program, it will end up
+ * taking reference to freed module, and trigger a crash when the program fd
+ * is closed later. This is true for both kfuncs and ksyms. In both cases,
+ * the crash is triggered inside bpf_prog_free_deferred, when module reference
+ * is finally released.
+ */
+/* Per-subtest hooks naming the skeleton whose load is expected to fail. */
+struct test_config {
+ const char *str_open; /* assertion name used when checking the load result */
+ void *(*bpf_open_and_load)(); /* opens and loads the skeleton; the test expects NULL */
+ void (*bpf_destroy)(void *); /* frees a skeleton that was unexpectedly returned */
+};
+/* Result published by load_module_thread(); _TS_INVALID means nothing published yet. */
+enum test_state {
+ _TS_INVALID,
+ TS_MODULE_LOAD, /* load_module() returned 0, which the test treats as a failure */
+ TS_MODULE_LOAD_FAIL, /* load_module() returned non-zero, the expected outcome */
+};
+/* Written by the loader thread, polled by the test thread, reset at the end of each run. */
+static _Atomic enum test_state state = _TS_INVALID;
+/* Raw finit_module syscall; returns whatever syscall() returns. */
+static int sys_finit_module(int fd, const char *param_values, int flags)
+{
+ return syscall(__NR_finit_module, fd, param_values, flags);
+}
+/* Raw delete_module syscall; returns whatever syscall() returns. */
+static int sys_delete_module(const char *name, unsigned int flags)
+{
+ return syscall(__NR_delete_module, name, flags);
+}
+
+/* Load the kernel module stored in the file @mod through finit_module with
+ * empty parameters and no flags.
+ *
+ * Returns 0 on success, or the negative value returned by open() or
+ * finit_module on failure (errno holds the reason).
+ *
+ * The path now comes from @mod instead of a hard-coded "bpf_testmod.ko";
+ * every caller in this file already passes that same string.
+ */
+static int load_module(const char *mod)
+{
+	int ret, fd;
+
+	fd = open(mod, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	ret = sys_finit_module(fd, "", 0);
+	/* The fd is only needed for the duration of the syscall. */
+	close(fd);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/* Thread body: try to load bpf_testmod.ko and publish the outcome in the
+ * shared 'state'. The load is expected to fail; a successful load is
+ * recorded as a test failure by the assertion. Returns @p unchanged.
+ */
+static void *load_module_thread(void *p)
+{
+	if (ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+		atomic_store(&state, TS_MODULE_LOAD_FAIL);
+	else
+		atomic_store(&state, TS_MODULE_LOAD);
+
+	return p;
+}
+/* Raw userfaultfd syscall; returns whatever syscall() returns. */
+static int sys_userfaultfd(int flags)
+{
+ return syscall(__NR_userfaultfd, flags);
+}
+
+/* Open a userfaultfd, negotiate the API, and register the 4096-byte range
+ * starting at @fault_addr for missing-page faults.
+ *
+ * Returns the userfaultfd descriptor (>= 0) on success, or -errno of the
+ * step that failed. The descriptor is closed on failure; errno is captured
+ * before close() so the reported code belongs to the failing call.
+ */
+static int test_setup_uffd(void *fault_addr)
+{
+	struct uffdio_register uffd_register = {};
+	struct uffdio_api uffd_api = {};
+	int uffd, err;
+
+	uffd = sys_userfaultfd(O_CLOEXEC);
+	if (uffd < 0)
+		return -errno;
+
+	uffd_api.api = UFFD_API;
+	uffd_api.features = 0;
+	if (ioctl(uffd, UFFDIO_API, &uffd_api))
+		goto err_close;
+
+	uffd_register.range.start = (unsigned long)fault_addr;
+	uffd_register.range.len = 4096;
+	uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register))
+		goto err_close;
+
+	return uffd;
+
+err_close:
+	err = -errno;
+	close(uffd);
+	return err;
+}
+/* Run one race scenario: unload bpf_testmod, start a thread that reloads it,
+ * wait until that load is blocked (bpf_blocking set and a userfaultfd
+ * pagefault event read), then call config->bpf_open_and_load() and expect
+ * NULL with errno ENXIO. The module is reloaded on exit unless the initial
+ * unload failed.
+ */
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+ void *fault_addr, *skel_fail;
+ struct bpf_mod_race *skel;
+ struct uffd_msg uffd_msg;
+ pthread_t load_mod_thrd;
+ _Atomic int *blockingp;
+ int uffd, ret;
+ /* Anonymous page that is later registered with userfaultfd by test_setup_uffd(). */
+ fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+ return;
+ /* Unload the module first; the loader thread started below loads it again. */
+ if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+ goto end_mmap;
+
+ skel = bpf_mod_race__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
+ goto end_module;
+ /* rodata is filled in between open and load. */
+ skel->rodata->bpf_mod_race_config.tgid = getpid();
+ skel->rodata->bpf_mod_race_config.inject_error = -4242;
+ skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+ if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
+ goto end_destroy;
+ blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+ if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+ goto end_destroy;
+
+ uffd = test_setup_uffd(fault_addr);
+ if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+ goto end_destroy;
+
+ if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+ "load module thread"))
+ goto end_uffd;
+
+ /* Now, we either fail loading module, or block in bpf prog, spin to find out */
+ while (!atomic_load(&state) && !atomic_load(blockingp))
+ ;
+ if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+ goto end_join;
+ if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+ pthread_kill(load_mod_thrd, SIGKILL); /* NOTE(review): SIGKILL is presumably process-wide even via pthread_kill() - confirm intent */
+ goto end_uffd;
+ }
+
+ /* We might have set bpf_blocking to 1, but may have not blocked in
+ * bpf_copy_from_user. Read userfaultfd descriptor to verify that.
+ */
+ if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+ "read uffd block event"))
+ goto end_join;
+ if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+ goto end_join;
+
+ /* We know that load_mod_thrd is blocked in the fmod_ret program, the
+ * module state is still MODULE_STATE_COMING because mod->init hasn't
+ * returned. This is the time we try to load a program calling kfunc and
+ * check if we get ENXIO from verifier.
+ */
+ skel_fail = config->bpf_open_and_load();
+ ret = errno; /* capture errno before any later call can overwrite it */
+ if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+ /* Close uffd to unblock load_mod_thrd */
+ close(uffd);
+ uffd = -1;
+ while (atomic_load(blockingp) != 2)
+ ;
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+ config->bpf_destroy(skel_fail);
+ goto end_join;
+
+ }
+ ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+ ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+ /* Expected path: closing the uffd lets the blocked loader thread continue. */
+ close(uffd);
+ uffd = -1;
+end_join:
+ pthread_join(load_mod_thrd, NULL); /* NOTE(review): some paths arrive with uffd still open; if the loader is blocked on it this may not return - confirm */
+ if (uffd < 0) /* uffd is -1 only after it was closed above */
+ ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+ if (uffd >= 0)
+ close(uffd);
+end_destroy:
+ bpf_mod_race__destroy(skel);
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+ sys_delete_module("bpf_testmod", 0); /* best effort; return value deliberately not checked */
+ ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+end_mmap:
+ munmap(fault_addr, 4096);
+ atomic_store(&state, _TS_INVALID); /* reset shared state for the next subtest */
+}
+/* Hooks for the ksym_race skeleton; casts adapt its typed functions to the generic hook types. */
+static const struct test_config ksym_config = {
+ .str_open = "ksym_race__open_and_load",
+ .bpf_open_and_load = (void *)ksym_race__open_and_load,
+ .bpf_destroy = (void *)ksym_race__destroy,
+};
+/* Hooks for the kfunc_call_race skeleton; casts adapt its typed functions to the generic hook types. */
+static const struct test_config kfunc_config = {
+ .str_open = "kfunc_call_race__open_and_load",
+ .bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+ .bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+/* Test entry point: runs the race scenario once per config, each as its own subtest. */
+void serial_test_bpf_mod_race(void)
+{
+ if (test__start_subtest("ksym (used_btfs UAF)"))
+ test_bpf_mod_race_config(&ksym_config);
+ if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+ test_bpf_mod_race_config(&kfunc_config);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_bpf_nf.skel.h"
+/* Mode passed to test_bpf_nf_ct() to pick which program is run. */
+enum {
+ TEST_XDP, /* run skel->progs.nf_xdp_ct_test */
+ TEST_TC_BPF, /* run skel->progs.nf_skb_ct_test */
+};
+
+/* Load the test_bpf_nf skeleton, run the program selected by @mode
+ * (TEST_XDP picks nf_xdp_ct_test, anything else nf_skb_ct_test) once on
+ * pkt_v4, and check the error codes the program stored in its bss. The bss
+ * checks are skipped when the test run itself fails.
+ */
+void test_bpf_nf_ct(int mode)
+{
+	struct test_bpf_nf *skel = test_bpf_nf__open_and_load();
+	int prog_fd, retval;
+
+	if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+		return;
+
+	prog_fd = bpf_program__fd(mode == TEST_XDP ? skel->progs.nf_xdp_ct_test
+						   : skel->progs.nf_skb_ct_test);
+
+	if (ASSERT_OK(bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+					(__u32 *)&retval, NULL),
+		      "bpf_prog_test_run")) {
+		ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+		ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+		ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+		ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+		ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+		ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+		ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+		ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+	}
+
+	test_bpf_nf__destroy(skel);
+}
+/* Test entry point: runs test_bpf_nf_ct() once per mode, each as its own subtest. */
+void test_bpf_nf(void)
+{
+ if (test__start_subtest("xdp-ct"))
+ test_bpf_nf_ct(TEST_XDP);
+ if (test__start_subtest("tc-bpf-ct"))
+ test_bpf_nf_ct(TEST_TC_BPF);
+}
has_btf_ext = btf_ext != NULL;
btf_ext__free(btf_ext);
+ /* temporarily disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+ libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS);
obj = bpf_object__open(test->file);
err = libbpf_get_error(obj);
if (CHECK(err, "obj: %d", err))
fprintf(stderr, "OK");
done:
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
btf__free(btf);
free(func_info);
bpf_object__close(obj);
attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
attach_opts.replace_prog_fd = allow_prog[0];
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_override", "unexpected success\n"))
goto err;
CHECK_FAIL(errno != EINVAL);
attach_opts.flags = BPF_F_REPLACE;
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_no_multi", "unexpected success\n"))
goto err;
attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
attach_opts.replace_prog_fd = -1;
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_bad_fd", "unexpected success\n"))
goto err;
/* replacing a program that is not attached to cgroup should fail */
attach_opts.replace_prog_fd = allow_prog[3];
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_no_ent", "unexpected success\n"))
goto err;
/* replace 1st from the top program */
attach_opts.replace_prog_fd = allow_prog[0];
- if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"prog_replace", "errno=%d\n", errno))
goto err;
/* replace program with itself */
attach_opts.replace_prog_fd = allow_prog[6];
- if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"prog_replace", "errno=%d\n", errno))
goto err;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+
+#define SOL_CUSTOM 0xdeadbeef
+
+static int zero;
+
+/* Attach only set_eunatch and check that setsockopt() fails with EUNATCH
+ * after exactly one program invocation.
+ */
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *skel;
+	struct bpf_link *set_link = NULL;
+
+	skel = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		return;
+
+	set_link = bpf_program__attach_cgroup(skel->progs.set_eunatch, cgroup_fd);
+	if (!ASSERT_OK_PTR(set_link, "cg-attach-set_eunatch"))
+		goto cleanup;
+
+	/* The syscall must fail, and with the errno the program chose. */
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt") ||
+	    !ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(skel->bss->invocations, 1, "invocations") ||
+	    !ASSERT_FALSE(skel->bss->assertion_error, "assertion_error"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(set_link);
+	cgroup_getset_retval_setsockopt__destroy(skel);
+}
+
+/* Attach set_eunatch followed by get_retval and check that the second
+ * program observes the -EUNATCH set by the first, and that the syscall
+ * fails with EUNATCH.
+ */
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *skel;
+	struct bpf_link *set_link = NULL, *get_link = NULL;
+
+	skel = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		return;
+
+	set_link = bpf_program__attach_cgroup(skel->progs.set_eunatch, cgroup_fd);
+	if (!ASSERT_OK_PTR(set_link, "cg-attach-set_eunatch"))
+		goto cleanup;
+	get_link = bpf_program__attach_cgroup(skel->progs.get_retval, cgroup_fd);
+	if (!ASSERT_OK_PTR(get_link, "cg-attach-get_retval"))
+		goto cleanup;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt") ||
+	    !ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(skel->bss->invocations, 2, "invocations") ||
+	    !ASSERT_FALSE(skel->bss->assertion_error, "assertion_error") ||
+	    !ASSERT_EQ(skel->bss->retval_value, -EUNATCH, "retval_value"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(set_link);
+	bpf_link__destroy(get_link);
+	cgroup_getset_retval_setsockopt__destroy(skel);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach setsockopt that gets the previously set errno.
+ * Assert that, without anything setting one, we get 0.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+/* Attach get_retval first and set_eunatch second. The getter must still see
+ * 0 (the error is set only afterwards), while the syscall must nevertheless
+ * fail with EUNATCH.
+ */
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *skel;
+	struct bpf_link *get_link = NULL, *set_link = NULL;
+
+	skel = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		return;
+
+	get_link = bpf_program__attach_cgroup(skel->progs.get_retval, cgroup_fd);
+	if (!ASSERT_OK_PTR(get_link, "cg-attach-get_retval"))
+		goto cleanup;
+	set_link = bpf_program__attach_cgroup(skel->progs.set_eunatch, cgroup_fd);
+	if (!ASSERT_OK_PTR(set_link, "cg-attach-set_eunatch"))
+		goto cleanup;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt") ||
+	    !ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(skel->bss->invocations, 2, "invocations") ||
+	    !ASSERT_FALSE(skel->bss->assertion_error, "assertion_error") ||
+	    !ASSERT_EQ(skel->bss->retval_value, 0, "retval_value"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(get_link);
+	bpf_link__destroy(set_link);
+	cgroup_getset_retval_setsockopt__destroy(skel);
+}
+
+/* Attach set_eunatch, then set_eisconn, then get_retval. Both the syscall
+ * and the getter must see the error that was set last (EISCONN).
+ */
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *skel;
+	struct bpf_link *eunatch_link = NULL, *eisconn_link = NULL;
+	struct bpf_link *get_link = NULL;
+
+	skel = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		return;
+
+	eunatch_link = bpf_program__attach_cgroup(skel->progs.set_eunatch, cgroup_fd);
+	if (!ASSERT_OK_PTR(eunatch_link, "cg-attach-set_eunatch"))
+		goto cleanup;
+	eisconn_link = bpf_program__attach_cgroup(skel->progs.set_eisconn, cgroup_fd);
+	if (!ASSERT_OK_PTR(eisconn_link, "cg-attach-set_eisconn"))
+		goto cleanup;
+	get_link = bpf_program__attach_cgroup(skel->progs.get_retval, cgroup_fd);
+	if (!ASSERT_OK_PTR(get_link, "cg-attach-get_retval"))
+		goto cleanup;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt") ||
+	    !ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(skel->bss->invocations, 3, "invocations") ||
+	    !ASSERT_FALSE(skel->bss->assertion_error, "assertion_error") ||
+	    !ASSERT_EQ(skel->bss->retval_value, -EISCONN, "retval_value"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(eunatch_link);
+	bpf_link__destroy(eisconn_link);
+	bpf_link__destroy(get_link);
+	cgroup_getset_retval_setsockopt__destroy(skel);
+}
+
+/* Attach legacy_eperm (rejects without setting an errno) followed by
+ * get_retval. For backward compatibility the syscall must fail with EPERM,
+ * and the getter must see -EPERM as well.
+ */
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *skel;
+	struct bpf_link *legacy_link = NULL, *get_link = NULL;
+
+	skel = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		return;
+
+	legacy_link = bpf_program__attach_cgroup(skel->progs.legacy_eperm, cgroup_fd);
+	if (!ASSERT_OK_PTR(legacy_link, "cg-attach-legacy_eperm"))
+		goto cleanup;
+	get_link = bpf_program__attach_cgroup(skel->progs.get_retval, cgroup_fd);
+	if (!ASSERT_OK_PTR(get_link, "cg-attach-get_retval"))
+		goto cleanup;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt") ||
+	    !ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(skel->bss->invocations, 2, "invocations") ||
+	    !ASSERT_FALSE(skel->bss->assertion_error, "assertion_error") ||
+	    !ASSERT_EQ(skel->bss->retval_value, -EPERM, "retval_value"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(legacy_link);
+	bpf_link__destroy(get_link);
+	cgroup_getset_retval_setsockopt__destroy(skel);
+}
+
+/* Attach set_eunatch, then legacy_eperm (rejects without setting an errno),
+ * then get_retval. The legacy reject must not replace the already-set
+ * error: both the syscall and the getter must still see EUNATCH.
+ */
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *skel;
+	struct bpf_link *set_link = NULL, *legacy_link = NULL;
+	struct bpf_link *get_link = NULL;
+
+	skel = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		return;
+
+	set_link = bpf_program__attach_cgroup(skel->progs.set_eunatch, cgroup_fd);
+	if (!ASSERT_OK_PTR(set_link, "cg-attach-set_eunatch"))
+		goto cleanup;
+	legacy_link = bpf_program__attach_cgroup(skel->progs.legacy_eperm, cgroup_fd);
+	if (!ASSERT_OK_PTR(legacy_link, "cg-attach-legacy_eperm"))
+		goto cleanup;
+	get_link = bpf_program__attach_cgroup(skel->progs.get_retval, cgroup_fd);
+	if (!ASSERT_OK_PTR(get_link, "cg-attach-get_retval"))
+		goto cleanup;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt") ||
+	    !ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(skel->bss->invocations, 3, "invocations") ||
+	    !ASSERT_FALSE(skel->bss->assertion_error, "assertion_error") ||
+	    !ASSERT_EQ(skel->bss->retval_value, -EUNATCH, "retval_value"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(set_link);
+	bpf_link__destroy(legacy_link);
+	bpf_link__destroy(get_link);
+	cgroup_getset_retval_setsockopt__destroy(skel);
+}
+
+/* Attach only get_retval to getsockopt. A getsockopt() at the SOL_CUSTOM
+ * level is expected to fail with EOPNOTSUPP, and that error must show up in
+ * both retval_value and ctx_retval_value.
+ */
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_getsockopt *skel;
+	struct bpf_link *get_link = NULL;
+	int buf;
+	socklen_t optlen = sizeof(buf);
+
+	skel = cgroup_getset_retval_getsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		return;
+
+	get_link = bpf_program__attach_cgroup(skel->progs.get_retval, cgroup_fd);
+	if (!ASSERT_OK_PTR(get_link, "cg-attach-get_retval"))
+		goto cleanup;
+
+	if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+				   &buf, &optlen), "getsockopt") ||
+	    !ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(skel->bss->invocations, 1, "invocations") ||
+	    !ASSERT_FALSE(skel->bss->assertion_error, "assertion_error") ||
+	    !ASSERT_EQ(skel->bss->retval_value, -EOPNOTSUPP, "retval_value") ||
+	    !ASSERT_EQ(skel->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(get_link);
+	cgroup_getset_retval_getsockopt__destroy(skel);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_set_eisconn = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach getsockopt that sets retval to -EISCONN. Assert that this
+ * overrides the value from kernel.
+ */
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eisconn);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+/* Attach set_eisconn, then clear_retval, then get_retval to getsockopt.
+ * Clearing the ctx retval must be visible to the helper as well, so the
+ * syscall succeeds and both retval_value and ctx_retval_value read 0.
+ */
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_getsockopt *skel;
+	struct bpf_link *set_link = NULL, *clear_link = NULL;
+	struct bpf_link *get_link = NULL;
+	int buf;
+	socklen_t optlen = sizeof(buf);
+
+	skel = cgroup_getset_retval_getsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		return;
+
+	set_link = bpf_program__attach_cgroup(skel->progs.set_eisconn, cgroup_fd);
+	if (!ASSERT_OK_PTR(set_link, "cg-attach-set_eisconn"))
+		goto cleanup;
+	clear_link = bpf_program__attach_cgroup(skel->progs.clear_retval, cgroup_fd);
+	if (!ASSERT_OK_PTR(clear_link, "cg-attach-clear_retval"))
+		goto cleanup;
+	get_link = bpf_program__attach_cgroup(skel->progs.get_retval, cgroup_fd);
+	if (!ASSERT_OK_PTR(get_link, "cg-attach-get_retval"))
+		goto cleanup;
+
+	if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+				  &buf, &optlen), "getsockopt"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(skel->bss->invocations, 3, "invocations") ||
+	    !ASSERT_FALSE(skel->bss->assertion_error, "assertion_error") ||
+	    !ASSERT_EQ(skel->bss->retval_value, 0, "retval_value") ||
+	    !ASSERT_EQ(skel->bss->ctx_retval_value, 0, "ctx_retval_value"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(set_link);
+	bpf_link__destroy(clear_link);
+	bpf_link__destroy(get_link);
+	cgroup_getset_retval_getsockopt__destroy(skel);
+}
+
+/* Test entry point: join a dedicated cgroup, open one UDP server socket, and
+ * run each retval scenario as its own subtest against that cgroup/socket
+ * pair.
+ *
+ * Both descriptors are released on exit; each is closed only if it was
+ * actually obtained.
+ */
+void test_cgroup_getset_retval(void)
+{
+	int cgroup_fd = -1;
+	int sock_fd = -1;
+
+	cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+	if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+		goto close_fd;
+
+	sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+	if (!ASSERT_GE(sock_fd, 0, "start-server"))
+		goto close_fd;
+
+	if (test__start_subtest("setsockopt-set"))
+		test_setsockopt_set(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-set_and_get"))
+		test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-default_zero"))
+		test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-default_zero_and_set"))
+		test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-override"))
+		test_setsockopt_override(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-legacy_eperm"))
+		test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-legacy_no_override"))
+		test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("getsockopt-get"))
+		test_getsockopt_get(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("getsockopt-override"))
+		test_getsockopt_override(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("getsockopt-retval_sync"))
+		test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+close_fd:
+	if (sock_fd >= 0)
+		close(sock_fd);
+	if (cgroup_fd >= 0)
+		close(cgroup_fd);
+}
if (map_fd < 0)
return -1;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(map_fd < 0))
return;
- buff = malloc(bpf_map__def(map)->value_size);
+ buff = malloc(bpf_map__value_size(map));
if (buff)
err = bpf_map_update_elem(map_fd, &zero, buff, 0);
free(buff);
if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
goto out;
- sz = bpf_map__def(map)->value_size;
+ sz = bpf_map__value_size(map);
newval = malloc(sz);
if (CHECK_FAIL(!newval))
goto out;
ASSERT_OK(err, "bpf_prog_test_run(test2)");
ASSERT_EQ(retval, 3, "test2-retval");
+ prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
+ ASSERT_EQ(retval, 0, "test_ref_btf_id-retval");
+
kfunc_call_test_lskel__destroy(skel);
}
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
#include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
#include "bpf_iter_sockmap.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
test_sockmap_skb_verdict_attach__destroy(skel);
}
+/* Look up the program id behind @prog_fd via bpf_obj_get_info_by_fd().
+ * Returns the id, or 0 after recording a test failure if the query fails or
+ * reports an unexpected info length.
+ */
+static __u32 query_prog_id(int prog_fd)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+
+	if (!ASSERT_OK(bpf_obj_get_info_by_fd(prog_fd, &info, &info_len),
+		       "bpf_obj_get_info_by_fd"))
+		return 0;
+	if (!ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+		return 0;
+
+	return info.id;
+}
+
+/* Check bpf_prog_query() on a sockmap for @attach_type: it must report no
+ * programs before anything is attached, and exactly the attached verdict
+ * program (matched by id) afterwards. BPF_SK_MSG_VERDICT uses
+ * prog_skmsg_verdict; every other attach type uses prog_skb_verdict.
+ */
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+	struct test_sockmap_progs_query *skel;
+	__u32 prog_ids[3] = {};
+	__u32 attach_flags = 0;
+	__u32 prog_cnt = 3;
+	int map_fd, verdict_fd;
+
+	skel = test_sockmap_progs_query__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.sock_map);
+	verdict_fd = bpf_program__fd(attach_type == BPF_SK_MSG_VERDICT ?
+				     skel->progs.prog_skmsg_verdict :
+				     skel->progs.prog_skb_verdict);
+
+	/* Nothing attached yet: the query must come back empty. */
+	ASSERT_OK(bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+				 &attach_flags, prog_ids, &prog_cnt),
+		  "bpf_prog_query failed");
+	ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+	ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+	if (ASSERT_OK(bpf_prog_attach(verdict_fd, map_fd, attach_type, 0),
+		      "bpf_prog_attach failed")) {
+		prog_cnt = 1;
+		ASSERT_OK(bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+					 &attach_flags, prog_ids, &prog_cnt),
+			  "bpf_prog_query failed");
+		ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+		ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+		ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+			  "wrong prog_ids on query");
+
+		bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+	}
+
+	test_sockmap_progs_query__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
BPF_SK_SKB_VERDICT);
}
+ if (test__start_subtest("sockmap msg_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+ if (test__start_subtest("sockmap stream_parser progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+ if (test__start_subtest("sockmap stream_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+ if (test__start_subtest("sockmap skb_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
}
static void test_ops_cleanup(const struct bpf_map *map)
{
- const struct bpf_map_def *def;
int err, mapfd;
u32 key;
- def = bpf_map__def(map);
mapfd = bpf_map__fd(map);
- for (key = 0; key < def->max_entries; key++) {
+ for (key = 0; key < bpf_map__max_entries(map); key++) {
err = bpf_map_delete_elem(mapfd, &key);
if (err && errno != EINVAL && errno != ENOENT)
FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
static const char *map_type_str(const struct bpf_map *map)
{
- const struct bpf_map_def *def;
+ int type;
- def = bpf_map__def(map);
- if (IS_ERR(def))
+ if (!map)
return "invalid";
+ type = bpf_map__type(map);
- switch (def->type) {
+ switch (type) {
case BPF_MAP_TYPE_SOCKMAP:
return "sockmap";
case BPF_MAP_TYPE_SOCKHASH:
}
memset(&buf, 0, sizeof(buf));
- buf.zc.address = 12345; /* rejected by BPF */
+ buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
optlen = sizeof(buf.zc);
errno = 0;
err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
- if (errno != EPERM) {
+ if (errno != EINVAL) {
log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
err, errno);
goto err;
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
&duration, &retval, NULL);
CHECK(err || retval != i, "tailcall",
CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
err, errno, retval);
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
err, errno, retval);
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- j = bpf_map__def(prog_array)->max_entries - 1 - i;
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ j = bpf_map__max_entries(prog_array) - 1 - i;
snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
prog = bpf_object__find_program_by_name(obj, prog_name);
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- j = bpf_map__def(prog_array)->max_entries - 1 - i;
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ j = bpf_map__max_entries(prog_array) - 1 - i;
err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
&duration, &retval, NULL);
CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
err, errno, retval);
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err >= 0 || errno != ENOENT))
goto out;
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(map_fd < 0))
return;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
"err %d errno %d retval %d\n", err, errno, retval);
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
if (CHECK_FAIL(map_fd < 0))
return;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
if (CHECK_FAIL(err))
goto out;
"err %d errno %d retval %d\n", err, errno, retval);
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
if (CHECK_FAIL(err))
goto out;
goto out;
/* nop -> jmp */
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+/* Run the test_xdp_update_frags program on a linear packet and on multi-frag
+ * packets.
+ *
+ * For every run the first __u32 of the input buffer is set to the offset of a
+ * 16-byte window whose first and last bytes are pre-set to 0xaa. Each run is
+ * expected to return XDP_PASS with both marker bytes rewritten to 0xbb.
+ *
+ * Open and load failures are reported as test failures, and the object is
+ * closed on every path after a successful open.
+ */
+void test_xdp_update_frags(void)
+{
+	const char *file = "./test_xdp_update_frags.o";
+	__u32 duration, retval, size;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int err, prog_fd;
+	__u32 *offset;
+	__u8 *buf = NULL;
+
+	obj = bpf_object__open(file);
+	if (!ASSERT_OK_PTR(obj, "bpf_object__open"))
+		return;
+
+	prog = bpf_object__next_program(obj, NULL);
+	err = bpf_object__load(obj);
+	/* The opened object must still be released when loading fails. */
+	if (!ASSERT_OK(err, "bpf_object__load"))
+		goto out;
+
+	prog_fd = bpf_program__fd(prog);
+
+	buf = malloc(128);
+	if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+		goto out;
+
+	memset(buf, 0, 128);
+	offset = (__u32 *)buf;
+	*offset = 16;
+	buf[*offset] = 0xaa;		/* marker at offset 16 (head) */
+	buf[*offset + 15] = 0xaa;	/* marker at offset 31 (head) */
+
+	err = bpf_prog_test_run(prog_fd, 1, buf, 128,
+				buf, &size, &retval, &duration);
+
+	/* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+	ASSERT_OK(err, "xdp_update_frag");
+	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+	ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+	free(buf);
+
+	buf = malloc(9000);
+	if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+		goto out;
+
+	memset(buf, 0, 9000);
+	offset = (__u32 *)buf;
+	*offset = 5000;
+	buf[*offset] = 0xaa;		/* marker at offset 5000 (frag0) */
+	buf[*offset + 15] = 0xaa;	/* marker at offset 5015 (frag0) */
+
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	/* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+	ASSERT_OK(err, "xdp_update_frag");
+	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+	ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+	memset(buf, 0, 9000);
+	offset = (__u32 *)buf;
+	*offset = 3510;
+	buf[*offset] = 0xaa;		/* marker at offset 3510 (head) */
+	buf[*offset + 15] = 0xaa;	/* marker at offset 3525 (frag0) */
+
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	/* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+	ASSERT_OK(err, "xdp_update_frag");
+	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+	ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+	memset(buf, 0, 9000);
+	offset = (__u32 *)buf;
+	*offset = 7606;
+	buf[*offset] = 0xaa;		/* marker at offset 7606 (frag0) */
+	buf[*offset + 15] = 0xaa;	/* marker at offset 7621 (frag1) */
+
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	/* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+	ASSERT_OK(err, "xdp_update_frag");
+	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+	ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+out:
+	/* buf is NULL on the early-failure paths; free(NULL) is a no-op. */
+	free(buf);
+	bpf_object__close(obj);
+}
+
+/* Test entry point: runs the XDP frags update checks as one subtest. */
+void test_xdp_adjust_frags(void)
+{
+	if (!test__start_subtest("xdp_adjust_frags"))
+		return;
+
+	test_xdp_update_frags();
+}
char buf[128];
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
return;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
-
- CHECK(err || retval != XDP_DROP,
- "ipv4", "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != expect_sz,
- "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, retval, size, expect_sz);
+ ASSERT_OK(err, "ipv6");
+ ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+ ASSERT_EQ(size, expect_sz, "ipv6 size");
+
bpf_object__close(obj);
}
int err, prog_fd;
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_DROP,
- "ipv4", "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != expect_sz,
- "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, retval, size, expect_sz);
+ ASSERT_OK(err, "ipv6");
+ ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+ ASSERT_EQ(size, expect_sz, "ipv6 size");
bpf_object__close(obj);
}
};
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
/* Test case-64 */
/* Kernel side alloc packet memory area that is zero init */
err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
- || tattr.retval != XDP_TX
- || tattr.data_size_out != 192, /* Expected grow size */
- "case-64",
- "err %d errno %d retval %d size %d\n",
- err, errno, tattr.retval, tattr.data_size_out);
+ ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */
+ ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+ ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
/* Extra checks for data contents */
- CHECK_ATTR(tattr.data_size_out != 192
- || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */
- || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */
- || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */
- "case-64-data",
- "err %d errno %d retval %d size %d\n",
- err, errno, tattr.retval, tattr.data_size_out);
+ ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /* 0-63 memset to 1 */
+ ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+ ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+ ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+ ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+ ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
/* Test case-128 */
memset(buf, 2, sizeof(buf));
err = bpf_prog_test_run_xattr(&tattr);
max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
- CHECK_ATTR(err
- || tattr.retval != XDP_TX
- || tattr.data_size_out != max_grow,/* Expect max grow size */
- "case-128",
- "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, tattr.retval, tattr.data_size_out, max_grow);
+ ASSERT_OK(err, "case-128");
+ ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+ ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
/* Extra checks for data content: Count grow size, will contain zeros */
for (i = 0, cnt = 0; i < sizeof(buf); i++) {
if (buf[i] == 0)
cnt++;
}
- CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
- || tattr.data_size_out != max_grow, /* Total grow size */
- "case-128-data",
- "err %d errno %d retval %d size %d grow-size %d\n",
- err, errno, tattr.retval, tattr.data_size_out, cnt);
+ ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+ ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
+
+ bpf_object__close(obj);
+}
+
+/* Exercise tail shrinking on a 9000-byte multi-frag packet.
+ *
+ * The first byte of the packet selects the case: 0 removes 10 bytes,
+ * 1 removes 4100 bytes and 2 removes 8200 bytes. Every case expects XDP_TX
+ * and the correspondingly reduced output size.
+ *
+ * Open and load failures are reported as test failures, and the object is
+ * closed on every path after a successful open.
+ */
+void test_xdp_adjust_frags_tail_shrink(void)
+{
+	const char *file = "./test_xdp_adjust_tail_shrink.o";
+	__u32 duration, retval, size, exp_size;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int err, prog_fd;
+	__u8 *buf = NULL;
+
+	/* For the individual test cases, the first byte in the packet
+	 * indicates which test will be run.
+	 */
+	obj = bpf_object__open(file);
+	if (!ASSERT_OK_PTR(obj, "bpf_object__open"))
+		return;
+
+	prog = bpf_object__next_program(obj, NULL);
+	err = bpf_object__load(obj);
+	/* The opened object must still be released when loading fails. */
+	if (!ASSERT_OK(err, "bpf_object__load"))
+		goto out;
+
+	prog_fd = bpf_program__fd(prog);
+
+	buf = malloc(9000);
+	if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+		goto out;
+
+	memset(buf, 0, 9000);
+
+	/* Test case removing 10 bytes from last frag, NOT freeing it */
+	exp_size = 8990; /* 9000 - 10 */
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb-10b");
+	ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval");
+	ASSERT_EQ(size, exp_size, "9Kb-10b size");
+
+	/* Test case removing one of two pages, assuming 4K pages */
+	buf[0] = 1;
+	exp_size = 4900; /* 9000 - 4100 */
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb-4Kb");
+	ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval");
+	ASSERT_EQ(size, exp_size, "9Kb-4Kb size");
+
+	/* Test case removing two pages resulting in a linear xdp_buff */
+	buf[0] = 2;
+	exp_size = 800; /* 9000 - 8200 */
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb-9Kb");
+	ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval");
+	ASSERT_EQ(size, exp_size, "9Kb-9Kb size");
+
+out:
+	/* buf is NULL on the early-failure paths; free(NULL) is a no-op. */
+	free(buf);
+	bpf_object__close(obj);
+}
+
+/* Exercise tail growing on multi-frag packets.
+ *
+ * Case 1: a 9000-byte packet is expected to grow by 10 bytes (XDP_TX); the
+ * original 9000 bytes must be untouched, the 10 new bytes zero, and the rest
+ * of the 16Kb buffer unmodified.
+ * Case 2: a 9001-byte packet is expected to be dropped (XDP_DROP) with its
+ * size unchanged.
+ *
+ * Open and load failures are reported as test failures, and the object is
+ * closed on every path after a successful open. The second case uses its own
+ * assertion labels so its failures can be told apart from the first case.
+ */
+void test_xdp_adjust_frags_tail_grow(void)
+{
+	const char *file = "./test_xdp_adjust_tail_grow.o";
+	__u32 duration, retval, size, exp_size;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int err, i, prog_fd;
+	__u8 *buf = NULL;
+
+	obj = bpf_object__open(file);
+	if (!ASSERT_OK_PTR(obj, "bpf_object__open"))
+		return;
+
+	prog = bpf_object__next_program(obj, NULL);
+	err = bpf_object__load(obj);
+	/* The opened object must still be released when loading fails. */
+	if (!ASSERT_OK(err, "bpf_object__load"))
+		goto out;
+
+	prog_fd = bpf_program__fd(prog);
+
+	buf = malloc(16384);
+	if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+		goto out;
+
+	/* Test case add 10 bytes to last frag */
+	memset(buf, 1, 16384);
+	size = 9000;
+	exp_size = size + 10;
+	err = bpf_prog_test_run(prog_fd, 1, buf, size,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb+10b");
+	ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval");
+	ASSERT_EQ(size, exp_size, "9Kb+10b size");
+
+	for (i = 0; i < 9000; i++)
+		ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+
+	for (i = 9000; i < 9010; i++)
+		ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+
+	for (i = 9010; i < 16384; i++)
+		ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+
+	/* Test a too large grow */
+	memset(buf, 1, 16384);
+	size = 9001;
+	exp_size = size;
+	err = bpf_prog_test_run(prog_fd, 1, buf, size,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "too-large-grow");
+	ASSERT_EQ(retval, XDP_DROP, "too-large-grow retval");
+	ASSERT_EQ(size, exp_size, "too-large-grow size");
+out:
+	/* buf is NULL on the early-failure paths; free(NULL) is a no-op. */
+	free(buf);
bpf_object__close(obj);
}
test_xdp_adjust_tail_grow();
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
+ if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+ test_xdp_adjust_frags_tail_shrink();
+ if (test__start_subtest("xdp_adjust_frags_tail_grow"))
+ test_xdp_adjust_frags_tail_grow();
}
int pkt_len;
};
+/* State shared between the test body and the on_sample() perf callback. */
+struct test_ctx_s {
+ bool passed; /* set to true by on_sample() when it has run */
+ int pkt_size; /* length on_sample() compares meta->pkt_len against */
+};
+
+/* Single instance; passed to perf_buffer__new() as the callback context. */
+struct test_ctx_s test_ctx;
+
+/* Perf-buffer sample callback.
+ *
+ * @ctx is a struct test_ctx_s; @data is a struct meta immediately followed
+ * by packet bytes; @size is the total sample size.
+ *
+ * Asserts that the sample is large enough to hold the meta header plus
+ * pkt_v4, that meta->ifindex is the index of "lo", that meta->pkt_len equals
+ * the expected size stored in the context, and that the leading bytes match
+ * pkt_v4. Any bytes after pkt_v4 must follow the pattern (unsigned char)i.
+ *
+ * NOTE: ->passed is set unconditionally at the end, i.e. it records that the
+ * callback ran, not that every assertion above held.
+ */
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
- int duration = 0;
struct meta *meta = (struct meta *)data;
struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+ unsigned char *raw_pkt = data + sizeof(*meta);
+ struct test_ctx_s *tst_ctx = ctx;
+
+ ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+ ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+ ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
+ ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+ "check_packet_content");
+
+ /* Payload past pkt_v4 is expected to hold the byte pattern written by
+ * run_xdp_bpf2bpf_pkt_size(): byte i has the value (unsigned char)i.
+ */
+ if (meta->pkt_len > sizeof(pkt_v4)) {
+ for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+ ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+ "check_packet_content");
+ }
+
+ tst_ctx->passed = true;
+}
- if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
- "check_size", "size %u < %zu\n",
- size, sizeof(pkt_v4) + sizeof(*meta)))
- return;
+#define BUF_SZ 9000
- if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
- "meta->ifindex = %d\n", meta->ifindex))
+/* Run the XDP program behind @pkt_fd once on a @pkt_size-byte packet and
+ * check both the direct result and what the tracing programs recorded.
+ *
+ * The packet is pkt_v4 followed by filler bytes where byte i has the value
+ * (__u8)i. @pkt_size must lie in [sizeof(pkt_v4), BUF_SZ], otherwise the
+ * function asserts and returns without running anything.
+ *
+ * @pb:          perf buffer polled for the sample handled by on_sample()
+ * @ftrace_skel: skeleton whose bss holds the fentry/fexit results
+ */
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+ struct test_xdp_bpf2bpf *ftrace_skel,
+ int pkt_size)
+{
+ __u32 duration = 0, retval, size;
+ __u8 *buf, *buf_in;
+ int err;
+
+ if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+ !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
return;
- if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
- "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+ buf_in = malloc(BUF_SZ);
+ if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
return;
- if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
- "check_packet_content", "content not the same\n"))
+ buf = malloc(BUF_SZ);
+ if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+ free(buf_in);
return;
+ }
+
+ /* Reset the shared callback state for this run. */
+ test_ctx.passed = false;
+ test_ctx.pkt_size = pkt_size;
+
+ memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+ if (pkt_size > sizeof(pkt_v4)) {
+ for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+ buf_in[i + sizeof(pkt_v4)] = i;
+ }
+
+ /* Run test program */
+ err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
+ buf, &size, &retval, &duration);
+
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(retval, XDP_PASS, "ipv4 retval");
+ ASSERT_EQ(size, pkt_size, "ipv4 size");
+
+ /* Make sure bpf_xdp_output() was triggered and it sent the expected
+ * data to the perf ring buffer.
+ */
+ err = perf_buffer__poll(pb, 100);
- *(bool *)ctx = true;
+ ASSERT_GE(err, 0, "perf_buffer__poll");
+ ASSERT_TRUE(test_ctx.passed, "test passed");
+ /* Verify test results */
+ ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+ "fentry result");
+ ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
+
+ free(buf);
+ free(buf_in);
}
void test_xdp_bpf2bpf(void)
{
- __u32 duration = 0, retval, size;
- char buf[128];
int err, pkt_fd, map_fd;
- bool passed = false;
- struct iphdr iph;
- struct iptnl_info value4 = {.family = AF_INET};
+ int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+ struct iptnl_info value4 = {.family = AF_INET6};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+ if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
return;
pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
/* Load trace program */
ftrace_skel = test_xdp_bpf2bpf__open();
- if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+ if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
goto out;
/* Demonstrate the bpf_program__set_attach_target() API rather than
bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
err = test_xdp_bpf2bpf__load(ftrace_skel);
- if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+ if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
goto out;
err = test_xdp_bpf2bpf__attach(ftrace_skel);
- if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
goto out;
/* Set up perf buffer */
- pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1,
- on_sample, NULL, &passed, NULL);
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+ on_sample, NULL, &test_ctx, NULL);
if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
- /* Run test program */
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
- if (CHECK(err || retval != XDP_TX || size != 74 ||
- iph.protocol != IPPROTO_IPIP, "ipv4",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size))
- goto out;
-
- /* Make sure bpf_xdp_output() was triggered and it sent the expected
- * data to the perf ring buffer.
- */
- err = perf_buffer__poll(pb, 100);
- if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
- goto out;
-
- CHECK_FAIL(!passed);
-
- /* Verify test results */
- if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
- "result", "fentry failed err %llu\n",
- ftrace_skel->bss->test_result_fentry))
- goto out;
-
- CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
- "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+ for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+ run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+ pkt_sizes[i]);
out:
- if (pb)
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
test_xdp__destroy(pkt_skel);
test_xdp_bpf2bpf__destroy(ftrace_skel);
}
#include <linux/if_link.h>
#include <test_progs.h>
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
#include "test_xdp_with_cpumap_helpers.skel.h"
#define IFINDEX_LO 1
-void serial_test_xdp_cpumap_attach(void)
+void test_xdp_with_cpumap_helpers(void)
{
struct test_xdp_with_cpumap_helpers *skel;
struct bpf_prog_info info = {};
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
+ /* Try to attach BPF_XDP program with frags to cpumap when we have
+ * already loaded a BPF_XDP program on the map
+ */
+ idx = 1;
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
+
out_close:
test_xdp_with_cpumap_helpers__destroy(skel);
}
+
+/* Check cpumap behaviour with a frags-aware XDP program.
+ *
+ * Installs xdp_dummy_cm_frags in cpumap entry 0 and verifies that the entry
+ * reads back with that program's ID. Then tries to install the non-frags
+ * xdp_dummy_cm program in entry 1 of the same map and expects the update to
+ * be rejected.
+ */
+void test_xdp_with_cpumap_frags_helpers(void)
+{
+	struct test_xdp_with_cpumap_frags_helpers *skel;
+	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
+	struct bpf_cpumap_val val = {
+		.qsize = 192,
+	};
+	int err, frags_prog_fd, map_fd;
+	__u32 idx = 0;
+
+	skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+	/* Label names the skeleton actually loaded here (the frags variant). */
+	if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_frags_helpers__open_and_load"))
+		return;
+
+	frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+	map_fd = bpf_map__fd(skel->maps.cpu_map);
+	err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+		goto out_close;
+
+	val.bpf_prog.fd = frags_prog_fd;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_OK(err, "Add program to cpumap entry");
+
+	err = bpf_map_lookup_elem(map_fd, &idx, &val);
+	ASSERT_OK(err, "Read cpumap entry");
+	ASSERT_EQ(info.id, val.bpf_prog.id,
+		  "Match program id to cpumap entry prog_id");
+
+	/* Try to attach BPF_XDP program to cpumap when we have
+	 * already loaded a BPF_XDP program with frags on the map
+	 */
+	idx = 1;
+	/* val was overwritten by the lookup above, so set qsize again. */
+	val.qsize = 192;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+	test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
+/* Test entry point: runs the regular and the frags cpumap checks as two
+ * independently selectable subtests.
+ */
+void serial_test_xdp_cpumap_attach(void)
+{
+ if (test__start_subtest("CPUMAP with programs in entries"))
+ test_xdp_with_cpumap_helpers();
+
+ if (test__start_subtest("CPUMAP with frags programs in entries"))
+ test_xdp_with_cpumap_frags_helpers();
+}
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
#define IFINDEX_LO 1
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
+ /* Try to attach BPF_XDP program with frags to devmap when we have
+ * already loaded a BPF_XDP program on the map
+ */
+ idx = 1;
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
+
out_close:
test_xdp_with_devmap_helpers__destroy(skel);
}
}
}
+/* Check devmap behaviour with a frags-aware XDP program.
+ *
+ * Installs xdp_dummy_dm_frags in devmap entry 0 and verifies that the entry
+ * reads back with that program's ID. Then tries to install the non-frags
+ * xdp_dummy_dm program in entry 1 of the same map and expects the update to
+ * be rejected.
+ */
+void test_xdp_with_devmap_frags_helpers(void)
+{
+	struct test_xdp_with_devmap_frags_helpers *skel;
+	struct bpf_prog_info info = {};
+	struct bpf_devmap_val val = {
+		.ifindex = IFINDEX_LO,
+	};
+	__u32 len = sizeof(info);
+	int err, dm_fd_frags, map_fd;
+	__u32 idx = 0;
+
+	skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+	/* Label names the skeleton actually loaded here (the frags variant). */
+	if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_frags_helpers__open_and_load"))
+		return;
+
+	dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+	map_fd = bpf_map__fd(skel->maps.dm_ports);
+	err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len);
+	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+		goto out_close;
+
+	val.bpf_prog.fd = dm_fd_frags;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_OK(err, "Add frags program to devmap entry");
+
+	err = bpf_map_lookup_elem(map_fd, &idx, &val);
+	ASSERT_OK(err, "Read devmap entry");
+	ASSERT_EQ(info.id, val.bpf_prog.id,
+		  "Match program id to devmap entry prog_id");
+
+	/* Try to attach BPF_XDP program to devmap when we have
+	 * already loaded a BPF_XDP program with frags on the map
+	 */
+	idx = 1;
+	/* val was overwritten by the lookup above, so set ifindex again. */
+	val.ifindex = IFINDEX_LO;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+	test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
+/* Test entry point: runs the regular, frags and verifier-negative devmap
+ * checks as independently selectable subtests.
+ */
void serial_test_xdp_devmap_attach(void)
{
if (test__start_subtest("DEVMAP with programs in entries"))
test_xdp_with_devmap_helpers();
+ if (test__start_subtest("DEVMAP with frags programs in entries"))
+ test_xdp_with_devmap_frags_helpers();
+
if (test__start_subtest("Verifier check of DEVMAP programs"))
test_neg_xdp_devmap_helpers();
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+/* Number of sun_path bytes compared by cmpname(). */
+#define AUTOBIND_LEN 6
+/* Name of the socket to act on; presumably filled in by the userspace side
+ * of the test before the iterator runs - confirm against the loader.
+ */
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+/* Values passed one by one to bpf_setsockopt(SO_SNDBUF). */
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+/* Values read back with bpf_getsockopt(SO_SNDBUF); -1 means "not written". */
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+/* Not touched by this program; presumably set and compared by userspace. */
+int sndbuf_getsockopt_expected[NR_CASES];
+
+/* Compare the first AUTOBIND_LEN bytes of the socket's sun_path with the
+ * global sun_path. Returns 0 when all bytes match, -1 on the first mismatch.
+ * The caller must already have checked that unix_sk->addr is non-NULL.
+ */
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+ int i;
+
+ for (i = 0; i < AUTOBIND_LEN; i++) {
+ if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Unix-socket iterator program: on the socket whose name matches the global
+ * sun_path, set SO_SNDBUF to each value in sndbuf_setsockopt[] and read the
+ * resulting value back into sndbuf_getsockopt[]. Always returns 0.
+ */
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ int i, err;
+
+ /* Skip iterations without a socket and sockets without an address. */
+ if (!unix_sk || !unix_sk->addr)
+ return 0;
+
+ /* Only names whose first byte is zero are considered; presumably this
+ * selects abstract-namespace sockets - TODO confirm.
+ */
+ if (unix_sk->addr->name->sun_path[0])
+ return 0;
+
+ if (cmpname(unix_sk))
+ return 0;
+
+ /* Stop at the first failing helper call; later slots of
+ * sndbuf_getsockopt[] then keep their initial -1.
+ */
+ for (i = 0; i < NR_CASES; i++) {
+ err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf_setsockopt[i],
+ sizeof(sndbuf_setsockopt[i]));
+ if (err)
+ break;
+
+ err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf_getsockopt[i],
+ sizeof(sndbuf_getsockopt[i]));
+ if (err)
+ break;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
BPF_SEQ_PRINTF(seq, " @");
for (i = 1; i < len; i++) {
- /* unix_mkname() tests this upper bound. */
+ /* unix_validate_addr() tests this upper bound. */
if (i >= sizeof(struct sockaddr_un))
break;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* Read-only configuration for the programs below. The { -1 } initializer
+ * sets tgid to -1 and zero-initializes the remaining members.
+ */
+const volatile struct {
+ /* thread to activate trace programs for */
+ pid_t tgid;
+ /* return error from __init function */
+ int inject_error;
+ /* uffd monitored range start address */
+ void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+/* Progress marker: 0 initially, 1 once widen_race is about to block,
+ * 2 once fexit_init_module has run.
+ */
+int bpf_blocking = 0;
+/* -1 until fexit_module_get runs, then 1 or 0 (!!mod). */
+int res_try_get_module = -1;
+
+/* Return true when the current task's tgid equals the configured tgid. */
+static __always_inline bool check_thread_id(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ * load_module()
+ * prepare_coming_module()
+ * notifier_call(MODULE_STATE_COMING)
+ * btf_parse_module()
+ * btf_alloc_id() // Visible to userspace at this point
+ * list_add(btf_mod->list, &btf_modules)
+ * do_init_module()
+ * freeinit = kmalloc()
+ * ret = mod->init()
+ * bpf_prog_widen_race()
+ * bpf_copy_from_user()
+ * ...<sleep>...
+ * if (ret < 0)
+ * ...
+ * free_module()
+ * return ret
+ *
+ * At this point, module loading thread is blocked, we now load the program:
+ *
+ * bpf_check
+ * add_kfunc_call/check_pseudo_btf_id
+ * btf_try_get_module
+ * try_get_module_live == false
+ * return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ * add_kfunc_call/check_pseudo_btf_id
+ * btf_try_get_module
+ * try_get_module == true
+ * <store module reference in btf_kfunc_tab or used_btf array>
+ * ...
+ * return fd
+ *
+ * Now, if we inject an error in the blocked program, our module will be freed
+ * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when bpf program is freed, it will try to module_put already freed
+ * module. This is why try_get_module_live returns false if mod->state is not
+ * MODULE_STATE_LIVE.
+ */
+
+/* fmod_ret program on bpf_fentry_test1 (the ".s" suffix presumably marks it
+ * sleepable - confirm against libbpf section naming).
+ *
+ * For the configured thread only: flags that blocking is about to start,
+ * reads one byte from the configured fault address via bpf_copy_from_user(),
+ * then returns the configured inject_error value. Other threads get 0.
+ */
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+ char dst;
+
+ if (!check_thread_id())
+ return 0;
+ /* Indicate that we will attempt to block */
+ bpf_blocking = 1;
+ /* The copied byte and the helper's return value are not used. */
+ bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+ return bpf_mod_race_config.inject_error;
+}
+
+/* fexit program on do_init_module: for the configured thread, records that
+ * the blocking phase is over by setting bpf_blocking to 2. Always returns 0.
+ */
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+ if (!check_thread_id())
+ return 0;
+ /* Indicate that we finished blocking */
+ bpf_blocking = 2;
+ return 0;
+}
+
+/* fexit program on btf_try_get_module: stores whether @mod is non-NULL in
+ * res_try_get_module. Unlike the other programs in this file it does not
+ * filter on the configured thread.
+ * NOTE(review): @mod is presumably the traced function's return value
+ * (fexit argument convention) - confirm.
+ */
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+ res_try_get_module = !!mod;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
#define AF_INET 2
#define AF_INET6 10
+#define SOL_SOCKET 1
+#define SO_SNDBUF 7
#define __SO_ACCEPTCON (1 << 16)
#define SOL_TCP 6
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+
+SEC("cgroup/getsockopt") /* observer program: snapshots the return value visible to this hook */
+int get_retval(struct bpf_sockopt *ctx)
+{
+ retval_value = bpf_get_retval(); /* value as reported by the helper */
+ ctx_retval_value = ctx->retval; /* value as exposed through the context field */
+ __sync_fetch_and_add(&invocations, 1); /* count this run */
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt") /* sets the return value to -EISCONN via the helper */
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1); /* count this run */
+
+ if (bpf_set_retval(-EISCONN)) /* a non-zero helper result is flagged as a test failure */
+ assertion_error = 1;
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt") /* resets the return value through the context instead of the helper */
+int clear_retval(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1); /* count this run */
+
+ ctx->retval = 0; /* direct write to the context's retval field */
+
+ return 1;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+
+SEC("cgroup/setsockopt") /* observer program: snapshots the helper-reported return value */
+int get_retval(struct bpf_sockopt *ctx)
+{
+ retval_value = bpf_get_retval(); /* value as reported by the helper */
+ __sync_fetch_and_add(&invocations, 1); /* count this run */
+
+ return 1;
+}
+
+SEC("cgroup/setsockopt") /* sets the return value to -EUNATCH, then returns 0 */
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1); /* count this run */
+
+ if (bpf_set_retval(-EUNATCH)) /* a non-zero helper result is flagged as a test failure */
+ assertion_error = 1;
+
+ return 0; /* unlike get_retval, this program returns 0 */
+}
+
+SEC("cgroup/setsockopt") /* sets the return value to -EISCONN, then returns 0 */
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1); /* count this run */
+
+ if (bpf_set_retval(-EISCONN)) /* a non-zero helper result is flagged as a test failure */
+ assertion_error = 1;
+
+ return 0; /* unlike get_retval, this program returns 0 */
+}
+
+SEC("cgroup/setsockopt") /* returns 0 without ever calling bpf_set_retval() */
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1); /* count this run */
+
+ return 0; /* NOTE(review): presumably a bare 0 return yields -EPERM, as the name suggests - confirm */
+}
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") sock_map = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 2);
+} sock_map SEC(".maps");
SEC("freplace/cls_redirect")
int freplace_cls_redirect_test(struct __sk_buff *skb)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+SEC("tc") /* minimal program whose only work is a call to the bpf_testmod kfunc */
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+ bpf_testmod_test_mod_kfunc(0); /* NOTE(review): name suggests loading is expected to fail - confirm against the userspace test */
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
__u32 c, __u64 d) __ksym;
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
SEC("tc")
int kfunc_call_test2(struct __sk_buff *skb)
{
return ret;
}
+SEC("tc") /* acquires an object via a kfunc, checks two fields, then releases it */
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0; /* scratch argument handed to the acquire kfunc */
+ int ret = 0; /* stays 0 unless a field check fails */
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) { /* a NULL result skips the checks and returns 0 */
+ if (pt->a != 42 || pt->b != 108) /* values the test expects in the acquired object */
+ ret = -1;
+ bpf_kfunc_call_test_release(pt); /* pairs with the acquire above on every non-NULL path */
+ }
+ return ret;
+}
+
+SEC("tc") /* calls a series of test kfuncs with context, struct and stack-memory arguments */
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+ struct prog_test_pass1 p1 = {}; /* zero-initialised argument for ..._pass1 */
+ struct prog_test_pass2 p2 = {}; /* zero-initialised argument for ..._pass2 */
+ short a = 0; /* a..e: stack scalars of different types, each passed with its own sizeof */
+ __u64 b = 0;
+ long c = 0;
+ char d = 0;
+ int e = 0;
+
+ bpf_kfunc_call_test_pass_ctx(skb); /* passes the program context itself */
+ bpf_kfunc_call_test_pass1(&p1);
+ bpf_kfunc_call_test_pass2(&p2);
+
+ bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a)); /* pointer plus matching length */
+ bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+ bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+ bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+ bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+ bpf_kfunc_call_test_mem_len_fail2(&b, -1); /* NOTE(review): a "fail2" kfunc called with len -1 inside the "pass" program; presumably the second argument is not treated as a size here - confirm */
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc") /* reads the bpf_testmod per-CPU ksym for the current CPU */
+int ksym_fail(struct __sk_buff *ctx)
+{
+ return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); /* NOTE(review): name suggests loading is expected to fail - confirm against the userspace test */
+}
+
+char _license[] SEC("license") = "GPL";
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") htab = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__u32),
- .value_size = sizeof(long),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} htab SEC(".maps");
-struct bpf_map_def SEC("maps") array = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(long),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} array SEC(".maps");
/* Sample program which should always load for testing control paths. */
SEC(".text") int func()
{
void *data_end = (void *)(long) skb->data_end;
void *data = (void *)(long) skb->data;
- __u32 lport = skb->local_port;
- __u32 rport = skb->remote_port;
__u8 *d = data;
int err;
*/
if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
if (((struct tcp_zerocopy_receive *)optval)->address != 0)
- return 0; /* EPERM, unexpected data */
+ return 0; /* unexpected data */
return 1;
}
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
ctx->retval = 0; /* Reset system call return value to zero */
* bytes of data.
*/
if (optval_end - optval != page_size)
- return 0; /* EPERM, unexpected data size */
+ return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
- return 0; /* EPERM, deny everything except custom level */
+ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
- return 0; /* EPERM, couldn't get sk storage */
+ return 0; /* couldn't get sk storage */
if (!ctx->retval)
- return 0; /* EPERM, kernel should not have handled
+ return 0; /* kernel should not have handled
* SOL_CUSTOM, something is wrong!
*/
ctx->retval = 0; /* Reset system call return value to zero */
/* Overwrite SO_SNDBUF value */
if (optval + sizeof(__u32) > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
*(__u32 *)optval = 0x55AA;
ctx->optlen = 4;
/* Always use cubic */
if (optval + 5 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
memcpy(optval, "cubic", 5);
ctx->optlen = 5;
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
if (ctx->optlen != page_size * 2)
- return 0; /* EPERM, unexpected data size */
+ return 0; /* unexpected data size */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
/* Make sure we can trim the buffer. */
optval[0] = 0;
* bytes of data.
*/
if (optval_end - optval != page_size)
- return 0; /* EPERM, unexpected data size */
+ return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
- return 0; /* EPERM, deny everything except custom level */
+ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
- return 0; /* EPERM, couldn't get sk storage */
+ return 0; /* couldn't get sk storage */
storage->val = optval[0];
ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+
+struct nf_conn;
+
+struct bpf_ct_opts___local { /* options block passed to the ct lookup kfuncs declared below */
+ s32 netns_id; /* nf_ct_test uses -1 as its default value */
+ s32 error; /* read back by nf_ct_test whenever a lookup returns NULL */
+ u8 l4proto; /* nf_ct_test uses IPPROTO_TCP as its default value */
+ u8 reserved[3]; /* left zero except in the "reserved" test case */
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+
+static __always_inline void /* runs eight lookup scenarios against func and records opts_def.error for each NULL result */
+nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ void *ctx)
+{
+ struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; /* baseline options; each case perturbs one input and then restores it */
+ struct bpf_sock_tuple bpf_tuple;
+ struct nf_conn *ct;
+
+ __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); /* only the ipv4 part is zeroed; only sizeof(ipv4) is ever passed as the tuple length */
+
+ ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def)); /* case 1: NULL tuple with zero length */
+ if (ct)
+ bpf_ct_release(ct); /* any unexpected hit is released */
+ else
+ test_einval_bpf_tuple = opts_def.error;
+
+ opts_def.reserved[0] = 1; /* case 2: non-zero reserved byte */
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ opts_def.reserved[0] = 0; /* restore baseline */
+ opts_def.l4proto = IPPROTO_TCP;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_reserved = opts_def.error;
+
+ opts_def.netns_id = -2; /* case 3: netns_id of -2 */
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ opts_def.netns_id = -1; /* restore baseline */
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_netns_id = opts_def.error;
+
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1); /* case 4: opts length one byte short */
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_len_opts = opts_def.error;
+
+ opts_def.l4proto = IPPROTO_ICMP; /* case 5: l4proto set to ICMP */
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ opts_def.l4proto = IPPROTO_TCP; /* restore baseline */
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_eproto_l4proto = opts_def.error;
+
+ opts_def.netns_id = 0xf00f; /* case 6: netns_id of 0xf00f */
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ opts_def.netns_id = -1; /* restore baseline */
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_enonet_netns_id = opts_def.error;
+
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); /* case 7: baseline inputs with the zeroed tuple */
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_enoent_lookup = opts_def.error;
+
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def)); /* case 8: tuple length one byte short */
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_eafnosupport = opts_def.error;
+}
+
+SEC("xdp") /* runs the shared scenarios through the XDP lookup kfunc */
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+ nf_ct_test((void *)bpf_xdp_ct_lookup, ctx); /* cast adapts the kfunc to nf_ct_test's generic void * context signature */
+ return 0;
+}
+
+SEC("tc") /* runs the shared scenarios through the skb lookup kfunc */
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+ nf_ct_test((void *)bpf_skb_ct_lookup, ctx); /* cast adapts the kfunc to nf_ct_test's generic void * context signature */
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
unsigned int v6;
};
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
struct bpf_map_def SEC("maps") btf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(struct ipv_counts),
.max_entries = 4,
};
+#pragma GCC diagnostic pop
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
unsigned int v6;
};
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/* just to validate we can handle maps in multiple sections */
struct bpf_map_def SEC("maps") btf_map_legacy = {
.type = BPF_MAP_TYPE_ARRAY,
.value_size = sizeof(long long),
.max_entries = 4,
};
+#pragma GCC diagnostic pop
BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
unsigned int v6;
};
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct ipv_counts));
+ __uint(max_entries, 4);
+} btf_map SEC(".maps");
__attribute__((noinline))
int test_long_fname_2(void)
#define NUM_CGROUP_LEVELS 4
-struct bpf_map_def SEC("maps") cgroup_ids = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = NUM_CGROUP_LEVELS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, NUM_CGROUP_LEVELS);
+} cgroup_ids SEC(".maps");
static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
{
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct { /* BTF-defined map: single-entry sockmap */
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb") /* verdict program that unconditionally passes */
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_msg") /* message verdict program that unconditionally passes */
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
/* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(uint32_t),
- .value_size = sizeof(uint64_t),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, uint32_t);
+ __type(value, uint64_t);
+ __uint(max_entries, 1);
+} flow_map SEC(".maps");
static inline int throttle_flow(struct __sk_buff *skb)
{
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-struct bpf_map_def SEC("maps") results = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 3,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} results SEC(".maps");
static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
void *iph, __u32 ip_size,
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
- unsigned int data_len;
+ int data_len = bpf_xdp_get_buff_len(xdp);
int offset = 0;
/* Data length determine test case */
- data_len = data_end - data;
if (data_len == 54) { /* sizeof(pkt_v4) */
offset = 4096; /* test too large offset */
} else if (data_len == 64) {
offset = 128;
} else if (data_len == 128) {
- offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+ /* Max tail grow 3520 */
+ offset = 4096 - 256 - 320 - data_len;
+ } else if (data_len == 9000) {
+ offset = 10;
+ } else if (data_len == 9001) {
+ offset = 4096;
} else {
return XDP_ABORTED; /* No matching test */
}
SEC("xdp")
int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
+ __u8 *data_end = (void *)(long)xdp->data_end;
+ __u8 *data = (void *)(long)xdp->data;
int offset = 0;
- if (data_end - data == 54) /* sizeof(pkt_v4) */
+ switch (bpf_xdp_get_buff_len(xdp)) {
+ case 54:
+ /* sizeof(pkt_v4) */
offset = 256; /* shrink too much */
- else
+ break;
+ case 9000:
+ /* non-linear buff test cases */
+ if (data + 1 > data_end)
+ return XDP_DROP;
+
+ switch (data[0]) {
+ case 0:
+ offset = 10;
+ break;
+ case 1:
+ offset = 4100;
+ break;
+ case 2:
+ offset = 8200;
+ break;
+ default:
+ return XDP_DROP;
+ }
+ break;
+ default:
offset = 20;
+ break;
+ }
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
return XDP_DROP;
return XDP_TX;
void *data = (void *)(long)xdp->data;
meta.ifindex = xdp->rxq->dev->ifindex;
- meta.pkt_len = data_end - data;
+ meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
bpf_xdp_output(xdp, &perf_buf_map,
((__u64) meta.pkt_len << 32) |
BPF_F_CURRENT_CPU,
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags") /* rewrites a 16-byte marker at a packet-supplied offset using the load/store bytes helpers */
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+ __u8 *data_end = (void *)(long)xdp->data_end;
+ __u8 *data = (void *)(long)xdp->data;
+ __u8 val[16] = {}; /* bounce buffer for the marker region */
+ __u32 offset;
+ int err;
+
+ if (data + sizeof(__u32) > data_end) /* bounds check before the direct 4-byte read below */
+ return XDP_DROP;
+
+ offset = *(__u32 *)data; /* the first four bytes of the packet carry the target offset */
+ err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val)); /* copy 16 bytes out of the buffer at that offset */
+ if (err < 0)
+ return XDP_DROP;
+
+ if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+ return XDP_DROP;
+
+ val[0] = 0xbb; /* update the marker */
+ val[15] = 0xbb;
+ err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val)); /* write the modified bytes back to the same offset */
+ if (err < 0)
+ return XDP_DROP;
+
+ return XDP_PASS; /* reached only when load, marker check and store all succeeded */
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO 1
+
+struct { /* BTF-defined map: four-entry cpumap with explicit key/value sizes */
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_cpumap/dummy_cm") /* no-op program in a cpumap section; always passes */
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap") /* same no-op, placed in the "xdp.frags" flavour of the cpumap section */
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
return XDP_PASS;
}
+SEC("xdp.frags/cpumap") /* no-op program in the "xdp.frags" flavour of the cpumap section; always passes */
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct { /* BTF-defined map: four-entry devmap with explicit key/value sizes */
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap/map_prog")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+ return XDP_PASS; /* no-op: the body never inspects ctx */
+}
+
+SEC("xdp.frags/devmap") /* same no-op, placed in the "xdp.frags" flavour of the devmap section */
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
return XDP_PASS;
}
+
+SEC("xdp.frags/devmap") /* no-op program in the "xdp.frags" flavour of the devmap section; always passes */
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
#include <linux/if_ether.h>
#include <linux/btf.h>
+#include <bpf/btf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
static int skips;
static bool verbose = false;
+struct kfunc_btf_id_pair { /* one kfunc fixup: which function to resolve and which instruction to patch */
+ const char *kfunc; /* function name looked up in vmlinux BTF; a NULL name ends the fixup list */
+ int insn_idx; /* index into the test's instruction array whose imm receives the BTF id */
+};
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
int fixup_map_reuseport_array[MAX_FIXUPS];
int fixup_map_ringbuf[MAX_FIXUPS];
int fixup_map_timer[MAX_FIXUPS];
+ struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
* Can be a tab-separated sequence of expected strings. An empty string
* means no log verification.
int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
int *fixup_map_ringbuf = test->fixup_map_ringbuf;
int *fixup_map_timer = test->fixup_map_timer;
+ struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
fixup_map_timer++;
} while (*fixup_map_timer);
}
+
+ /* Patch in kfunc BTF IDs */
+ if (fixup_kfunc_btf_id->kfunc) {
+ struct btf *btf;
+ int btf_id;
+
+ do {
+ btf_id = 0;
+ btf = btf__load_vmlinux_btf();
+ if (btf) {
+ btf_id = btf__find_by_name_kind(btf,
+ fixup_kfunc_btf_id->kfunc,
+ BTF_KIND_FUNC);
+ btf_id = btf_id < 0 ? 0 : btf_id;
+ }
+ btf__free(btf);
+ prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+ fixup_kfunc_btf_id++;
+ } while (fixup_kfunc_btf_id->kfunc);
+ }
}
struct libcap {
.result = ACCEPT,
},
{
+ "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail1", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail2", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail3", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 expected pointer to ctx, but got PTR",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_pass_ctx", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type UNKNOWN must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_mem_len_fail1", 2 },
+ },
+},
+{
"calls: basic sanity",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
return true;
case STAT_TEST_RX_FULL:
xsk_stat = stats.rx_ring_full;
- expected_stat -= RX_FULL_RXQSIZE;
+ if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
+ else
+ expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
break;
case STAT_TEST_RX_FILL_EMPTY:
xsk_stat = stats.rx_fill_ring_empty_descs;