Merge tag 'wireless-drivers-next-2021-10-29' of git://git.kernel.org/pub/scm/linux...
author Jakub Kicinski <kuba@kernel.org>
Fri, 29 Oct 2021 15:58:39 +0000 (08:58 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Fri, 29 Oct 2021 15:58:40 +0000 (08:58 -0700)
Kalle Valo says:

====================
wireless-drivers-next patches for v5.16

Fourth set of patches for v5.16. Mostly fixes this time; wcn36xx and
iwlwifi have some new features, but nothing really out of the ordinary.
We have one conflict with the kspp tree.

Major changes:

ath11k
 * fix QCA6390 A-MSDU handling (CVE-2020-24588)

wcn36xx
 * enable hardware scan offload for the 5GHz band
 * add missing 5GHz channels 136 and 144

iwlwifi
 * support a new ACPI table revision
 * improvements in the device selection code
 * new hardware support
 * support for WiFi 6E enablement via BIOS
 * support firmware API version 67
 * support for 160MHz in ranging measurements

====================

Link: https://lore.kernel.org/r/20211029134707.DE2B0C4360D@smtp.codeaurora.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
313 files changed:
Documentation/devicetree/bindings/mfd/brcm,cru.yaml
Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml
Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml
Documentation/networking/devlink/bnxt.rst
Documentation/networking/devlink/index.rst
Documentation/networking/devlink/octeontx2.rst [new file with mode: 0644]
Documentation/userspace-api/ioctl/ioctl-number.rst
MAINTAINERS
Makefile
arch/arm/Kconfig
arch/arm/boot/compressed/decompress.c
arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts
arch/arm/include/asm/uaccess.h
arch/arm/kernel/head.S
arch/arm/kernel/traps.c
arch/arm/kernel/vmlinux-xip.lds.S
arch/arm/mm/proc-macros.S
arch/arm/probes/kprobes/core.c
arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts
arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts
arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi
arch/arm64/boot/dts/qcom/sm8250.dtsi
arch/arm64/net/bpf_jit_comp.c
arch/nds32/kernel/ftrace.c
arch/nios2/platform/Kconfig.platform
arch/riscv/net/bpf_jit_core.c
arch/um/drivers/net_kern.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/xtensa/platforms/iss/network.c
block/blk-cgroup.c
block/partitions/core.c
drivers/acpi/power.c
drivers/ata/sata_mv.c
drivers/base/regmap/regcache-rbtree.c
drivers/firewire/net.c
drivers/hv/hyperv_vmbus.h
drivers/infiniband/core/sa_query.c
drivers/infiniband/hw/hfi1/pio.c
drivers/infiniband/hw/irdma/uk.c
drivers/infiniband/hw/irdma/verbs.c
drivers/infiniband/hw/irdma/ws.c
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/devx.h
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/wr.c
drivers/infiniband/hw/qedr/qedr.h
drivers/infiniband/hw/qedr/qedr_iw_cm.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/hw/qib/qib_user_sdma.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/media/dvb-core/dvb_net.c
drivers/message/fusion/mptlan.c
drivers/misc/sgi-xp/xpnet.c
drivers/net/bareudp.c
drivers/net/ethernet/broadcom/bnxt/Makefile
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c [new file with mode: 0644]
drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/intel/ice/ice.h
drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
drivers/net/ethernet/intel/ice/ice_flex_pipe.c
drivers/net/ethernet/intel/ice/ice_flex_type.h
drivers/net/ethernet/intel/ice/ice_lag.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_protocol_type.h
drivers/net/ethernet/intel/ice/ice_ptp.c
drivers/net/ethernet/intel/ice/ice_repr.c
drivers/net/ethernet/intel/ice/ice_switch.c
drivers/net/ethernet/intel/ice/ice_switch.h
drivers/net/ethernet/intel/ice/ice_tc_lib.c
drivers/net/ethernet/intel/ice/ice_tc_lib.h
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
drivers/net/ethernet/lantiq_xrx200.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/octeontx2/af/npc.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
drivers/net/ethernet/marvell/prestera/prestera.h
drivers/net/ethernet/marvell/prestera/prestera_ethtool.c
drivers/net/ethernet/marvell/prestera/prestera_ethtool.h
drivers/net/ethernet/marvell/prestera/prestera_hw.c
drivers/net/ethernet/marvell/prestera/prestera_hw.h
drivers/net/ethernet/marvell/prestera/prestera_main.c
drivers/net/ethernet/marvell/prestera/prestera_pci.c
drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
drivers/net/ethernet/mellanox/mlx5/core/en/params.h
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
drivers/net/ethernet/mellanox/mlx5/core/en_common.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fw.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlx5/core/uar.c
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/microchip/lan743x_main.h
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/netronome/nfp/bpf/offload.c
drivers/net/ethernet/nxp/lpc_eth.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ifb.c
drivers/net/phy/at803x.c
drivers/net/phy/microchip_t1.c
drivers/net/phy/phy.c
drivers/net/thunderbolt.c
drivers/net/usb/lan78xx.c
drivers/net/usb/usbnet.c
drivers/net/virtio_net.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/xen-netfront.c
drivers/nfc/port100.c
drivers/pinctrl/bcm/pinctrl-ns.c
drivers/pinctrl/pinctrl-amd.c
drivers/pinctrl/stm32/pinctrl-stm32.c
drivers/ptp/ptp_clock.c
drivers/reset/Kconfig
drivers/reset/reset-brcmstb-rescal.c
drivers/reset/reset-socfpga.c
drivers/reset/tegra/reset-bpmp.c
drivers/scsi/hosts.c
drivers/scsi/mpi3mr/mpi3mr_os.c
drivers/scsi/qla2xxx/qla_bsg.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/scsi.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_iscsi.c
drivers/scsi/sd.c
drivers/scsi/storvsc_drv.c
drivers/scsi/ufs/ufshcd-pci.c
drivers/spi/spi-altera-dfl.c
drivers/spi/spi-altera-platform.c
drivers/spi/spi-pl022.c
drivers/spi/spi-tegra20-slink.c
drivers/staging/octeon/ethernet.c
drivers/vdpa/mlx5/core/mlx5_vdpa.h
drivers/vdpa/mlx5/core/mr.c
drivers/vdpa/mlx5/core/resources.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/vdpa_user/vduse_dev.c
drivers/virtio/virtio_ring.c
drivers/watchdog/iTCO_wdt.c
drivers/watchdog/ixp4xx_wdt.c
drivers/watchdog/omap_wdt.c
drivers/watchdog/sbsa_gwdt.c
fs/autofs/waitq.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/fuse/virtio_fs.c
fs/io-wq.c
fs/io_uring.c
fs/ksmbd/auth.c
fs/ksmbd/connection.c
fs/ksmbd/ksmbd_netlink.h
fs/ksmbd/mgmt/user_config.c
fs/ksmbd/mgmt/user_config.h
fs/ksmbd/smb2misc.c
fs/ksmbd/smb2ops.c
fs/ksmbd/smb2pdu.c
fs/ksmbd/smb2pdu.h
fs/ksmbd/transport_ipc.c
fs/ksmbd/transport_ipc.h
fs/ksmbd/transport_rdma.c
fs/ksmbd/vfs.c
fs/ksmbd/vfs.h
include/acpi/platform/acgcc.h
include/linux/bitmap.h
include/linux/bpf-cgroup.h
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/filter.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/skbuff.h
include/linux/skmsg.h
include/net/cfg80211.h
include/net/mctp.h
include/net/mctpdevice.h
include/net/mptcp.h
include/net/sch_generic.h
include/net/sock.h
include/net/switchdev.h
include/net/tcp.h
include/net/tls.h
include/net/udp.h
kernel/bpf/arraymap.c
kernel/bpf/core.c
kernel/bpf/syscall.c
kernel/cgroup/cgroup.c
kernel/sched/core.c
kernel/trace/trace_eprobe.c
lib/bitmap.c
mm/secretmem.c
net/batman-adv/bridge_loop_avoidance.c
net/batman-adv/main.c
net/batman-adv/network-coding.c
net/batman-adv/translation-table.c
net/bridge/br_fdb.c
net/bridge/br_if.c
net/bridge/br_mdb.c
net/bridge/br_private.h
net/bridge/br_switchdev.c
net/bridge/br_vlan.c
net/core/dev.c
net/core/devlink.c
net/core/net-sysfs.c
net/core/skbuff.c
net/core/skmsg.c
net/core/sock_destructor.h [new file with mode: 0644]
net/core/sysctl_net_core.c
net/core/xdp.c
net/dsa/slave.c
net/ipv4/af_inet.c
net/ipv4/inet_diag.c
net/ipv4/ipconfig.c
net/ipv4/tcp.c
net/ipv4/tcp_bpf.c
net/ipv4/tcp_output.c
net/ipv4/udp.c
net/ipv4/udp_bpf.c
net/ipv6/af_inet6.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/mac80211/mesh.c
net/mctp/Kconfig
net/mctp/device.c
net/mctp/route.c
net/mptcp/options.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/sched/em_meta.c
net/sched/sch_generic.c
net/sched/sch_gred.c
net/sched/sch_htb.c
net/sctp/sm_statefuns.c
net/smc/af_smc.c
net/smc/smc_llc.c
net/switchdev/switchdev.c
net/tipc/crypto.c
net/tls/tls_main.c
net/tls/tls_sw.c
net/unix/af_unix.c
net/unix/unix_bpf.c
net/wireless/core.c
net/wireless/core.h
net/wireless/mlme.c
net/wireless/scan.c
net/wireless/util.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
tools/testing/selftests/net/fcnal-test.sh
tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh

diff --git a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
index fc1317a..28ac60a 100644
@@ -32,13 +32,13 @@ properties:
   "#size-cells":
     const: 1
 
-  pinctrl:
-    $ref: ../pinctrl/brcm,ns-pinmux.yaml
-
 patternProperties:
   '^clock-controller@[a-f0-9]+$':
     $ref: ../clock/brcm,iproc-clocks.yaml
 
+  '^pin-controller@[a-f0-9]+$':
+    $ref: ../pinctrl/brcm,ns-pinmux.yaml
+
   '^thermal@[a-f0-9]+$':
     $ref: ../thermal/brcm,ns-thermal.yaml
 
@@ -73,9 +73,10 @@ examples:
                                  "iprocfast", "sata1", "sata2";
         };
 
-        pinctrl {
+        pin-controller@1c0 {
             compatible = "brcm,bcm4708-pinmux";
-            offset = <0x1c0>;
+            reg = <0x1c0 0x24>;
+            reg-names = "cru_gpio_control";
         };
 
         thermal@2c0 {
diff --git a/Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml b/Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml
index 16d831f..7bc074a 100644
@@ -29,18 +29,6 @@ properties:
       - const: tx
       - const: rx
 
-  lantiq,tx-burst-length:
-    $ref: /schemas/types.yaml#/definitions/uint32
-    description: |
-      TX programmable burst length.
-    enum: [2, 4, 8]
-
-  lantiq,rx-burst-length:
-    $ref: /schemas/types.yaml#/definitions/uint32
-    description: |
-      RX programmable burst length.
-    enum: [2, 4, 8]
-
   '#address-cells':
     const: 1
 
@@ -53,8 +41,6 @@ required:
   - interrupt-parent
   - interrupts
   - interrupt-names
-  - lantiq,tx-burst-length
-  - lantiq,rx-burst-length
   - "#address-cells"
   - "#size-cells"
 
@@ -70,6 +56,4 @@ examples:
         interrupt-parent = <&icu0>;
         interrupts = <73>, <72>;
         interrupt-names = "tx", "rx";
-        lantiq,tx-burst-length = <8>;
-        lantiq,rx-burst-length = <8>;
     };
diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml
index 470aff5..fc39e3e 100644
@@ -17,9 +17,6 @@ description:
 
   A list of pins varies across chipsets so few bindings are available.
 
-  Node of the pinmux must be nested in the CRU (Central Resource Unit) "syscon"
-  node.
-
 properties:
   compatible:
     enum:
@@ -27,10 +24,11 @@ properties:
       - brcm,bcm4709-pinmux
       - brcm,bcm53012-pinmux
 
-  offset:
-    description: offset of pin registers in the CRU block
+  reg:
     maxItems: 1
-    $ref: /schemas/types.yaml#/definitions/uint32-array
+
+  reg-names:
+    const: cru_gpio_control
 
 patternProperties:
   '-pins$':
@@ -72,23 +70,20 @@ allOf:
                         uart1_grp ]
 
 required:
-  - offset
+  - reg
+  - reg-names
 
 additionalProperties: false
 
 examples:
   - |
-    cru@1800c100 {
-        compatible = "syscon", "simple-mfd";
-        reg = <0x1800c100 0x1a4>;
-
-        pinctrl {
-            compatible = "brcm,bcm4708-pinmux";
-            offset = <0xc0>;
-
-            spi-pins {
-                function = "spi";
-                groups = "spi_grp";
-            };
+    pin-controller@1800c1c0 {
+        compatible = "brcm,bcm4708-pinmux";
+        reg = <0x1800c1c0 0x24>;
+        reg-names = "cru_gpio_control";
+
+        spi-pins {
+            function = "spi";
+            groups = "spi_grp";
         };
     };
diff --git a/Documentation/networking/devlink/bnxt.rst b/Documentation/networking/devlink/bnxt.rst
index 3dfd84c..a4fb276 100644
@@ -22,6 +22,8 @@ Parameters
      - Permanent
    * - ``msix_vec_per_pf_min``
      - Permanent
+   * - ``enable_remote_dev_reset``
+     - Runtime
 
 The ``bnxt`` driver also implements the following driver-specific
 parameters.
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index 19ffd56..4431237 100644
@@ -48,3 +48,4 @@ parameters, info versions, and other features it supports.
    am65-nuss-cpsw-switch
    prestera
    iosm
+   octeontx2
diff --git a/Documentation/networking/devlink/octeontx2.rst b/Documentation/networking/devlink/octeontx2.rst
new file mode 100644
index 0000000..610de99
--- /dev/null
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+octeontx2 devlink support
+=========================
+
+This document describes the devlink features implemented by the ``octeontx2 AF, PF and VF``
+device drivers.
+
+Parameters
+==========
+
+The ``octeontx2 PF and VF`` drivers implement the following driver-specific parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``mcam_count``
+     - u16
+     - runtime
+     - Select the number of match CAM entries to be allocated for an interface.
+       The same count is used for the interface's ntuple filters. Supported by
+       the PF and VF drivers.
+
+The ``octeontx2 AF`` driver implements the following driver-specific parameters.
+
+.. list-table:: Driver-specific parameters implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Type
+     - Mode
+     - Description
+   * - ``dwrr_mtu``
+     - u32
+     - runtime
+     - Used to set the quantum which the hardware uses for scheduling among transmit queues.
+       The hardware uses a weighted DWRR algorithm to schedule among all transmit queues.
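
For context on the parameters documented above: they are exposed through the
kernel's devlink params API. Below is a minimal sketch of how a runtime u16
parameter such as ``mcam_count`` is typically registered; the otx2_dl_* names
and the empty callback bodies are illustrative assumptions, not code copied
from the octeontx2 driver.

#include <net/devlink.h>

enum otx2_dl_param_id {
	/* driver-specific ids start above the generic devlink id space */
	OTX2_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
	OTX2_DEVLINK_PARAM_ID_MCAM_COUNT,
};

static int otx2_dl_mcam_count_get(struct devlink *devlink, u32 id,
				  struct devlink_param_gset_ctx *ctx)
{
	ctx->val.vu16 = 0;	/* would report the currently allocated count */
	return 0;
}

static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id,
				  struct devlink_param_gset_ctx *ctx)
{
	/* ctx->val.vu16 carries the requested MCAM entry count */
	return 0;
}

static const struct devlink_param otx2_dl_params[] = {
	DEVLINK_PARAM_DRIVER(OTX2_DEVLINK_PARAM_ID_MCAM_COUNT,
			     "mcam_count", DEVLINK_PARAM_TYPE_U16,
			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
			     otx2_dl_mcam_count_get,
			     otx2_dl_mcam_count_set, NULL),
};

/* registered once during probe, e.g.:
 * devlink_params_register(dl, otx2_dl_params, ARRAY_SIZE(otx2_dl_params));
 */

Userspace then reads or writes the value through devlink's dev param
interface in the runtime cmode.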
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 2e81340..6655d92 100644
@@ -104,6 +104,7 @@ Code  Seq#    Include File                                           Comments
 '8'   all                                                            SNP8023 advanced NIC card
                                                                      <mailto:mcr@solidum.com>
 ';'   64-7F  linux/vfio.h
+'='   00-3f  uapi/linux/ptp_clock.h                                  <mailto:richardcochran@gmail.com>
 '@'   00-0F  linux/radeonfb.h                                        conflict!
 '@'   00-0F  drivers/video/aty/aty128fb.c                            conflict!
 'A'   00-1F  linux/apm_bios.h                                        conflict!
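
The row added above reserves ioctl code '=' (sequence 00-3f) for the PTP clock
character devices; in uapi/linux/ptp_clock.h, PTP_CLK_MAGIC is '='. Below is a
minimal userspace sketch exercising that range; the /dev/ptp0 device path is
an assumption about the target system.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/ptp_clock.h>

int main(void)
{
	struct ptp_clock_caps caps;
	int fd = open("/dev/ptp0", O_RDONLY);	/* assumed device node */

	if (fd < 0)
		return 1;
	/* PTP_CLOCK_GETCAPS is _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps) */
	if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps) == 0)
		printf("max_adj=%d ppb, pins=%d\n", caps.max_adj, caps.n_pins);
	close(fd);
	return 0;
}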
diff --git a/MAINTAINERS b/MAINTAINERS
index 975086c..3b85f03 100644
@@ -5464,6 +5464,19 @@ F:       include/net/devlink.h
 F:     include/uapi/linux/devlink.h
 F:     net/core/devlink.c
 
+DH ELECTRONICS IMX6 DHCOM BOARD SUPPORT
+M:     Christoph Niedermaier <cniedermaier@dh-electronics.com>
+L:     kernel@dh-electronics.com
+S:     Maintained
+F:     arch/arm/boot/dts/imx6*-dhcom-*
+
+DH ELECTRONICS STM32MP1 DHCOM/DHCOR BOARD SUPPORT
+M:     Marek Vasut <marex@denx.de>
+L:     kernel@dh-electronics.com
+S:     Maintained
+F:     arch/arm/boot/dts/stm32mp1*-dhcom-*
+F:     arch/arm/boot/dts/stm32mp1*-dhcor-*
+
 DIALOG SEMICONDUCTOR DRIVERS
 M:     Support Opensource <support.opensource@diasemi.com>
 S:     Supported
@@ -11284,7 +11297,6 @@ F:      Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
 F:     drivers/net/ethernet/marvell/octeontx2/af/
 
 MARVELL PRESTERA ETHERNET SWITCH DRIVER
-M:     Vadym Kochan <vkochan@marvell.com>
 M:     Taras Chornyi <tchornyi@marvell.com>
 S:     Supported
 W:     https://github.com/Marvell-switching/switchdev-prestera
@@ -20352,6 +20364,7 @@ X86 ARCHITECTURE (32-BIT AND 64-BIT)
 M:     Thomas Gleixner <tglx@linutronix.de>
 M:     Ingo Molnar <mingo@redhat.com>
 M:     Borislav Petkov <bp@alien8.de>
+M:     Dave Hansen <dave.hansen@linux.intel.com>
 M:     x86@kernel.org
 R:     "H. Peter Anvin" <hpa@zytor.com>
 L:     linux-kernel@vger.kernel.org
diff --git a/Makefile b/Makefile
index 9129767..30c7c81 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Opossums on Parade
 
 # *DOCUMENTATION*
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 59baf6c..dcf2df6 100644
@@ -92,6 +92,7 @@ config ARM
        select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
        select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
        select HAVE_FUNCTION_TRACER if !XIP_KERNEL
+       select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
        select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index aa075d8..74255e8 100644
@@ -47,7 +47,10 @@ extern char * strchrnul(const char *, int);
 #endif
 
 #ifdef CONFIG_KERNEL_XZ
+/* Prevent KASAN override of string helpers in decompressor */
+#undef memmove
 #define memmove memmove
+#undef memcpy
 #define memcpy memcpy
 #include "../../../../lib/decompress_unxz.c"
 #endif
diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts
index 8077f17..ecb91fb 100644
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 084d1c0..36fbc33 100644
@@ -176,6 +176,7 @@ extern int __get_user_64t_4(void *);
                register unsigned long __l asm("r1") = __limit;         \
                register int __e asm("r0");                             \
                unsigned int __ua_flags = uaccess_save_and_enable();    \
+               int __tmp_e;                                            \
                switch (sizeof(*(__p))) {                               \
                case 1:                                                 \
                        if (sizeof((x)) >= 8)                           \
@@ -203,9 +204,10 @@ extern int __get_user_64t_4(void *);
                        break;                                          \
                default: __e = __get_user_bad(); break;                 \
                }                                                       \
+               __tmp_e = __e;                                          \
                uaccess_restore(__ua_flags);                            \
                x = (typeof(*(p))) __r2;                                \
-               __e;                                                    \
+               __tmp_e;                                                \
        })
 
 #define get_user(x, p)                                                 \
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 29070eb..3fc7f97 100644
@@ -253,7 +253,7 @@ __create_page_tables:
        add     r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
        ldr     r6, =(_end - 1)
        adr_l   r5, kernel_sec_start            @ _pa(kernel_sec_start)
-#ifdef CONFIG_CPU_ENDIAN_BE8
+#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32
        str     r8, [r5, #4]                    @ Save physical start of kernel (BE)
 #else
        str     r8, [r5]                        @ Save physical start of kernel (LE)
@@ -266,7 +266,7 @@ __create_page_tables:
        bls     1b
        eor     r3, r3, r7                      @ Remove the MMU flags
        adr_l   r5, kernel_sec_end              @ _pa(kernel_sec_end)
-#ifdef CONFIG_CPU_ENDIAN_BE8
+#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32
        str     r3, [r5, #4]                    @ Save physical end of kernel (BE)
 #else
        str     r3, [r5]                        @ Save physical end of kernel (LE)
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 4a7edc6..195dff5 100644
@@ -136,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
                for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
                        if (p >= bottom && p < top) {
                                unsigned long val;
-                               if (get_kernel_nofault(val, (unsigned long *)p))
+                               if (!get_kernel_nofault(val, (unsigned long *)p))
                                        sprintf(str + i * 9, " %08lx", val);
                                else
                                        sprintf(str + i * 9, " ????????");
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 5013682..f14c236 100644
@@ -40,6 +40,10 @@ SECTIONS
                ARM_DISCARD
                *(.alt.smp.init)
                *(.pv_table)
+#ifndef CONFIG_ARM_UNWIND
+               *(.ARM.exidx) *(.ARM.exidx.*)
+               *(.ARM.extab) *(.ARM.extab.*)
+#endif
        }
 
        . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
@@ -172,7 +176,7 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
 ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
 #endif
 
-#ifdef CONFIG_ARM_MPU
+#if defined(CONFIG_ARM_MPU) && !defined(CONFIG_COMPILE_TEST)
 /*
  * Due to PMSAv7 restriction on base address and size we have to
  * enforce minimal alignment restrictions. It was seen that weaker
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index e2c743a..d9f7dfe 100644
@@ -340,6 +340,7 @@ ENTRY(\name\()_cache_fns)
 
 .macro define_tlb_functions name:req, flags_up:req, flags_smp
        .type   \name\()_tlb_fns, #object
+       .align 2
 ENTRY(\name\()_tlb_fns)
        .long   \name\()_flush_user_tlb_range
        .long   \name\()_flush_kern_tlb_range
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 27e0af7..9d8634e 100644
@@ -439,7 +439,7 @@ static struct undef_hook kprobes_arm_break_hook = {
 
 #endif /* !CONFIG_THUMB2_KERNEL */
 
-int __init arch_init_kprobes()
+int __init arch_init_kprobes(void)
 {
        arm_probes_decode_init();
 #ifdef CONFIG_THUMB2_KERNEL
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts
index 02f8e72..05486cc 100644
@@ -75,7 +75,7 @@
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_gmac_3v3>;
        phy-handle = <&ext_rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts
index d17abb5..e99e764 100644
@@ -70,7 +70,9 @@
                regulator-name = "rst-usb-eth2";
                pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_usb_eth2>;
-               gpio = <&gpio3 2 GPIO_ACTIVE_LOW>;
+               gpio = <&gpio3 2 GPIO_ACTIVE_HIGH>;
+               enable-active-high;
+               regulator-always-on;
        };
 
        reg_vdd_5v: regulator-5v {
@@ -95,7 +97,7 @@
                clocks = <&osc_can>;
                interrupt-parent = <&gpio4>;
                interrupts = <28 IRQ_TYPE_EDGE_FALLING>;
-               spi-max-frequency = <100000>;
+               spi-max-frequency = <10000000>;
                vdd-supply = <&reg_vdd_3v3>;
                xceiver-supply = <&reg_vdd_5v>;
        };
 &fec1 {
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet>;
-       phy-connection-type = "rgmii";
+       phy-connection-type = "rgmii-rxid";
        phy-handle = <&ethphy>;
        status = "okay";
 
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi
index 9db9b90..42bbbb3 100644
                        reg_vdd_soc: BUCK1 {
                                regulator-name = "buck1";
                                regulator-min-microvolt = <800000>;
-                               regulator-max-microvolt = <900000>;
+                               regulator-max-microvolt = <850000>;
                                regulator-boot-on;
                                regulator-always-on;
                                regulator-ramp-delay = <3125>;
+                               nxp,dvs-run-voltage = <850000>;
+                               nxp,dvs-standby-voltage = <800000>;
                        };
 
                        reg_vdd_arm: BUCK2 {
                        reg_vdd_dram: BUCK3 {
                                regulator-name = "buck3";
                                regulator-min-microvolt = <850000>;
-                               regulator-max-microvolt = <900000>;
+                               regulator-max-microvolt = <950000>;
                                regulator-boot-on;
                                regulator-always-on;
                        };
 
                        reg_vdd_snvs: LDO2 {
                                regulator-name = "ldo2";
-                               regulator-min-microvolt = <850000>;
+                               regulator-min-microvolt = <800000>;
                                regulator-max-microvolt = <900000>;
                                regulator-boot-on;
                                regulator-always-on;
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index 8c15d9f..d12e4cb 100644
                        power-domains = <&dispcc MDSS_GDSC>;
 
                        clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                                <&gcc GCC_DISP_HF_AXI_CLK>,
                                 <&gcc GCC_DISP_SF_AXI_CLK>,
                                 <&dispcc DISP_CC_MDSS_MDP_CLK>;
-                       clock-names = "iface", "nrt_bus", "core";
+                       clock-names = "iface", "bus", "nrt_bus", "core";
 
                        assigned-clocks = <&dispcc DISP_CC_MDSS_MDP_CLK>;
                        assigned-clock-rates = <460000000>;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 41c23f4..803e777 100644
@@ -1136,6 +1136,11 @@ out:
        return prog;
 }
 
+u64 bpf_jit_alloc_exec_limit(void)
+{
+       return BPF_JIT_REGION_SIZE;
+}
+
 void *bpf_jit_alloc_exec(unsigned long size)
 {
        return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index 0e23e3a..d55b73b 100644
@@ -6,7 +6,7 @@
 
 #ifndef CONFIG_DYNAMIC_FTRACE
 extern void (*ftrace_trace_function)(unsigned long, unsigned long,
-                                    struct ftrace_ops*, struct pt_regs*);
+                                    struct ftrace_ops*, struct ftrace_regs*);
 extern void ftrace_graph_caller(void);
 
 noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform
index 9e32fb7..e849daf 100644
@@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR
 
 config NIOS2_DTB_SOURCE_BOOL
        bool "Compile and link device tree into kernel image"
+       depends on !COMPILE_TEST
        help
          This allows you to specify a dts (device tree source) file
          which will be compiled and linked into the kernel image.
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index fed86f4..753d85b 100644
@@ -125,7 +125,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
        if (i == NR_JIT_ITERATIONS) {
                pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
-               bpf_jit_binary_free(jit_data->header);
+               if (jit_data->header)
+                       bpf_jit_binary_free(jit_data->header);
                prog = orig_prog;
                goto out_offset;
        }
@@ -166,6 +167,11 @@ out:
        return prog;
 }
 
+u64 bpf_jit_alloc_exec_limit(void)
+{
+       return BPF_JIT_REGION_SIZE;
+}
+
 void *bpf_jit_alloc_exec(unsigned long size)
 {
        return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 2fc0b03..5933138 100644
@@ -276,7 +276,7 @@ static const struct ethtool_ops uml_net_ethtool_ops = {
 
 void uml_net_setup_etheraddr(struct net_device *dev, char *str)
 {
-       unsigned char *addr = dev->dev_addr;
+       u8 addr[ETH_ALEN];
        char *end;
        int i;
 
@@ -316,6 +316,7 @@ void uml_net_setup_etheraddr(struct net_device *dev, char *str)
                       addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4],
                       addr[5]);
        }
+       eth_hw_addr_set(dev, addr);
        return;
 
 random:
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8f48a7..5a0298a 100644
@@ -702,7 +702,8 @@ struct kvm_vcpu_arch {
 
        struct kvm_pio_request pio;
        void *pio_data;
-       void *guest_ins_data;
+       void *sev_pio_data;
+       unsigned sev_pio_count;
 
        u8 event_exit_inst_len;
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1a64ba5..0cc5890 100644
@@ -4596,10 +4596,10 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu)
        unsigned bit;
        bool wp;
 
-       if (!is_cr4_pke(mmu)) {
-               mmu->pkru_mask = 0;
+       mmu->pkru_mask = 0;
+
+       if (!is_cr4_pke(mmu))
                return;
-       }
 
        wp = is_cr0_wp(mmu);
 
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0d21d59..2e4916b 100644
@@ -1484,6 +1484,13 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
                goto e_free_trans;
        }
 
+       /*
+        * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
+        * encrypts the written data with the guest's key, and the cache may
+        * contain dirty, unencrypted data.
+        */
+       sev_clflush_pages(guest_page, n);
+
        /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
        data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
        data.guest_address |= sev_me_mask;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7fb2a3a..7d595ef 100644
@@ -6305,18 +6305,13 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 
                /*
                 * If we are running L2 and L1 has a new pending interrupt
-                * which can be injected, we should re-evaluate
-                * what should be done with this new L1 interrupt.
-                * If L1 intercepts external-interrupts, we should
-                * exit from L2 to L1. Otherwise, interrupt should be
-                * delivered directly to L2.
+                * which can be injected, this may cause a vmexit or it may
+                * be injected into L2.  Either way, this interrupt will be
+                * processed via KVM_REQ_EVENT, not RVI, because we do not use
+                * virtual interrupt delivery to inject L1 interrupts into L2.
                 */
-               if (is_guest_mode(vcpu) && max_irr_updated) {
-                       if (nested_exit_on_intr(vcpu))
-                               kvm_vcpu_exiting_guest_mode(vcpu);
-                       else
-                               kvm_make_request(KVM_REQ_EVENT, vcpu);
-               }
+               if (is_guest_mode(vcpu) && max_irr_updated)
+                       kvm_make_request(KVM_REQ_EVENT, vcpu);
        } else {
                max_irr = kvm_lapic_find_highest_irr(vcpu);
        }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c8b512..b26647a 100644
@@ -6906,7 +6906,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 }
 
 static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
-                              unsigned short port, void *val,
+                              unsigned short port,
                               unsigned int count, bool in)
 {
        vcpu->arch.pio.port = port;
@@ -6914,10 +6914,8 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
        vcpu->arch.pio.count  = count;
        vcpu->arch.pio.size = size;
 
-       if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
-               vcpu->arch.pio.count = 0;
+       if (!kernel_pio(vcpu, vcpu->arch.pio_data))
                return 1;
-       }
 
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -6929,26 +6927,39 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
        return 0;
 }
 
-static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
-                          unsigned short port, void *val, unsigned int count)
+static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+                            unsigned short port, unsigned int count)
 {
-       int ret;
+       WARN_ON(vcpu->arch.pio.count);
+       memset(vcpu->arch.pio_data, 0, size * count);
+       return emulator_pio_in_out(vcpu, size, port, count, true);
+}
 
-       if (vcpu->arch.pio.count)
-               goto data_avail;
+static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
+{
+       int size = vcpu->arch.pio.size;
+       unsigned count = vcpu->arch.pio.count;
+       memcpy(val, vcpu->arch.pio_data, size * count);
+       trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
+       vcpu->arch.pio.count = 0;
+}
 
-       memset(vcpu->arch.pio_data, 0, size * count);
+static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+                          unsigned short port, void *val, unsigned int count)
+{
+       if (vcpu->arch.pio.count) {
+               /* Complete previous iteration.  */
+       } else {
+               int r = __emulator_pio_in(vcpu, size, port, count);
+               if (!r)
+                       return r;
 
-       ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
-       if (ret) {
-data_avail:
-               memcpy(val, vcpu->arch.pio_data, size * count);
-               trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
-               vcpu->arch.pio.count = 0;
-               return 1;
+               /* Results already available, fall through.  */
        }
 
-       return 0;
+       WARN_ON(count != vcpu->arch.pio.count);
+       complete_emulator_pio_in(vcpu, val);
+       return 1;
 }
 
 static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
@@ -6963,9 +6974,15 @@ static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
                            unsigned short port, const void *val,
                            unsigned int count)
 {
+       int ret;
+
        memcpy(vcpu->arch.pio_data, val, size * count);
        trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
-       return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
+       ret = emulator_pio_in_out(vcpu, size, port, count, false);
+       if (ret)
+                vcpu->arch.pio.count = 0;
+
+        return ret;
 }
 
 static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
@@ -9643,14 +9660,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
                        break;
 
-                if (unlikely(kvm_vcpu_exit_request(vcpu))) {
+               if (vcpu->arch.apicv_active)
+                       static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+
+               if (unlikely(kvm_vcpu_exit_request(vcpu))) {
                        exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
                        break;
                }
-
-               if (vcpu->arch.apicv_active)
-                       static_call(kvm_x86_sync_pir_to_irr)(vcpu);
-        }
+       }
 
        /*
         * Do this here before restoring debug registers on the host.  And
@@ -12368,44 +12385,81 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
 }
 EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
 
-static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
+                          unsigned int port);
+
+static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
 {
-       memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data,
-              vcpu->arch.pio.count * vcpu->arch.pio.size);
-       vcpu->arch.pio.count = 0;
+       int size = vcpu->arch.pio.size;
+       int port = vcpu->arch.pio.port;
 
+       vcpu->arch.pio.count = 0;
+       if (vcpu->arch.sev_pio_count)
+               return kvm_sev_es_outs(vcpu, size, port);
        return 1;
 }
 
 static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
-                          unsigned int port, void *data,  unsigned int count)
+                          unsigned int port)
 {
-       int ret;
-
-       ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port,
-                                       data, count);
-       if (ret)
-               return ret;
+       for (;;) {
+               unsigned int count =
+                       min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
+               int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
+
+               /* memcpy done already by emulator_pio_out.  */
+               vcpu->arch.sev_pio_count -= count;
+               vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+               if (!ret)
+                       break;
 
-       vcpu->arch.pio.count = 0;
+               /* Emulation done by the kernel.  */
+               if (!vcpu->arch.sev_pio_count)
+                       return 1;
+       }
 
+       vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
        return 0;
 }
 
 static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
-                         unsigned int port, void *data, unsigned int count)
+                         unsigned int port);
+
+static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
 {
-       int ret;
+       unsigned count = vcpu->arch.pio.count;
+       complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
+       vcpu->arch.sev_pio_count -= count;
+       vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+}
 
-       ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port,
-                                      data, count);
-       if (ret) {
-               vcpu->arch.pio.count = 0;
-       } else {
-               vcpu->arch.guest_ins_data = data;
-               vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
+static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+{
+       int size = vcpu->arch.pio.size;
+       int port = vcpu->arch.pio.port;
+
+       advance_sev_es_emulated_ins(vcpu);
+       if (vcpu->arch.sev_pio_count)
+               return kvm_sev_es_ins(vcpu, size, port);
+       return 1;
+}
+
+static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
+                         unsigned int port)
+{
+       for (;;) {
+               unsigned int count =
+                       min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
+               if (!__emulator_pio_in(vcpu, size, port, count))
+                       break;
+
+               /* Emulation done by the kernel.  */
+               advance_sev_es_emulated_ins(vcpu);
+               if (!vcpu->arch.sev_pio_count)
+                       return 1;
        }
 
+       vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
        return 0;
 }
 
@@ -12413,8 +12467,10 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
                         unsigned int port, void *data,  unsigned int count,
                         int in)
 {
-       return in ? kvm_sev_es_ins(vcpu, size, port, data, count)
-                 : kvm_sev_es_outs(vcpu, size, port, data, count);
+       vcpu->arch.sev_pio_data = data;
+       vcpu->arch.sev_pio_count = count;
+       return in ? kvm_sev_es_ins(vcpu, size, port)
+                 : kvm_sev_es_outs(vcpu, size, port);
 }
 EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
 
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index 8b806d3..962e5e1 100644
@@ -124,7 +124,7 @@ static char *split_if_spec(char *str, ...)
 
 static void setup_etheraddr(struct net_device *dev, char *str)
 {
-       unsigned char *addr = dev->dev_addr;
+       u8 addr[ETH_ALEN];
 
        if (str == NULL)
                goto random;
@@ -147,6 +147,7 @@ static void setup_etheraddr(struct net_device *dev, char *str)
        if (!is_local_ether_addr(addr))
                pr_warn("%s: assigning a globally valid ethernet address\n",
                        dev->name);
+       eth_hw_addr_set(dev, addr);
        return;
 
 random:
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 38b9f76..9a1c583 100644
@@ -1897,10 +1897,11 @@ void blk_cgroup_bio_start(struct bio *bio)
 {
        int rwd = blk_cgroup_io_type(bio), cpu;
        struct blkg_iostat_set *bis;
+       unsigned long flags;
 
        cpu = get_cpu();
        bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
-       u64_stats_update_begin(&bis->sync);
+       flags = u64_stats_update_begin_irqsave(&bis->sync);
 
        /*
         * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
@@ -1912,7 +1913,7 @@ void blk_cgroup_bio_start(struct bio *bio)
        }
        bis->cur.ios[rwd]++;
 
-       u64_stats_update_end(&bis->sync);
+       u64_stats_update_end_irqrestore(&bis->sync, flags);
        if (cgroup_subsys_on_dfl(io_cgrp_subsys))
                cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu);
        put_cpu();
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 58c4c36..7bea19d 100644
@@ -423,6 +423,7 @@ out_del:
        device_del(pdev);
 out_put:
        put_device(pdev);
+       return ERR_PTR(err);
 out_put_disk:
        put_disk(disk);
        return ERR_PTR(err);
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index b9863e2..f0ed441 100644
@@ -1035,13 +1035,8 @@ void acpi_turn_off_unused_power_resources(void)
        list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) {
                mutex_lock(&resource->resource_lock);
 
-               /*
-                * Turn off power resources in an unknown state too, because the
-                * platform firmware on some system expects the OS to turn off
-                * power resources without any users unconditionally.
-                */
                if (!resource->ref_count &&
-                   resource->state != ACPI_POWER_RESOURCE_STATE_OFF) {
+                   resource->state == ACPI_POWER_RESOURCE_STATE_ON) {
                        acpi_handle_debug(resource->device.handle, "Turning OFF\n");
                        __acpi_power_off(resource);
                }
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 9d86203..c53633d 100644
@@ -3896,8 +3896,8 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
                break;
 
        default:
-               dev_err(host->dev, "BUG: invalid board index %u\n", board_idx);
-               return 1;
+               dev_alert(host->dev, "BUG: invalid board index %u\n", board_idx);
+               return -EINVAL;
        }
 
        hpriv->hp_flags = hp_flags;
diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
index cfa29dc..fabf870 100644
@@ -281,14 +281,14 @@ static int regcache_rbtree_insert_to_block(struct regmap *map,
        if (!blk)
                return -ENOMEM;
 
+       rbnode->block = blk;
+
        if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) {
                present = krealloc(rbnode->cache_present,
                                   BITS_TO_LONGS(blklen) * sizeof(*present),
                                   GFP_KERNEL);
-               if (!present) {
-                       kfree(blk);
+               if (!present)
                        return -ENOMEM;
-               }
 
                memset(present + BITS_TO_LONGS(rbnode->blklen), 0,
                       (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen))
@@ -305,7 +305,6 @@ static int regcache_rbtree_insert_to_block(struct regmap *map,
        }
 
        /* update the rbnode block, its size and the base register */
-       rbnode->block = blk;
        rbnode->blklen = blklen;
        rbnode->base_reg = base_reg;
        rbnode->cache_present = present;
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 4c3fd2e..dcc1410 100644
@@ -1443,8 +1443,8 @@ static int fwnet_probe(struct fw_unit *unit,
        struct net_device *net;
        bool allocated_netdev = false;
        struct fwnet_device *dev;
+       union fwnet_hwaddr ha;
        int ret;
-       union fwnet_hwaddr *ha;
 
        mutex_lock(&fwnet_device_mutex);
 
@@ -1491,12 +1491,12 @@ static int fwnet_probe(struct fw_unit *unit,
        net->max_mtu = 4096U;
 
        /* Set our hardware address while we're at it */
-       ha = (union fwnet_hwaddr *)net->dev_addr;
-       put_unaligned_be64(card->guid, &ha->uc.uniq_id);
-       ha->uc.max_rec = dev->card->max_receive;
-       ha->uc.sspd = dev->card->link_speed;
-       put_unaligned_be16(dev->local_fifo >> 32, &ha->uc.fifo_hi);
-       put_unaligned_be32(dev->local_fifo & 0xffffffff, &ha->uc.fifo_lo);
+       ha.uc.uniq_id = cpu_to_be64(card->guid);
+       ha.uc.max_rec = dev->card->max_receive;
+       ha.uc.sspd = dev->card->link_speed;
+       ha.uc.fifo_hi = cpu_to_be16(dev->local_fifo >> 32);
+       ha.uc.fifo_lo = cpu_to_be32(dev->local_fifo & 0xffffffff);
+       dev_addr_set(net, ha.u);
 
        memset(net->broadcast, -1, net->addr_len);
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 42f3d9d..d030577 100644
@@ -13,6 +13,7 @@
 #define _HYPERV_VMBUS_H
 
 #include <linux/list.h>
+#include <linux/bitops.h>
 #include <asm/sync_bitops.h>
 #include <asm/hyperv-tlfs.h>
 #include <linux/atomic.h>
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a20b810..c00f8e2 100644
@@ -706,8 +706,9 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
 
        /* Construct the family header first */
        header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
-       memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
-              LS_DEVICE_NAME_MAX);
+       strscpy_pad(header->device_name,
+                   dev_name(&query->port->agent->device->dev),
+                   LS_DEVICE_NAME_MAX);
        header->port_num = query->port->port_num;
 
        if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 489b436..3d42bd2 100644
@@ -878,6 +878,7 @@ void sc_disable(struct send_context *sc)
 {
        u64 reg;
        struct pio_buf *pbuf;
+       LIST_HEAD(wake_list);
 
        if (!sc)
                return;
@@ -912,19 +913,21 @@ void sc_disable(struct send_context *sc)
        spin_unlock(&sc->release_lock);
 
        write_seqlock(&sc->waitlock);
-       while (!list_empty(&sc->piowait)) {
+       if (!list_empty(&sc->piowait))
+               list_move(&sc->piowait, &wake_list);
+       write_sequnlock(&sc->waitlock);
+       while (!list_empty(&wake_list)) {
                struct iowait *wait;
                struct rvt_qp *qp;
                struct hfi1_qp_priv *priv;
 
-               wait = list_first_entry(&sc->piowait, struct iowait, list);
+               wait = list_first_entry(&wake_list, struct iowait, list);
                qp = iowait_to_qp(wait);
                priv = qp->priv;
                list_del_init(&priv->s_iowait.list);
                priv->s_iowait.lock = NULL;
                hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
        }
-       write_sequnlock(&sc->waitlock);
 
        spin_unlock_irq(&sc->alloc_lock);
 }
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index 5fb92de..9b544a3 100644
@@ -1092,12 +1092,12 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
                if (cq->avoid_mem_cflct) {
                        ext_cqe = (__le64 *)((u8 *)cqe + 32);
                        get_64bit_val(ext_cqe, 24, &qword7);
-                       polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
+                       polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7);
                } else {
                        peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size;
                        ext_cqe = cq->cq_base[peek_head].buf;
                        get_64bit_val(ext_cqe, 24, &qword7);
-                       polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
+                       polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7);
                        if (!peek_head)
                                polarity ^= 1;
                }
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 7110ebf..102dc93 100644
@@ -3399,9 +3399,13 @@ static void irdma_process_cqe(struct ib_wc *entry,
                }
 
                if (cq_poll_info->ud_vlan_valid) {
-                       entry->vlan_id = cq_poll_info->ud_vlan & VLAN_VID_MASK;
-                       entry->wc_flags |= IB_WC_WITH_VLAN;
+                       u16 vlan = cq_poll_info->ud_vlan & VLAN_VID_MASK;
+
                        entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT;
+                       if (vlan) {
+                               entry->vlan_id = vlan;
+                               entry->wc_flags |= IB_WC_WITH_VLAN;
+                       }
                } else {
                        entry->sl = 0;
                }
diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c
index b68c575..b0d6ee0 100644
@@ -330,8 +330,10 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
 
                tc_node->enable = true;
                ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE);
-               if (ret)
+               if (ret) {
+                       vsi->unregister_qset(vsi, tc_node);
                        goto reg_err;
+               }
        }
        ibdev_dbg(to_ibdev(vsi->dev),
                  "WS: Using node %d which represents VSI %d TC %d\n",
@@ -350,6 +352,10 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
        }
        goto exit;
 
+reg_err:
+       irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE);
+       list_del(&tc_node->siblings);
+       irdma_free_node(vsi, tc_node);
 leaf_add_err:
        if (list_empty(&vsi_node->child_list_head)) {
                if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE))
@@ -369,11 +375,6 @@ vsi_add_err:
 exit:
        mutex_unlock(&vsi->dev->ws_mutex);
        return ret;
-
-reg_err:
-       mutex_unlock(&vsi->dev->ws_mutex);
-       irdma_ws_remove(vsi, user_pri);
-       return ret;
 }
 
 /**
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index a8db8a0..ff3742b 100644
@@ -206,3 +206,29 @@ out:
        kfree(in);
        return err;
 }
+
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid)
+{
+       u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
+       int err;
+
+       MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+       MLX5_SET(alloc_uar_in, in, uid, uid);
+       err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
+       if (err)
+               return err;
+
+       *uarn = MLX5_GET(alloc_uar_out, out, uar);
+       return 0;
+}
+
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid)
+{
+       u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
+
+       MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+       MLX5_SET(dealloc_uar_in, in, uar, uarn);
+       MLX5_SET(dealloc_uar_in, in, uid, uid);
+       return mlx5_cmd_exec_in(dev, dealloc_uar, in);
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 66c9629..ee46638 100644
@@ -57,4 +57,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
 int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
 int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
                     u16 opmod, u8 port);
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid);
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index e95967a..08b7f6b 100644
@@ -1292,21 +1292,16 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
                                     struct mlx5_ib_dev *dev,
                                     void *in, void *out)
 {
-       struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
-       struct mlx5_core_mkey *mkey;
+       struct mlx5_ib_mkey *mkey = &obj->mkey;
        void *mkc;
        u8 key;
 
-       mkey = &devx_mr->mmkey;
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        key = MLX5_GET(mkc, mkc, mkey_7_0);
        mkey->key = mlx5_idx_to_mkey(
                        MLX5_GET(create_mkey_out, out, mkey_index)) | key;
        mkey->type = MLX5_MKEY_INDIRECT_DEVX;
-       mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
-       mkey->size = MLX5_GET64(mkc, mkc, len);
-       mkey->pd = MLX5_GET(mkc, mkc, pd);
-       devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+       mkey->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
        init_waitqueue_head(&mkey->wait);
 
        return mlx5r_store_odp_mkey(dev, mkey);
@@ -1384,13 +1379,13 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
        dev = mlx5_udata_to_mdev(&attrs->driver_udata);
        if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY &&
            xa_erase(&obj->ib_dev->odp_mkeys,
-                    mlx5_base_mkey(obj->devx_mr.mmkey.key)))
+                    mlx5_base_mkey(obj->mkey.key)))
                /*
                 * The pagefault_single_data_segment() does commands against
                 * the mmkey, we must wait for that to stop before freeing the
                 * mkey, as another allocation could get the same mkey #.
                 */
-               mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey);
+               mlx5r_deref_wait_odp_mkey(&obj->mkey);
 
        if (obj->flags & DEVX_OBJ_FLAGS_DCT)
                ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
index 1f69866..ee22132 100644
@@ -16,7 +16,7 @@ struct devx_obj {
        u32                     dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
        u32                     flags;
        union {
-               struct mlx5_ib_devx_mr  devx_mr;
+               struct mlx5_ib_mkey     mkey;
                struct mlx5_core_dct    core_dct;
                struct mlx5_core_cq     core_cq;
                u32                     flow_counter_bulk_size;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 8664bcf..5ec8bd2 100644
@@ -1643,7 +1643,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
 
        bfregi = &context->bfregi;
        for (i = 0; i < bfregi->num_static_sys_pages; i++) {
-               err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
+               err = mlx5_cmd_uar_alloc(dev->mdev, &bfregi->sys_pages[i],
+                                        context->devx_uid);
                if (err)
                        goto error;
 
@@ -1657,7 +1658,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
 
 error:
        for (--i; i >= 0; i--)
-               if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
+               if (mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+                                        context->devx_uid))
                        mlx5_ib_warn(dev, "failed to free uar %d\n", i);
 
        return err;
@@ -1673,7 +1675,8 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
        for (i = 0; i < bfregi->num_sys_pages; i++)
                if (i < bfregi->num_static_sys_pages ||
                    bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
-                       mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+                       mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+                                            context->devx_uid);
 }
 
 int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
@@ -1891,6 +1894,13 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
        if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
                return -EINVAL;
 
+       if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+               err = mlx5_ib_devx_create(dev, true);
+               if (err < 0)
+                       goto out_ctx;
+               context->devx_uid = err;
+       }
+
        lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
        lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
        bfregi = &context->bfregi;
@@ -1903,7 +1913,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
        /* updates req->total_num_bfregs */
        err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
        if (err)
-               goto out_ctx;
+               goto out_devx;
 
        mutex_init(&bfregi->lock);
        bfregi->lib_uar_4k = lib_uar_4k;
@@ -1911,7 +1921,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
                                GFP_KERNEL);
        if (!bfregi->count) {
                err = -ENOMEM;
-               goto out_ctx;
+               goto out_devx;
        }
 
        bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
@@ -1927,17 +1937,10 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
                goto out_sys_pages;
 
 uar_done:
-       if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
-               err = mlx5_ib_devx_create(dev, true);
-               if (err < 0)
-                       goto out_uars;
-               context->devx_uid = err;
-       }
-
        err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
                                             context->devx_uid);
        if (err)
-               goto out_devx;
+               goto out_uars;
 
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
@@ -1972,9 +1975,6 @@ uar_done:
 
 out_mdev:
        mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
-out_devx:
-       if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
-               mlx5_ib_devx_destroy(dev, context->devx_uid);
 
 out_uars:
        deallocate_uars(dev, context);
@@ -1985,6 +1985,10 @@ out_sys_pages:
 out_count:
        kfree(bfregi->count);
 
+out_devx:
+       if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+               mlx5_ib_devx_destroy(dev, context->devx_uid);
+
 out_ctx:
        return err;
 }
@@ -2021,12 +2025,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
        bfregi = &context->bfregi;
        mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
 
-       if (context->devx_uid)
-               mlx5_ib_devx_destroy(dev, context->devx_uid);
-
        deallocate_uars(dev, context);
        kfree(bfregi->sys_pages);
        kfree(bfregi->count);
+
+       if (context->devx_uid)
+               mlx5_ib_devx_destroy(dev, context->devx_uid);
 }
 
 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -2119,6 +2123,7 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
        struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
        struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
        struct mlx5_var_table *var_table = &dev->var_table;
+       struct mlx5_ib_ucontext *context = to_mucontext(entry->ucontext);
 
        switch (mentry->mmap_flag) {
        case MLX5_IB_MMAP_TYPE_MEMIC:
@@ -2133,7 +2138,8 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
                break;
        case MLX5_IB_MMAP_TYPE_UAR_WC:
        case MLX5_IB_MMAP_TYPE_UAR_NC:
-               mlx5_cmd_free_uar(dev->mdev, mentry->page_idx);
+               mlx5_cmd_uar_dealloc(dev->mdev, mentry->page_idx,
+                                    context->devx_uid);
                kfree(mentry);
                break;
        default:
@@ -2211,7 +2217,8 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
                bfregi->count[bfreg_dyn_idx]++;
                mutex_unlock(&bfregi->lock);
 
-               err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+               err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index,
+                                        context->devx_uid);
                if (err) {
                        mlx5_ib_warn(dev, "UAR alloc failed\n");
                        goto free_bfreg;
@@ -2240,7 +2247,7 @@ err:
        if (!dyn_uar)
                return err;
 
-       mlx5_cmd_free_uar(dev->mdev, idx);
+       mlx5_cmd_uar_dealloc(dev->mdev, idx, context->devx_uid);
 
 free_bfreg:
        mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
@@ -3489,7 +3496,7 @@ alloc_uar_entry(struct mlx5_ib_ucontext *c,
                return ERR_PTR(-ENOMEM);
 
        dev = to_mdev(c->ibucontext.device);
-       err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+       err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, c->devx_uid);
        if (err)
                goto end;
 
@@ -3507,7 +3514,7 @@ alloc_uar_entry(struct mlx5_ib_ucontext *c,
        return entry;
 
 err_insert:
-       mlx5_cmd_free_uar(dev->mdev, uar_index);
+       mlx5_cmd_uar_dealloc(dev->mdev, uar_index, c->devx_uid);
 end:
        kfree(entry);
        return ERR_PTR(err);
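
The error-path reordering in mlx5_ib_alloc_ucontext() above restores the usual kernel unwind rule: cleanup labels mirror allocation order in reverse, so the devx uid, now created first, is destroyed last via the relocated out_devx label. A generic sketch of that rule, with hypothetical acquire_*/release_* helpers and a hypothetical struct ctx:

        /* Release in exact reverse order of acquisition, so a failure at
         * step N unwinds only steps 1..N-1.
         */
        static int example_setup(struct ctx *c)
        {
                int err;

                err = acquire_a(c);             /* first in: the devx uid above */
                if (err)
                        return err;

                err = acquire_b(c);             /* then bfregs and UARs */
                if (err)
                        goto out_a;

                err = acquire_c(c);             /* then the transport domain */
                if (err)
                        goto out_b;

                return 0;

        out_b:
                release_b(c);
        out_a:
                release_a(c);                   /* last out: the devx uid above */
                return err;
        }
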
index bf20a38..e462e36 100644
@@ -619,6 +619,20 @@ struct mlx5_user_mmap_entry {
        u32 page_idx;
 };
 
+enum mlx5_mkey_type {
+       MLX5_MKEY_MR = 1,
+       MLX5_MKEY_MW,
+       MLX5_MKEY_INDIRECT_DEVX,
+};
+
+struct mlx5_ib_mkey {
+       u32 key;
+       enum mlx5_mkey_type type;
+       unsigned int ndescs;
+       struct wait_queue_head wait;
+       refcount_t usecount;
+};
+
 #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
 
 #define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE   |\
@@ -637,7 +651,7 @@ struct mlx5_user_mmap_entry {
 
 struct mlx5_ib_mr {
        struct ib_mr ibmr;
-       struct mlx5_core_mkey mmkey;
+       struct mlx5_ib_mkey mmkey;
 
        /* User MR data */
        struct mlx5_cache_ent *cache_ent;
@@ -659,7 +673,6 @@ struct mlx5_ib_mr {
                        void *descs_alloc;
                        dma_addr_t desc_map;
                        int max_descs;
-                       int ndescs;
                        int desc_size;
                        int access_mode;
 
@@ -713,13 +726,7 @@ static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr)
 
 struct mlx5_ib_mw {
        struct ib_mw            ibmw;
-       struct mlx5_core_mkey   mmkey;
-       int                     ndescs;
-};
-
-struct mlx5_ib_devx_mr {
-       struct mlx5_core_mkey   mmkey;
-       int                     ndescs;
+       struct mlx5_ib_mkey     mmkey;
 };
 
 struct mlx5_ib_umr_context {
@@ -1579,7 +1586,7 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
 }
 
 static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
-                                      struct mlx5_core_mkey *mmkey)
+                                      struct mlx5_ib_mkey *mmkey)
 {
        refcount_set(&mmkey->usecount, 1);
 
@@ -1588,14 +1595,14 @@ static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
 }
 
 /* deref an mkey that can participate in ODP flow */
-static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey)
+static inline void mlx5r_deref_odp_mkey(struct mlx5_ib_mkey *mmkey)
 {
        if (refcount_dec_and_test(&mmkey->usecount))
                wake_up(&mmkey->wait);
 }
 
 /* deref an mkey that can participate in ODP flow and wait for release */
-static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey)
+static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_ib_mkey *mmkey)
 {
        mlx5r_deref_odp_mkey(mmkey);
        wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
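
All three former holders of an mkey now embed the same struct mlx5_ib_mkey, so the usecount/waitqueue teardown above works identically for MRs, MWs and DEVX objects. A self-contained sketch of the pattern (the real helpers are the mlx5r_*_odp_mkey() inlines shown above; the example_* names are made up):

        #include <linux/refcount.h>
        #include <linux/wait.h>

        struct example_mkey {
                refcount_t usecount;
                struct wait_queue_head wait;
        };

        static void example_mkey_init(struct example_mkey *mkey)
        {
                refcount_set(&mkey->usecount, 1);       /* owner's reference */
                init_waitqueue_head(&mkey->wait);
        }

        /* page-fault path: pin the mkey while commands run against it */
        static bool example_mkey_get(struct example_mkey *mkey)
        {
                return refcount_inc_not_zero(&mkey->usecount);
        }

        /* every put wakes the queue so a sleeping destroyer can recheck */
        static void example_mkey_put(struct example_mkey *mkey)
        {
                if (refcount_dec_and_test(&mkey->usecount))
                        wake_up(&mkey->wait);
        }

        /* destroy path: drop the owner's ref, then wait out in-flight users */
        static void example_mkey_destroy_wait(struct example_mkey *mkey)
        {
                example_mkey_put(mkey);
                wait_event(mkey->wait, refcount_read(&mkey->usecount) == 0);
        }
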
index 3be36eb..d2044df 100644
@@ -88,9 +88,8 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
        MLX5_SET64(mkc, mkc, start_addr, start_addr);
 }
 
-static void
-assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
-                   u32 *in)
+static void assign_mkey_variant(struct mlx5_ib_dev *dev,
+                               struct mlx5_ib_mkey *mkey, u32 *in)
 {
        u8 key = atomic_inc_return(&dev->mkey_var);
        void *mkc;
@@ -100,17 +99,22 @@ assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
        mkey->key = key;
 }
 
-static int
-mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
-                   u32 *in, int inlen)
+static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
+                              struct mlx5_ib_mkey *mkey, u32 *in, int inlen)
 {
+       int ret;
+
        assign_mkey_variant(dev, mkey, in);
-       return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
+       ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
+       if (!ret)
+               init_waitqueue_head(&mkey->wait);
+
+       return ret;
 }
 
 static int
 mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
-                      struct mlx5_core_mkey *mkey,
+                      struct mlx5_ib_mkey *mkey,
                       struct mlx5_async_ctx *async_ctx,
                       u32 *in, int inlen, u32 *out, int outlen,
                       struct mlx5_async_work *context)
@@ -133,7 +137,7 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
        WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
 
-       return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+       return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
 }
 
 static void create_mkey_callback(int status, struct mlx5_async_work *context)
@@ -260,10 +264,11 @@ static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
                goto free_in;
        }
 
-       err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
+       err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen);
        if (err)
                goto free_mr;
 
+       init_waitqueue_head(&mr->mmkey.wait);
        mr->mmkey.type = MLX5_MKEY_MR;
        WRITE_ONCE(ent->dev->cache.last_add, jiffies);
        spin_lock_irq(&ent->lock);
@@ -290,7 +295,7 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
        ent->available_mrs--;
        ent->total_mrs--;
        spin_unlock_irq(&ent->lock);
-       mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+       mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key);
        kfree(mr);
        spin_lock_irq(&ent->lock);
 }
@@ -658,7 +663,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
                ent->available_mrs--;
                ent->total_mrs--;
                spin_unlock_irq(&ent->lock);
-               mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+               mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
        }
 
        list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -911,12 +916,13 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
 }
 
 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
-                         u64 length, int access_flags)
+                         u64 length, int access_flags, u64 iova)
 {
        mr->ibmr.lkey = mr->mmkey.key;
        mr->ibmr.rkey = mr->mmkey.key;
        mr->ibmr.length = length;
        mr->ibmr.device = &dev->ib_dev;
+       mr->ibmr.iova = iova;
        mr->access_flags = access_flags;
 }
 
@@ -974,11 +980,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 
        mr->ibmr.pd = pd;
        mr->umem = umem;
-       mr->mmkey.iova = iova;
-       mr->mmkey.size = umem->length;
-       mr->mmkey.pd = to_mpd(pd)->pdn;
        mr->page_shift = order_base_2(page_size);
-       set_mr_fields(dev, mr, umem->length, access_flags);
+       set_mr_fields(dev, mr, umem->length, access_flags, iova);
 
        return mr;
 }
@@ -1087,8 +1090,8 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
        wr->wr.opcode = MLX5_IB_WR_UMR;
        wr->pd = mr->ibmr.pd;
        wr->mkey = mr->mmkey.key;
-       wr->length = mr->mmkey.size;
-       wr->virt_addr = mr->mmkey.iova;
+       wr->length = mr->ibmr.length;
+       wr->virt_addr = mr->ibmr.iova;
        wr->access_flags = mr->access_flags;
        wr->page_shift = mr->page_shift;
        wr->xlt_size = sg->length;
@@ -1339,9 +1342,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
                goto err_2;
        }
        mr->mmkey.type = MLX5_MKEY_MR;
-       mr->desc_size = sizeof(struct mlx5_mtt);
        mr->umem = umem;
-       set_mr_fields(dev, mr, umem->length, access_flags);
+       set_mr_fields(dev, mr, umem->length, access_flags, iova);
        kvfree(in);
 
        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1388,7 +1390,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
 
        kfree(in);
 
-       set_mr_fields(dev, mr, length, acc);
+       set_mr_fields(dev, mr, length, acc, start_addr);
 
        return &mr->ibmr;
 
@@ -1533,6 +1535,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
                ib_umem_release(&odp->umem);
                return ERR_CAST(mr);
        }
+       xa_init(&mr->implicit_children);
 
        odp->private = mr;
        err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
@@ -1709,7 +1712,6 @@ static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
                return err;
 
        mr->access_flags = access_flags;
-       mr->mmkey.pd = to_mpd(pd)->pdn;
        return 0;
 }
 
@@ -1754,7 +1756,6 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 
        if (flags & IB_MR_REREG_PD) {
                mr->ibmr.pd = pd;
-               mr->mmkey.pd = to_mpd(pd)->pdn;
                upd_flags |= MLX5_IB_UPD_XLT_PD;
        }
        if (flags & IB_MR_REREG_ACCESS) {
@@ -1763,8 +1764,8 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
        }
 
        mr->ibmr.length = new_umem->length;
-       mr->mmkey.iova = iova;
-       mr->mmkey.size = new_umem->length;
+       mr->ibmr.iova = iova;
+       mr->ibmr.length = new_umem->length;
        mr->page_shift = order_base_2(page_size);
        mr->umem = new_umem;
        err = mlx5_ib_update_mr_pas(mr, upd_flags);
@@ -1834,7 +1835,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                mr->umem = NULL;
                atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
 
-               return create_real_mr(new_pd, umem, mr->mmkey.iova,
+               return create_real_mr(new_pd, umem, mr->ibmr.iova,
                                      new_access_flags);
        }
 
@@ -2263,9 +2264,9 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
        struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mw *mw = to_mmw(ibmw);
+       unsigned int ndescs;
        u32 *in = NULL;
        void *mkc;
-       int ndescs;
        int err;
        struct mlx5_ib_alloc_mw req = {};
        struct {
@@ -2310,7 +2311,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 
        mw->mmkey.type = MLX5_MKEY_MW;
        ibmw->rkey = mw->mmkey.key;
-       mw->ndescs = ndescs;
+       mw->mmkey.ndescs = ndescs;
 
        resp.response_length =
                min(offsetofend(typeof(resp), response_length), udata->outlen);
@@ -2330,7 +2331,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
        return 0;
 
 free_mkey:
-       mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
+       mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key);
 free:
        kfree(in);
        return err;
@@ -2349,7 +2350,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw)
                 */
                mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
 
-       return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
+       return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key);
 }
 
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
@@ -2406,7 +2407,7 @@ mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
        mr->meta_length = 0;
        if (data_sg_nents == 1) {
                n++;
-               mr->ndescs = 1;
+               mr->mmkey.ndescs = 1;
                if (data_sg_offset)
                        sg_offset = *data_sg_offset;
                mr->data_length = sg_dma_len(data_sg) - sg_offset;
@@ -2459,7 +2460,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
        if (sg_offset_p)
                *sg_offset_p = sg_offset;
 
-       mr->ndescs = i;
+       mr->mmkey.ndescs = i;
        mr->data_length = mr->ibmr.length;
 
        if (meta_sg_nents) {
@@ -2492,11 +2493,11 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        __be64 *descs;
 
-       if (unlikely(mr->ndescs == mr->max_descs))
+       if (unlikely(mr->mmkey.ndescs == mr->max_descs))
                return -ENOMEM;
 
        descs = mr->descs;
-       descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+       descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
 
        return 0;
 }
@@ -2506,11 +2507,11 @@ static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        __be64 *descs;
 
-       if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
+       if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs))
                return -ENOMEM;
 
        descs = mr->descs;
-       descs[mr->ndescs + mr->meta_ndescs++] =
+       descs[mr->mmkey.ndescs + mr->meta_ndescs++] =
                cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
 
        return 0;
@@ -2526,7 +2527,7 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
        struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
        int n;
 
-       pi_mr->ndescs = 0;
+       pi_mr->mmkey.ndescs = 0;
        pi_mr->meta_ndescs = 0;
        pi_mr->meta_length = 0;
 
@@ -2560,7 +2561,7 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
                 * metadata offset at the first metadata page
                 */
                pi_mr->pi_iova = (iova & page_mask) +
-                                pi_mr->ndescs * ibmr->page_size +
+                                pi_mr->mmkey.ndescs * ibmr->page_size +
                                 (pi_mr->ibmr.iova & ~page_mask);
                /*
                 * In order to use one MTT MR for data and metadata, we register
@@ -2591,7 +2592,7 @@ mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
        struct mlx5_ib_mr *pi_mr = mr->klm_mr;
        int n;
 
-       pi_mr->ndescs = 0;
+       pi_mr->mmkey.ndescs = 0;
        pi_mr->meta_ndescs = 0;
        pi_mr->meta_length = 0;
 
@@ -2626,7 +2627,7 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
 
        WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
 
-       mr->ndescs = 0;
+       mr->mmkey.ndescs = 0;
        mr->data_length = 0;
        mr->data_iova = 0;
        mr->meta_ndescs = 0;
@@ -2682,7 +2683,7 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        int n;
 
-       mr->ndescs = 0;
+       mr->mmkey.ndescs = 0;
 
        ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
                                   mr->desc_size * mr->max_descs,
index 81147d7..b1e2725 100644
@@ -430,7 +430,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
        mr->umem = &odp->umem;
        mr->ibmr.lkey = mr->mmkey.key;
        mr->ibmr.rkey = mr->mmkey.key;
-       mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE;
+       mr->ibmr.iova = idx * MLX5_IMR_MTT_SIZE;
        mr->parent = imr;
        odp->private = mr;
 
@@ -500,7 +500,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
        }
 
        imr->ibmr.pd = &pd->ibpd;
-       imr->mmkey.iova = 0;
+       imr->ibmr.iova = 0;
        imr->umem = &umem_odp->umem;
        imr->ibmr.lkey = imr->mmkey.key;
        imr->ibmr.rkey = imr->mmkey.key;
@@ -738,7 +738,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 {
        struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
 
-       if (unlikely(io_virt < mr->mmkey.iova))
+       if (unlikely(io_virt < mr->ibmr.iova))
                return -EFAULT;
 
        if (mr->umem->is_dmabuf)
@@ -747,7 +747,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
        if (!odp->is_implicit_odp) {
                u64 user_va;
 
-               if (check_add_overflow(io_virt - mr->mmkey.iova,
+               if (check_add_overflow(io_virt - mr->ibmr.iova,
                                       (u64)odp->umem.address, &user_va))
                        return -EFAULT;
                if (unlikely(user_va >= ib_umem_end(odp) ||
@@ -788,7 +788,7 @@ struct pf_frame {
        int depth;
 };
 
-static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key)
+static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key)
 {
        if (!mmkey)
                return false;
@@ -797,21 +797,6 @@ static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key)
        return mmkey->key == key;
 }
 
-static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey)
-{
-       struct mlx5_ib_mw *mw;
-       struct mlx5_ib_devx_mr *devx_mr;
-
-       if (mmkey->type == MLX5_MKEY_MW) {
-               mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
-               return mw->ndescs;
-       }
-
-       devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr,
-                              mmkey);
-       return devx_mr->ndescs;
-}
-
 /*
  * Handle a single data segment in a page-fault WQE or RDMA region.
  *
@@ -831,12 +816,11 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 {
        int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0;
        struct pf_frame *head = NULL, *frame;
-       struct mlx5_core_mkey *mmkey;
+       struct mlx5_ib_mkey *mmkey;
        struct mlx5_ib_mr *mr;
        struct mlx5_klm *pklm;
        u32 *out = NULL;
        size_t offset;
-       int ndescs;
 
        io_virt += *bytes_committed;
        bcnt -= *bytes_committed;
@@ -885,8 +869,6 @@ next_mr:
 
        case MLX5_MKEY_MW:
        case MLX5_MKEY_INDIRECT_DEVX:
-               ndescs = get_indirect_num_descs(mmkey);
-
                if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
                        mlx5_ib_dbg(dev, "indirection level exceeded\n");
                        ret = -EFAULT;
@@ -894,7 +876,7 @@ next_mr:
                }
 
                outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
-                       sizeof(*pklm) * (ndescs - 2);
+                       sizeof(*pklm) * (mmkey->ndescs - 2);
 
                if (outlen > cur_outlen) {
                        kfree(out);
@@ -909,14 +891,14 @@ next_mr:
                pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
                                                       bsf0_klm0_pas_mtt0_1);
 
-               ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
+               ret = mlx5_core_query_mkey(dev->mdev, mmkey->key, out, outlen);
                if (ret)
                        goto end;
 
                offset = io_virt - MLX5_GET64(query_mkey_out, out,
                                              memory_key_mkey_entry.start_addr);
 
-               for (i = 0; bcnt && i < ndescs; i++, pklm++) {
+               for (i = 0; bcnt && i < mmkey->ndescs; i++, pklm++) {
                        if (offset >= be32_to_cpu(pklm->bcount)) {
                                offset -= be32_to_cpu(pklm->bcount);
                                continue;
@@ -1704,8 +1686,8 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
                    u32 lkey)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_core_mkey *mmkey;
        struct mlx5_ib_mr *mr = NULL;
+       struct mlx5_ib_mkey *mmkey;
 
        xa_lock(&dev->odp_mkeys);
        mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
index b2fca11..e5abbcf 100644
@@ -4458,6 +4458,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                MLX5_SET(dctc, dctc, mtu, attr->path_mtu);
                MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
                MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
+               if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+                       MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7);
 
                err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
                                           MLX5_ST_SZ_BYTES(create_dct_in), out,
index 8841620..51e48ca 100644
@@ -217,7 +217,7 @@ static __be64 sig_mkey_mask(void)
 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
                            struct mlx5_ib_mr *mr, u8 flags, bool atomic)
 {
-       int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
+       int size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
 
        memset(umr, 0, sizeof(*umr));
 
@@ -374,7 +374,7 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
                             struct mlx5_ib_mr *mr,
                             u32 key, int access)
 {
-       int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
+       int ndescs = ALIGN(mr->mmkey.ndescs + mr->meta_ndescs, 8) >> 1;
 
        memset(seg, 0, sizeof(*seg));
 
@@ -439,7 +439,7 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
                             struct mlx5_ib_mr *mr,
                             struct mlx5_ib_pd *pd)
 {
-       int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
+       int bcount = mr->desc_size * (mr->mmkey.ndescs + mr->meta_ndescs);
 
        dseg->addr = cpu_to_be64(mr->desc_map);
        dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
@@ -861,7 +861,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
        struct mlx5_ib_mr *mr = to_mmr(wr->mr);
        struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
        struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
-       int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
+       int mr_list_size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
        bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
        bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
        u8 flags = 0;
@@ -1111,7 +1111,7 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
                memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
                /* No UMR, use local_dma_lkey */
                pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
-               pa_pi_mr.ndescs = mr->ndescs;
+               pa_pi_mr.mmkey.ndescs = mr->mmkey.ndescs;
                pa_pi_mr.data_length = mr->data_length;
                pa_pi_mr.data_iova = mr->data_iova;
                if (mr->meta_ndescs) {
index 3cb4feb..8def88c 100644
@@ -455,6 +455,7 @@ struct qedr_qp {
        /* synchronization objects used with iwarp ep */
        struct kref refcnt;
        struct completion iwarp_cm_comp;
+       struct completion qp_rel_comp;
        unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */
 };
 
index 1715fbe..a51fc68 100644
@@ -83,7 +83,7 @@ static void qedr_iw_free_qp(struct kref *ref)
 {
        struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt);
 
-       kfree(qp);
+       complete(&qp->qp_rel_comp);
 }
 
 static void
index 3fbf172..dcb3653 100644
@@ -1357,6 +1357,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                kref_init(&qp->refcnt);
                init_completion(&qp->iwarp_cm_comp);
+               init_completion(&qp->qp_rel_comp);
        }
 
        qp->pd = pd;
@@ -2857,8 +2858,10 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 
        qedr_free_qp_resources(dev, qp, udata);
 
-       if (rdma_protocol_iwarp(&dev->ibdev, 1))
+       if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                qedr_iw_qp_rem_ref(&qp->ibqp);
+               wait_for_completion(&qp->qp_rel_comp);
+       }
 
        return 0;
 }
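
The qedr hunks above convert the final kref release from an immediate kfree() into a completion, so qedr_destroy_qp() can sleep until the iWARP CM path drops its last reference before the QP memory is reclaimed. A minimal sketch of that shape (example_* names are hypothetical; the actual free is assumed to follow the wait in the destroy path):

        #include <linux/completion.h>
        #include <linux/kref.h>
        #include <linux/slab.h>

        struct example_qp {
                struct kref refcnt;
                struct completion rel_comp;
        };

        static void example_qp_init(struct example_qp *qp)
        {
                kref_init(&qp->refcnt);
                init_completion(&qp->rel_comp);
        }

        static void example_qp_release(struct kref *ref)
        {
                struct example_qp *qp = container_of(ref, struct example_qp, refcnt);

                /* signal the destroyer instead of freeing in async context */
                complete(&qp->rel_comp);
        }

        static void example_qp_destroy(struct example_qp *qp)
        {
                kref_put(&qp->refcnt, example_qp_release);
                wait_for_completion(&qp->rel_comp);     /* last ref is gone */
                kfree(qp);                              /* no concurrent users left */
        }
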
index a67599b..ac11943 100644
@@ -602,7 +602,7 @@ done:
 /*
  * How many pages in this iovec element?
  */
-static int qib_user_sdma_num_pages(const struct iovec *iov)
+static size_t qib_user_sdma_num_pages(const struct iovec *iov)
 {
        const unsigned long addr  = (unsigned long) iov->iov_base;
        const unsigned long  len  = iov->iov_len;
@@ -658,7 +658,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
 static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
                                   struct qib_user_sdma_queue *pq,
                                   struct qib_user_sdma_pkt *pkt,
-                                  unsigned long addr, int tlen, int npages)
+                                  unsigned long addr, int tlen, size_t npages)
 {
        struct page *pages[8];
        int i, j;
@@ -722,7 +722,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
        unsigned long idx;
 
        for (idx = 0; idx < niov; idx++) {
-               const int npages = qib_user_sdma_num_pages(iov + idx);
+               const size_t npages = qib_user_sdma_num_pages(iov + idx);
                const unsigned long addr = (unsigned long) iov[idx].iov_base;
 
                ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
@@ -824,8 +824,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
                unsigned pktnw;
                unsigned pktnwc;
                int nfrags = 0;
-               int npages = 0;
-               int bytes_togo = 0;
+               size_t npages = 0;
+               size_t bytes_togo = 0;
                int tiddma = 0;
                int cfur;
 
@@ -885,7 +885,11 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
 
                        npages += qib_user_sdma_num_pages(&iov[idx]);
 
-                       bytes_togo += slen;
+                       if (check_add_overflow(bytes_togo, slen, &bytes_togo) ||
+                           bytes_togo > type_max(typeof(pkt->bytes_togo))) {
+                               ret = -EINVAL;
+                               goto free_pbc;
+                       }
                        pktnwc += slen >> 2;
                        idx++;
                        nfrags++;
@@ -904,8 +908,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
                }
 
                if (frag_size) {
-                       int tidsmsize, n;
-                       size_t pktsize;
+                       size_t tidsmsize, n, pktsize, sz, addrlimit;
 
                        n = npages*((2*PAGE_SIZE/frag_size)+1);
                        pktsize = struct_size(pkt, addr, n);
@@ -923,14 +926,24 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
                        else
                                tidsmsize = 0;
 
-                       pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
+                       if (check_add_overflow(pktsize, tidsmsize, &sz)) {
+                               ret = -EINVAL;
+                               goto free_pbc;
+                       }
+                       pkt = kmalloc(sz, GFP_KERNEL);
                        if (!pkt) {
                                ret = -ENOMEM;
                                goto free_pbc;
                        }
                        pkt->largepkt = 1;
                        pkt->frag_size = frag_size;
-                       pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
+                       if (check_add_overflow(n, ARRAY_SIZE(pkt->addr),
+                                              &addrlimit) ||
+                           addrlimit > type_max(typeof(pkt->addrlimit))) {
+                               ret = -EINVAL;
+                               goto free_pbc;
+                       }
+                       pkt->addrlimit = addrlimit;
 
                        if (tiddma) {
                                char *tidsm = (char *)pkt + pktsize;
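
The qib changes above are a bounds-hardening pattern: sums of user-controlled sizes are accumulated through check_add_overflow() in a wide type, then compared against type_max() of the narrower destination field before being stored. A condensed sketch of the same two checks (example_add_len() is a made-up wrapper):

        #include <linux/errno.h>
        #include <linux/overflow.h>
        #include <linux/types.h>

        /* accumulate an untrusted length, range-check, then narrow safely */
        static int example_add_len(size_t cur, size_t add, u16 *out)
        {
                size_t total;

                if (check_add_overflow(cur, add, &total))
                        return -EINVAL;         /* the sum wrapped */
                if (total > type_max(typeof(*out)))
                        return -EINVAL;         /* would truncate on store */
                *out = total;
                return 0;
        }
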
index 49bdd78..3305f27 100644
@@ -1223,7 +1223,7 @@ int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
        spin_lock(&rdi->n_qps_lock);
        if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
                spin_unlock(&rdi->n_qps_lock);
-               ret = ENOMEM;
+               ret = -ENOMEM;
                goto bail_ip;
        }
 
index dddebea..8a2febf 100644
@@ -1008,7 +1008,7 @@ static u8 mask_promisc[6]={0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
 
 static int dvb_net_filter_sec_set(struct net_device *dev,
                   struct dmx_section_filter **secfilter,
-                  u8 *mac, u8 *mac_mask)
+                  const u8 *mac, u8 *mac_mask)
 {
        struct dvb_net_priv *priv = netdev_priv(dev);
        int ret;
@@ -1052,7 +1052,7 @@ static int dvb_net_feed_start(struct net_device *dev)
        int ret = 0, i;
        struct dvb_net_priv *priv = netdev_priv(dev);
        struct dmx_demux *demux = priv->demux;
-       unsigned char *mac = (unsigned char *) dev->dev_addr;
+       const unsigned char *mac = (const unsigned char *) dev->dev_addr;
 
        netdev_dbg(dev, "rx_mode %i\n", priv->rx_mode);
        mutex_lock(&priv->mutex);
@@ -1272,7 +1272,7 @@ static int dvb_net_set_mac (struct net_device *dev, void *p)
        struct dvb_net_priv *priv = netdev_priv(dev);
        struct sockaddr *addr=p;
 
-       memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+       eth_hw_addr_set(dev, addr->sa_data);
 
        if (netif_running(dev))
                schedule_work(&priv->restart_net_feed_wq);
@@ -1367,7 +1367,7 @@ static int dvb_net_add_if(struct dvb_net *dvbnet, u16 pid, u8 feedtype)
                         dvbnet->dvbdev->adapter->num, if_num);
 
        net->addr_len = 6;
-       memcpy(net->dev_addr, dvbnet->dvbdev->adapter->proposed_mac, 6);
+       eth_hw_addr_set(net, dvbnet->dvbdev->adapter->proposed_mac);
 
        dvbnet->device[if_num] = net;
 
index 3261cac..acdc257 100644
@@ -1350,7 +1350,7 @@ mpt_register_lan_device (MPT_ADAPTER *mpt_dev, int pnum)
        HWaddr[5] = a[0];
 
        dev->addr_len = FC_ALEN;
-       memcpy(dev->dev_addr, HWaddr, FC_ALEN);
+       dev_addr_set(dev, HWaddr);
        memset(dev->broadcast, 0xff, FC_ALEN);
 
        /* The Tx queue is 127 deep on the 909.
index 2508f83..dab7b92 100644
@@ -514,6 +514,7 @@ static const struct net_device_ops xpnet_netdev_ops = {
 static int __init
 xpnet_init(void)
 {
+       u8 addr[ETH_ALEN];
        int result;
 
        if (!is_uv_system())
@@ -545,15 +546,17 @@ xpnet_init(void)
        xpnet_device->min_mtu = XPNET_MIN_MTU;
        xpnet_device->max_mtu = XPNET_MAX_MTU;
 
+       memset(addr, 0, sizeof(addr));
        /*
         * Multicast assumes the LSB of the first octet is set for multicast
         * MAC addresses.  We chose the first octet of the MAC to be unlikely
         * to collide with any vendor's officially issued MAC.
         */
-       xpnet_device->dev_addr[0] = 0x02;     /* locally administered, no OUI */
+       addr[0] = 0x02;     /* locally administered, no OUI */
 
-       xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
-       xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
+       addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
+       addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
+       eth_hw_addr_set(xpnet_device, addr);
 
        /*
         * ether_setup() sets this to a multicast device.  We are
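
The dvb_net, mptlan and xpnet hunks above are all the same conversion: drivers stop writing into netdev->dev_addr directly (it is headed toward becoming const) and instead assemble the MAC in a local buffer, publishing it through eth_hw_addr_set(), or dev_addr_set() for non-Ethernet lengths such as FC_ALEN. A minimal sketch, assuming a made-up partition-id scheme like xpnet's:

        #include <linux/etherdevice.h>
        #include <linux/netdevice.h>

        static void example_assign_mac(struct net_device *dev, u16 partid)
        {
                u8 addr[ETH_ALEN] = { 0x02 };   /* locally administered, no OUI */

                addr[4] = partid >> 8;
                addr[5] = partid & 0xff;
                eth_hw_addr_set(dev, addr);     /* not memcpy(dev->dev_addr, ...) */
        }
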
index 54e321a..edffc34 100644
@@ -577,11 +577,8 @@ static int bareudp2info(struct nlattr *data[], struct bareudp_conf *conf,
                return -EINVAL;
        }
 
-       if (data[IFLA_BAREUDP_PORT])
-               conf->port =  nla_get_u16(data[IFLA_BAREUDP_PORT]);
-
-       if (data[IFLA_BAREUDP_ETHERTYPE])
-               conf->ethertype =  nla_get_u16(data[IFLA_BAREUDP_ETHERTYPE]);
+       conf->port = nla_get_u16(data[IFLA_BAREUDP_PORT]);
+       conf->ethertype = nla_get_u16(data[IFLA_BAREUDP_ETHERTYPE]);
 
        if (data[IFLA_BAREUDP_SRCPORT_MIN])
                conf->sport_min =  nla_get_u16(data[IFLA_BAREUDP_SRCPORT_MIN]);
index c6ef7ec..2bc2b70 100644
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
-bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
+bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o bnxt_coredump.o
 bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
 bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o
index 66263aa..c04ea83 100644
@@ -49,8 +49,6 @@
 #include <linux/log2.h>
 #include <linux/aer.h>
 #include <linux/bitmap.h>
-#include <linux/ptp_clock_kernel.h>
-#include <linux/timecounter.h>
 #include <linux/cpu_rmap.h>
 #include <linux/cpumask.h>
 #include <net/pkt_cls.h>
@@ -85,55 +83,7 @@ MODULE_DESCRIPTION("Broadcom BCM573xx network driver");
 
 #define BNXT_TX_PUSH_THRESH 164
 
-enum board_idx {
-       BCM57301,
-       BCM57302,
-       BCM57304,
-       BCM57417_NPAR,
-       BCM58700,
-       BCM57311,
-       BCM57312,
-       BCM57402,
-       BCM57404,
-       BCM57406,
-       BCM57402_NPAR,
-       BCM57407,
-       BCM57412,
-       BCM57414,
-       BCM57416,
-       BCM57417,
-       BCM57412_NPAR,
-       BCM57314,
-       BCM57417_SFP,
-       BCM57416_SFP,
-       BCM57404_NPAR,
-       BCM57406_NPAR,
-       BCM57407_SFP,
-       BCM57407_NPAR,
-       BCM57414_NPAR,
-       BCM57416_NPAR,
-       BCM57452,
-       BCM57454,
-       BCM5745x_NPAR,
-       BCM57508,
-       BCM57504,
-       BCM57502,
-       BCM57508_NPAR,
-       BCM57504_NPAR,
-       BCM57502_NPAR,
-       BCM58802,
-       BCM58804,
-       BCM58808,
-       NETXTREME_E_VF,
-       NETXTREME_C_VF,
-       NETXTREME_S_VF,
-       NETXTREME_C_VF_HV,
-       NETXTREME_E_VF_HV,
-       NETXTREME_E_P5_VF,
-       NETXTREME_E_P5_VF_HV,
-};
-
-/* indexed by enum above */
+/* indexed by enum board_idx */
 static const struct {
        char *name;
 } board_info[] = {
@@ -2172,7 +2122,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
                set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event);
                break;
        case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: {
-               char *fatal_str = "non-fatal";
+               char *type_str = "Solicited";
 
                if (!bp->fw_health)
                        goto async_event_process_exit;
@@ -2184,13 +2134,21 @@ static int bnxt_async_event_process(struct bnxt *bp,
                bp->fw_reset_max_dsecs = le16_to_cpu(cmpl->timestamp_hi);
                if (!bp->fw_reset_max_dsecs)
                        bp->fw_reset_max_dsecs = BNXT_DFLT_FW_RST_MAX_DSECS;
-               if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) {
-                       fatal_str = "fatal";
+               if (EVENT_DATA1_RESET_NOTIFY_FW_ACTIVATION(data1)) {
+                       set_bit(BNXT_STATE_FW_ACTIVATE_RESET, &bp->state);
+               } else if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) {
+                       type_str = "Fatal";
+                       bp->fw_health->fatalities++;
                        set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
+               } else if (data2 && BNXT_FW_STATUS_HEALTHY !=
+                          EVENT_DATA2_RESET_NOTIFY_FW_STATUS_CODE(data2)) {
+                       type_str = "Non-fatal";
+                       bp->fw_health->survivals++;
+                       set_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state);
                }
                netif_warn(bp, hw, bp->dev,
-                          "Firmware %s reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n",
-                          fatal_str, data1, data2,
+                          "%s firmware reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n",
+                          type_str, data1, data2,
                           bp->fw_reset_min_dsecs * 100,
                           bp->fw_reset_max_dsecs * 100);
                set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event);
@@ -2198,17 +2156,18 @@ static int bnxt_async_event_process(struct bnxt *bp,
        }
        case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: {
                struct bnxt_fw_health *fw_health = bp->fw_health;
+               char *status_desc = "healthy";
+               u32 status;
 
                if (!fw_health)
                        goto async_event_process_exit;
 
                if (!EVENT_DATA1_RECOVERY_ENABLED(data1)) {
                        fw_health->enabled = false;
-                       netif_info(bp, drv, bp->dev,
-                                  "Error recovery info: error recovery[0]\n");
+                       netif_info(bp, drv, bp->dev, "Driver recovery watchdog is disabled\n");
                        break;
                }
-               fw_health->master = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1);
+               fw_health->primary = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1);
                fw_health->tmr_multiplier =
                        DIV_ROUND_UP(fw_health->polling_dsecs * HZ,
                                     bp->current_interval * 10);
@@ -2218,10 +2177,13 @@ static int bnxt_async_event_process(struct bnxt *bp,
                                bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
                fw_health->last_fw_reset_cnt =
                        bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+               status = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+               if (status != BNXT_FW_STATUS_HEALTHY)
+                       status_desc = "unhealthy";
                netif_info(bp, drv, bp->dev,
-                          "Error recovery info: error recovery[1], master[%d], reset count[%u], health status: 0x%x\n",
-                          fw_health->master, fw_health->last_fw_reset_cnt,
-                          bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG));
+                          "Driver recovery watchdog, role: %s, firmware status: 0x%x (%s), resets: %u\n",
+                          fw_health->primary ? "primary" : "backup", status,
+                          status_desc, fw_health->last_fw_reset_cnt);
                if (!fw_health->enabled) {
                        /* Make sure tmr_counter is set and visible to
                         * bnxt_health_check() before setting enabled to true.
@@ -4651,7 +4613,7 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
        return rc;
 }
 
-static int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp)
+int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp)
 {
        struct hwrm_func_drv_unrgtr_input *req;
        int rc;
@@ -7192,7 +7154,7 @@ static void bnxt_free_ctx_pg_tbls(struct bnxt *bp,
        ctx_pg->nr_pages = 0;
 }
 
-static void bnxt_free_ctx_mem(struct bnxt *bp)
+void bnxt_free_ctx_mem(struct bnxt *bp)
 {
        struct bnxt_ctx_mem_info *ctx = bp->ctx;
        int i;
@@ -7518,12 +7480,18 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
                bp->fw_cap |= BNXT_FW_CAP_ERR_RECOVER_RELOAD;
        if (!(flags & FUNC_QCAPS_RESP_FLAGS_VLAN_ACCELERATION_TX_DISABLED))
                bp->fw_cap |= BNXT_FW_CAP_VLAN_TX_INSERT;
+       if (flags & FUNC_QCAPS_RESP_FLAGS_DBG_QCAPS_CMD_SUPPORTED)
+               bp->fw_cap |= BNXT_FW_CAP_DBG_QCAPS;
 
        flags_ext = le32_to_cpu(resp->flags_ext);
        if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_EXT_HW_STATS_SUPPORTED)
                bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED))
                bp->fw_cap |= BNXT_FW_CAP_PTP_PPS;
+       if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT))
+               bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF;
+       if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED))
+               bp->fw_cap |= BNXT_FW_CAP_LIVEPATCH;
 
        bp->tx_push_thresh = 0;
        if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) &&
@@ -7579,6 +7547,32 @@ hwrm_func_qcaps_exit:
        return rc;
 }
 
+static void bnxt_hwrm_dbg_qcaps(struct bnxt *bp)
+{
+       struct hwrm_dbg_qcaps_output *resp;
+       struct hwrm_dbg_qcaps_input *req;
+       int rc;
+
+       bp->fw_dbg_cap = 0;
+       if (!(bp->fw_cap & BNXT_FW_CAP_DBG_QCAPS))
+               return;
+
+       rc = hwrm_req_init(bp, req, HWRM_DBG_QCAPS);
+       if (rc)
+               return;
+
+       req->fid = cpu_to_le16(0xffff);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       if (rc)
+               goto hwrm_dbg_qcaps_exit;
+
+       bp->fw_dbg_cap = le32_to_cpu(resp->flags);
+
+hwrm_dbg_qcaps_exit:
+       hwrm_req_drop(bp, req);
+}
+
 static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp);
 
 static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
@@ -7588,6 +7582,9 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
        rc = __bnxt_hwrm_func_qcaps(bp);
        if (rc)
                return rc;
+
+       bnxt_hwrm_dbg_qcaps(bp);
+
        rc = bnxt_hwrm_queue_qportcfg(bp);
        if (rc) {
                netdev_err(bp->dev, "hwrm query qportcfg failure rc: %d\n", rc);
@@ -7642,6 +7639,7 @@ static int __bnxt_alloc_fw_health(struct bnxt *bp)
        if (!bp->fw_health)
                return -ENOMEM;
 
+       mutex_init(&bp->fw_health->lock);
        return 0;
 }
 
@@ -7688,12 +7686,16 @@ static void bnxt_inv_fw_health_reg(struct bnxt *bp)
        struct bnxt_fw_health *fw_health = bp->fw_health;
        u32 reg_type;
 
-       if (!fw_health || !fw_health->status_reliable)
+       if (!fw_health)
                return;
 
        reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_HEALTH_REG]);
        if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC)
                fw_health->status_reliable = false;
+
+       reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_RESET_CNT_REG]);
+       if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC)
+               fw_health->resets_reliable = false;
 }
 
 static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
@@ -7750,6 +7752,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
        int i;
 
        bp->fw_health->status_reliable = false;
+       bp->fw_health->resets_reliable = false;
        /* Only pre-map the monitoring GRC registers using window 3 */
        for (i = 0; i < 4; i++) {
                u32 reg = fw_health->regs[i];
@@ -7763,6 +7766,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
                fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_OFF(reg);
        }
        bp->fw_health->status_reliable = true;
+       bp->fw_health->resets_reliable = true;
        if (reg_base == 0xffffffff)
                return 0;
 
@@ -8208,6 +8212,10 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags)
        if (!rc) {
                bp->fw_rx_stats_ext_size =
                        le16_to_cpu(resp_qs->rx_stat_size) / 8;
+               if (BNXT_FW_MAJ(bp) < 220 &&
+                   bp->fw_rx_stats_ext_size > BNXT_RX_STATS_EXT_NUM_LEGACY)
+                       bp->fw_rx_stats_ext_size = BNXT_RX_STATS_EXT_NUM_LEGACY;
+
                bp->fw_tx_stats_ext_size = tx_stat_size ?
                        le16_to_cpu(resp_qs->tx_stat_size) / 8 : 0;
        } else {
@@ -9246,7 +9254,7 @@ static char *bnxt_report_fec(struct bnxt_link_info *link_info)
        }
 }
 
-static void bnxt_report_link(struct bnxt *bp)
+void bnxt_report_link(struct bnxt *bp)
 {
        if (bp->link_info.link_up) {
                const char *signal = "";
@@ -9691,8 +9699,6 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
        return hwrm_req_send(bp, req);
 }
 
-static int bnxt_fw_init_one(struct bnxt *bp);
-
 static int bnxt_fw_reset_via_optee(struct bnxt *bp)
 {
 #ifdef CONFIG_TEE_BNXT_FW
@@ -9739,6 +9745,33 @@ static int bnxt_try_recover_fw(struct bnxt *bp)
        return -ENODEV;
 }
 
+int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset)
+{
+       struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+       int rc;
+
+       if (!BNXT_NEW_RM(bp))
+               return 0; /* no resource reservations required */
+
+       rc = bnxt_hwrm_func_resc_qcaps(bp, true);
+       if (rc)
+               netdev_err(bp->dev, "resc_qcaps failed\n");
+
+       hw_resc->resv_cp_rings = 0;
+       hw_resc->resv_stat_ctxs = 0;
+       hw_resc->resv_irqs = 0;
+       hw_resc->resv_tx_rings = 0;
+       hw_resc->resv_rx_rings = 0;
+       hw_resc->resv_hw_ring_grps = 0;
+       hw_resc->resv_vnics = 0;
+       if (!fw_reset) {
+               bp->tx_nr_rings = 0;
+               bp->rx_nr_rings = 0;
+       }
+
+       return rc;
+}
+
 static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 {
        struct hwrm_func_drv_if_change_output *resp;
@@ -9822,25 +9855,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
                                return rc;
                        }
                }
-               if (BNXT_NEW_RM(bp)) {
-                       struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-
-                       rc = bnxt_hwrm_func_resc_qcaps(bp, true);
-                       if (rc)
-                               netdev_err(bp->dev, "resc_qcaps failed\n");
-
-                       hw_resc->resv_cp_rings = 0;
-                       hw_resc->resv_stat_ctxs = 0;
-                       hw_resc->resv_irqs = 0;
-                       hw_resc->resv_tx_rings = 0;
-                       hw_resc->resv_rx_rings = 0;
-                       hw_resc->resv_hw_ring_grps = 0;
-                       hw_resc->resv_vnics = 0;
-                       if (!fw_reset) {
-                               bp->tx_nr_rings = 0;
-                               bp->rx_nr_rings = 0;
-                       }
-               }
+               rc = bnxt_cancel_reservations(bp, fw_reset);
        }
        return rc;
 }
@@ -10318,7 +10333,7 @@ void bnxt_half_close_nic(struct bnxt *bp)
        bnxt_free_mem(bp, false);
 }
 
-static void bnxt_reenable_sriov(struct bnxt *bp)
+void bnxt_reenable_sriov(struct bnxt *bp)
 {
        if (BNXT_PF(bp)) {
                struct bnxt_pf_info *pf = &bp->pf;
@@ -11295,14 +11310,18 @@ static void bnxt_fw_health_check(struct bnxt *bp)
        }
 
        val = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
-       if (val == fw_health->last_fw_heartbeat)
+       if (val == fw_health->last_fw_heartbeat) {
+               fw_health->arrests++;
                goto fw_reset;
+       }
 
        fw_health->last_fw_heartbeat = val;
 
        val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
-       if (val != fw_health->last_fw_reset_cnt)
+       if (val != fw_health->last_fw_reset_cnt) {
+               fw_health->discoveries++;
                goto fw_reset;
+       }
 
        fw_health->tmr_counter = fw_health->tmr_multiplier;
        return;
@@ -11508,7 +11527,7 @@ static void bnxt_force_fw_reset(struct bnxt *bp)
        }
        bnxt_fw_reset_close(bp);
        wait_dsecs = fw_health->master_func_wait_dsecs;
-       if (fw_health->master) {
+       if (fw_health->primary) {
                if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU)
                        wait_dsecs = 0;
                bp->fw_reset_state = BNXT_FW_RESET_STATE_RESET_FW;
@@ -11772,13 +11791,17 @@ static void bnxt_sp_task(struct work_struct *work)
        if (test_and_clear_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event))
                bnxt_rx_ring_reset(bp);
 
-       if (test_and_clear_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event))
-               bnxt_devlink_health_report(bp, BNXT_FW_RESET_NOTIFY_SP_EVENT);
+       if (test_and_clear_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event)) {
+               if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) ||
+                   test_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state))
+                       bnxt_devlink_health_fw_report(bp);
+               else
+                       bnxt_fw_reset(bp);
+       }
 
        if (test_and_clear_bit(BNXT_FW_EXCEPTION_SP_EVENT, &bp->sp_event)) {
                if (!is_bnxt_fw_ok(bp))
-                       bnxt_devlink_health_report(bp,
-                                                  BNXT_FW_EXCEPTION_SP_EVENT);
+                       bnxt_devlink_health_fw_report(bp);
        }
 
        smp_mb__before_atomic();
@@ -11989,7 +12012,7 @@ static void bnxt_fw_init_one_p3(struct bnxt *bp)
 
 static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt);
 
-static int bnxt_fw_init_one(struct bnxt *bp)
+int bnxt_fw_init_one(struct bnxt *bp)
 {
        int rc;
 
@@ -12051,6 +12074,27 @@ static void bnxt_fw_reset_writel(struct bnxt *bp, int reg_idx)
        }
 }
 
+bool bnxt_hwrm_reset_permitted(struct bnxt *bp)
+{
+       struct hwrm_func_qcfg_output *resp;
+       struct hwrm_func_qcfg_input *req;
+       bool result = true; /* firmware will enforce if unknown */
+
+       if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF)
+               return result;
+
+       if (hwrm_req_init(bp, req, HWRM_FUNC_QCFG))
+               return result;
+
+       req->fid = cpu_to_le16(0xffff);
+       resp = hwrm_req_hold(bp, req);
+       if (!hwrm_req_send(bp, req))
+               result = !!(le16_to_cpu(resp->flags) &
+                           FUNC_QCFG_RESP_FLAGS_HOT_RESET_ALLOWED);
+       hwrm_req_drop(bp, req);
+       return result;
+}
+
 static void bnxt_reset_all(struct bnxt *bp)
 {
        struct bnxt_fw_health *fw_health = bp->fw_health;
@@ -12093,7 +12137,7 @@ static void bnxt_fw_reset_abort(struct bnxt *bp, int rc)
        clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
        if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) {
                bnxt_ulp_start(bp, rc);
-               bnxt_dl_health_status_update(bp, false);
+               bnxt_dl_health_fw_status_update(bp, false);
        }
        bp->fw_reset_state = 0;
        dev_close(bp->dev);
@@ -12159,7 +12203,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                        return;
                }
 
-               if (!bp->fw_health->master) {
+               if (!bp->fw_health->primary) {
                        u32 wait_dsecs = bp->fw_health->normal_func_wait_dsecs;
 
                        bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
@@ -12192,6 +12236,10 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                        }
                }
                clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
+               clear_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state);
+               if (test_and_clear_bit(BNXT_STATE_FW_ACTIVATE_RESET, &bp->state) &&
+                   !test_bit(BNXT_STATE_FW_ACTIVATE, &bp->state))
+                       bnxt_dl_remote_reload(bp);
                if (pci_enable_device(bp->pdev)) {
                        netdev_err(bp->dev, "Cannot re-enable PCI device\n");
                        rc = -ENODEV;
@@ -12241,8 +12289,11 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                bnxt_vf_reps_alloc(bp);
                bnxt_vf_reps_open(bp);
                bnxt_ptp_reapply_pps(bp);
-               bnxt_dl_health_recovery_done(bp);
-               bnxt_dl_health_status_update(bp, true);
+               clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state);
+               if (test_and_clear_bit(BNXT_STATE_RECOVER, &bp->state)) {
+                       bnxt_dl_health_fw_recovery_done(bp);
+                       bnxt_dl_health_fw_status_update(bp, true);
+               }
                rtnl_unlock();
                break;
        }
@@ -13186,6 +13237,15 @@ static int bnxt_map_db_bar(struct bnxt *bp)
        return 0;
 }
 
+void bnxt_print_device_info(struct bnxt *bp)
+{
+       netdev_info(bp->dev, "%s found at mem %lx, node addr %pM\n",
+                   board_info[bp->board_idx].name,
+                   (long)pci_resource_start(bp->pdev, 0), bp->dev->dev_addr);
+
+       pcie_print_link_status(bp->pdev);
+}
+
 static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct net_device *dev;
@@ -13209,10 +13269,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                return -ENOMEM;
 
        bp = netdev_priv(dev);
+       bp->board_idx = ent->driver_data;
        bp->msg_enable = BNXT_DEF_MSG_ENABLE;
        bnxt_set_max_func_irqs(bp, max_irqs);
 
-       if (bnxt_vf_pciid(ent->driver_data))
+       if (bnxt_vf_pciid(bp->board_idx))
                bp->flags |= BNXT_FLAG_VF;
 
        if (pdev->msix_cap)
@@ -13382,10 +13443,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                devlink_port_type_eth_set(&bp->dl_port, bp->dev);
        bnxt_dl_fw_reporters_create(bp);
 
-       netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
-                   board_info[ent->driver_data].name,
-                   (long)pci_resource_start(pdev, 0), dev->dev_addr);
-       pcie_print_link_status(pdev);
+       bnxt_print_device_info(bp);
 
        pci_save_state(pdev);
        return 0;
index 19fe647..d0d5da9 100644 (file)
@@ -489,6 +489,15 @@ struct rx_tpa_end_cmp_ext {
          ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK) ==\
         ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL)
 
+#define EVENT_DATA1_RESET_NOTIFY_FW_ACTIVATION(data1)                  \
+       (((data1) &                                                     \
+         ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK) ==\
+       ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION)
+
+#define EVENT_DATA2_RESET_NOTIFY_FW_STATUS_CODE(data2)                 \
+       ((data2) &                                                      \
+       ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_MASK)
+
 #define EVENT_DATA1_RECOVERY_MASTER_FUNC(data1)                                \
        !!((data1) &                                                    \
           ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC)
@@ -1514,6 +1523,21 @@ struct bnxt_ctx_mem_info {
        struct bnxt_mem_init    mem_init[BNXT_CTX_MEM_INIT_MAX];
 };
 
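+/* Health state tracked for the consolidated "fw" devlink health reporter,
+ * which replaces the former fw, fw_reset and fw_fatal reporters.
+ */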
+enum bnxt_health_severity {
+       SEVERITY_NORMAL = 0,
+       SEVERITY_WARNING,
+       SEVERITY_RECOVERABLE,
+       SEVERITY_FATAL,
+};
+
+enum bnxt_health_remedy {
+       REMEDY_DEVLINK_RECOVER,
+       REMEDY_POWER_CYCLE_DEVICE,
+       REMEDY_POWER_CYCLE_HOST,
+       REMEDY_FW_UPDATE,
+       REMEDY_HW_REPLACE,
+};
+
 struct bnxt_fw_health {
        u32 flags;
        u32 polling_dsecs;
@@ -1531,9 +1555,9 @@ struct bnxt_fw_health {
        u32 last_fw_heartbeat;
        u32 last_fw_reset_cnt;
        u8 enabled:1;
-       u8 master:1;
-       u8 fatal:1;
+       u8 primary:1;
        u8 status_reliable:1;
+       u8 resets_reliable:1;
        u8 tmr_multiplier;
        u8 tmr_counter;
        u8 fw_reset_seq_cnt;
@@ -1543,12 +1567,15 @@ struct bnxt_fw_health {
        u32 echo_req_data1;
        u32 echo_req_data2;
        struct devlink_health_reporter  *fw_reporter;
-       struct devlink_health_reporter *fw_reset_reporter;
-       struct devlink_health_reporter *fw_fatal_reporter;
-};
-
-struct bnxt_fw_reporter_ctx {
-       unsigned long sp_event;
+       /* Protects severity and remedy */
+       struct mutex lock;
+       enum bnxt_health_severity severity;
+       enum bnxt_health_remedy remedy;
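+       /* cumulative counts of FW health events, reported via diagnose */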
+       u32 arrests;
+       u32 discoveries;
+       u32 survivals;
+       u32 fatalities;
+       u32 diagnoses;
 };
 
 #define BNXT_FW_HEALTH_REG_TYPE_MASK   3
@@ -1586,6 +1613,54 @@ struct bnxt_fw_reporter_ctx {
 #define BNXT_FW_RETRY                  5
 #define BNXT_FW_IF_RETRY               10
 
+enum board_idx {
+       BCM57301,
+       BCM57302,
+       BCM57304,
+       BCM57417_NPAR,
+       BCM58700,
+       BCM57311,
+       BCM57312,
+       BCM57402,
+       BCM57404,
+       BCM57406,
+       BCM57402_NPAR,
+       BCM57407,
+       BCM57412,
+       BCM57414,
+       BCM57416,
+       BCM57417,
+       BCM57412_NPAR,
+       BCM57314,
+       BCM57417_SFP,
+       BCM57416_SFP,
+       BCM57404_NPAR,
+       BCM57406_NPAR,
+       BCM57407_SFP,
+       BCM57407_NPAR,
+       BCM57414_NPAR,
+       BCM57416_NPAR,
+       BCM57452,
+       BCM57454,
+       BCM5745x_NPAR,
+       BCM57508,
+       BCM57504,
+       BCM57502,
+       BCM57508_NPAR,
+       BCM57504_NPAR,
+       BCM57502_NPAR,
+       BCM58802,
+       BCM58804,
+       BCM58808,
+       NETXTREME_E_VF,
+       NETXTREME_C_VF,
+       NETXTREME_S_VF,
+       NETXTREME_C_VF_HV,
+       NETXTREME_E_VF_HV,
+       NETXTREME_E_P5_VF,
+       NETXTREME_E_P5_VF_HV,
+};
+
 struct bnxt {
        void __iomem            *bar0;
        void __iomem            *bar1;
@@ -1840,6 +1915,10 @@ struct bnxt {
 #define BNXT_STATE_DRV_REGISTERED      7
 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN       8
 #define BNXT_STATE_NAPI_DISABLED       9
+#define BNXT_STATE_FW_ACTIVATE         11
+#define BNXT_STATE_RECOVER             12
+#define BNXT_STATE_FW_NON_FATAL_COND   13
+#define BNXT_STATE_FW_ACTIVATE_RESET   14
 
 #define BNXT_NO_FW_ACCESS(bp)                                  \
        (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) ||    \
@@ -1879,8 +1958,13 @@ struct bnxt {
        #define BNXT_FW_CAP_VLAN_RX_STRIP               0x01000000
        #define BNXT_FW_CAP_VLAN_TX_INSERT              0x02000000
        #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED      0x04000000
+       #define BNXT_FW_CAP_LIVEPATCH                   0x08000000
        #define BNXT_FW_CAP_PTP_PPS                     0x10000000
+       #define BNXT_FW_CAP_HOT_RESET_IF                0x20000000
        #define BNXT_FW_CAP_RING_MONITOR                0x40000000
+       #define BNXT_FW_CAP_DBG_QCAPS                   0x80000000
+
+       u32                     fw_dbg_cap;
 
 #define BNXT_NEW_RM(bp)                ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
        u32                     hwrm_spec_code;
@@ -2049,6 +2133,7 @@ struct bnxt {
        struct list_head        tc_indr_block_list;
        struct dentry           *debugfs_pdev;
        struct device           *hwmon_dev;
+       enum board_idx          board_idx;
 };
 
 #define BNXT_NUM_RX_RING_STATS                 8
@@ -2090,6 +2175,9 @@ struct bnxt {
 #define BNXT_RX_STATS_EXT_OFFSET(counter)              \
        (offsetof(struct rx_port_stats_ext, counter) / 8)
 
+#define BNXT_RX_STATS_EXT_NUM_LEGACY                   \
+       BNXT_RX_STATS_EXT_OFFSET(rx_fec_corrected_blocks)
+
 #define BNXT_TX_STATS_EXT_OFFSET(counter)              \
        (offsetof(struct tx_port_stats_ext, counter) / 8)
 
@@ -2181,11 +2269,13 @@ void bnxt_set_ring_params(struct bnxt *);
 int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
 int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap,
                            int bmap_size, bool async_only);
+int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp);
 int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings);
 int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id);
 int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings);
 int bnxt_nq_rings_in_use(struct bnxt *bp);
 int bnxt_hwrm_set_coal(struct bnxt *);
+void bnxt_free_ctx_mem(struct bnxt *bp);
 unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
 unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp);
 unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp);
@@ -2194,9 +2284,11 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num);
 int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init);
 void bnxt_tx_disable(struct bnxt *bp);
 void bnxt_tx_enable(struct bnxt *bp);
+void bnxt_report_link(struct bnxt *bp);
 int bnxt_update_link(struct bnxt *bp, bool chng_link_state);
 int bnxt_hwrm_set_pause(struct bnxt *);
 int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
+int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset);
 int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_free_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all);
@@ -2205,6 +2297,7 @@ int bnxt_hwrm_fw_set_time(struct bnxt *);
 int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_half_open_nic(struct bnxt *bp);
 void bnxt_half_close_nic(struct bnxt *bp);
+void bnxt_reenable_sriov(struct bnxt *bp);
 int bnxt_close_nic(struct bnxt *, bool, bool);
 int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
                         u32 *reg_buf);
@@ -2212,6 +2305,8 @@ void bnxt_fw_exception(struct bnxt *bp);
 void bnxt_fw_reset(struct bnxt *bp);
 int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
                     int tx_xdp);
+int bnxt_fw_init_one(struct bnxt *bp);
+bool bnxt_hwrm_reset_permitted(struct bnxt *bp);
 int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
 int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
 int bnxt_restore_pf_fw_resources(struct bnxt *bp);
@@ -2219,5 +2314,5 @@ int bnxt_get_port_parent_id(struct net_device *dev,
                            struct netdev_phys_item_id *ppid);
 void bnxt_dim_work(struct work_struct *work);
 int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi);
-
+void bnxt_print_device_info(struct bnxt *bp);
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
new file mode 100644 (file)
index 0000000..d3cb2f2
--- /dev/null
@@ -0,0 +1,444 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2021 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_hwrm.h"
+#include "bnxt_coredump.h"
+
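+/* Send a HWRM_DBG_* request repeatedly, stepping the sequence number and
+ * copying each DMA'ed chunk into info->dest_buf, until the firmware clears
+ * the HWRM_DBG_CMN_FLAGS_MORE flag. For HWRM_DBG_COREDUMP_LIST the
+ * destination buffer is allocated here, sized from the reported segment
+ * count.
+ */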
+static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg,
+                                 struct bnxt_hwrm_dbg_dma_info *info)
+{
+       struct hwrm_dbg_cmn_input *cmn_req = msg;
+       __le16 *seq_ptr = msg + info->seq_off;
+       struct hwrm_dbg_cmn_output *cmn_resp;
+       u16 seq = 0, len, segs_off;
+       dma_addr_t dma_handle;
+       void *dma_buf, *resp;
+       int rc, off = 0;
+
+       dma_buf = hwrm_req_dma_slice(bp, msg, info->dma_len, &dma_handle);
+       if (!dma_buf) {
+               hwrm_req_drop(bp, msg);
+               return -ENOMEM;
+       }
+
+       hwrm_req_timeout(bp, msg, HWRM_COREDUMP_TIMEOUT);
+       cmn_resp = hwrm_req_hold(bp, msg);
+       resp = cmn_resp;
+
+       segs_off = offsetof(struct hwrm_dbg_coredump_list_output,
+                           total_segments);
+       cmn_req->host_dest_addr = cpu_to_le64(dma_handle);
+       cmn_req->host_buf_len = cpu_to_le32(info->dma_len);
+       while (1) {
+               *seq_ptr = cpu_to_le16(seq);
+               rc = hwrm_req_send(bp, msg);
+               if (rc)
+                       break;
+
+               len = le16_to_cpu(*((__le16 *)(resp + info->data_len_off)));
+               if (!seq &&
+                   cmn_req->req_type == cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) {
+                       info->segs = le16_to_cpu(*((__le16 *)(resp +
+                                                             segs_off)));
+                       if (!info->segs) {
+                               rc = -EIO;
+                               break;
+                       }
+
+                       info->dest_buf_size = info->segs *
+                                       sizeof(struct coredump_segment_record);
+                       info->dest_buf = kmalloc(info->dest_buf_size,
+                                                GFP_KERNEL);
+                       if (!info->dest_buf) {
+                               rc = -ENOMEM;
+                               break;
+                       }
+               }
+
+               if (info->dest_buf) {
+                       if ((info->seg_start + off + len) <=
+                           BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
+                               memcpy(info->dest_buf + off, dma_buf, len);
+                       } else {
+                               rc = -ENOBUFS;
+                               break;
+                       }
+               }
+
+               if (cmn_req->req_type ==
+                               cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
+                       info->dest_buf_size += len;
+
+               if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE))
+                       break;
+
+               seq++;
+               off += len;
+       }
+       hwrm_req_drop(bp, msg);
+       return rc;
+}
+
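+/* Fetch the list of coredump segment records advertised by the firmware */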
+static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp,
+                                      struct bnxt_coredump *coredump)
+{
+       struct bnxt_hwrm_dbg_dma_info info = {NULL};
+       struct hwrm_dbg_coredump_list_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_LIST);
+       if (rc)
+               return rc;
+
+       info.dma_len = COREDUMP_LIST_BUF_LEN;
+       info.seq_off = offsetof(struct hwrm_dbg_coredump_list_input, seq_no);
+       info.data_len_off = offsetof(struct hwrm_dbg_coredump_list_output,
+                                    data_len);
+
+       rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
+       if (!rc) {
+               coredump->data = info.dest_buf;
+               coredump->data_size = info.dest_buf_size;
+               coredump->total_segs = info.segs;
+       }
+       return rc;
+}
+
+static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,
+                                          u16 segment_id)
+{
+       struct hwrm_dbg_coredump_initiate_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_INITIATE);
+       if (rc)
+               return rc;
+
+       hwrm_req_timeout(bp, req, HWRM_COREDUMP_TIMEOUT);
+       req->component_id = cpu_to_le16(component_id);
+       req->segment_id = cpu_to_le16(segment_id);
+
+       return hwrm_req_send(bp, req);
+}
+
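+/* Retrieve one segment's data into buf + offset, or only measure its
+ * length when buf is NULL.
+ */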
+static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
+                                          u16 segment_id, u32 *seg_len,
+                                          void *buf, u32 buf_len, u32 offset)
+{
+       struct hwrm_dbg_coredump_retrieve_input *req;
+       struct bnxt_hwrm_dbg_dma_info info = {NULL};
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_RETRIEVE);
+       if (rc)
+               return rc;
+
+       req->component_id = cpu_to_le16(component_id);
+       req->segment_id = cpu_to_le16(segment_id);
+
+       info.dma_len = COREDUMP_RETRIEVE_BUF_LEN;
+       info.seq_off = offsetof(struct hwrm_dbg_coredump_retrieve_input,
+                               seq_no);
+       info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
+                                    data_len);
+       if (buf) {
+               info.dest_buf = buf + offset;
+               info.buf_len = buf_len;
+               info.seg_start = offset;
+       }
+
+       rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
+       if (!rc)
+               *seg_len = info.dest_buf_size;
+
+       return rc;
+}
+
+static void
+bnxt_fill_coredump_seg_hdr(struct bnxt *bp,
+                          struct bnxt_coredump_segment_hdr *seg_hdr,
+                          struct coredump_segment_record *seg_rec, u32 seg_len,
+                          int status, u32 duration, u32 instance)
+{
+       memset(seg_hdr, 0, sizeof(*seg_hdr));
+       memcpy(seg_hdr->signature, "sEgM", 4);
+       if (seg_rec) {
+               seg_hdr->component_id = (__force __le32)seg_rec->component_id;
+               seg_hdr->segment_id = (__force __le32)seg_rec->segment_id;
+               seg_hdr->low_version = seg_rec->version_low;
+               seg_hdr->high_version = seg_rec->version_hi;
+               seg_hdr->flags = cpu_to_le32(seg_rec->compress_flags);
+       } else {
+               /* The hwrm_ver_get response segment uses component id 2
+                * and segment id 0
+                */
+               seg_hdr->component_id = cpu_to_le32(2);
+               seg_hdr->segment_id = 0;
+       }
+       seg_hdr->function_id = cpu_to_le16(bp->pdev->devfn);
+       seg_hdr->length = cpu_to_le32(seg_len);
+       seg_hdr->status = cpu_to_le32(status);
+       seg_hdr->duration = cpu_to_le32(duration);
+       seg_hdr->data_offset = cpu_to_le32(sizeof(*seg_hdr));
+       seg_hdr->instance = cpu_to_le32(instance);
+}
+
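+/* Record the dumping process's command line (NUL separators replaced with
+ * spaces), falling back to its comm name, in the coredump record.
+ */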
+static void bnxt_fill_cmdline(struct bnxt_coredump_record *record)
+{
+       struct mm_struct *mm = current->mm;
+       int i, len, last = 0;
+
+       if (mm) {
+               len = min_t(int, mm->arg_end - mm->arg_start,
+                           sizeof(record->commandline) - 1);
+               if (len && !copy_from_user(record->commandline,
+                                          (char __user *)mm->arg_start, len)) {
+                       for (i = 0; i < len; i++) {
+                               if (record->commandline[i])
+                                       last = i;
+                               else
+                                       record->commandline[i] = ' ';
+                       }
+                       record->commandline[last + 1] = 0;
+                       return;
+               }
+       }
+
+       strscpy(record->commandline, current->comm, TASK_COMM_LEN);
+}
+
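+/* Fill the "cOrE"-signed trailer record with start/end timestamps, host
+ * and OS identification, and the ASIC id.
+ */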
+static void
+bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record,
+                         time64_t start, s16 start_utc, u16 total_segs,
+                         int status)
+{
+       time64_t end = ktime_get_real_seconds();
+       u32 os_ver_major = 0, os_ver_minor = 0;
+       struct tm tm;
+
+       time64_to_tm(start, 0, &tm);
+       memset(record, 0, sizeof(*record));
+       memcpy(record->signature, "cOrE", 4);
+       record->flags = 0;
+       record->low_version = 0;
+       record->high_version = 1;
+       record->asic_state = 0;
+       strscpy(record->system_name, utsname()->nodename,
+               sizeof(record->system_name));
+       record->year = cpu_to_le16(tm.tm_year + 1900);
+       record->month = cpu_to_le16(tm.tm_mon + 1);
+       record->day = cpu_to_le16(tm.tm_mday);
+       record->hour = cpu_to_le16(tm.tm_hour);
+       record->minute = cpu_to_le16(tm.tm_min);
+       record->second = cpu_to_le16(tm.tm_sec);
+       record->utc_bias = cpu_to_le16(start_utc);
+       bnxt_fill_cmdline(record);
+       record->total_segments = cpu_to_le32(total_segs);
+
+       if (sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor) != 2)
+               netdev_warn(bp->dev, "Unknown OS release in coredump\n");
+       record->os_ver_major = cpu_to_le32(os_ver_major);
+       record->os_ver_minor = cpu_to_le32(os_ver_minor);
+
+       strscpy(record->os_name, utsname()->sysname, sizeof(record->os_name));
+       time64_to_tm(end, 0, &tm);
+       record->end_year = cpu_to_le16(tm.tm_year + 1900);
+       record->end_month = cpu_to_le16(tm.tm_mon + 1);
+       record->end_day = cpu_to_le16(tm.tm_mday);
+       record->end_hour = cpu_to_le16(tm.tm_hour);
+       record->end_minute = cpu_to_le16(tm.tm_min);
+       record->end_second = cpu_to_le16(tm.tm_sec);
+       record->end_utc_bias = cpu_to_le16(sys_tz.tz_minuteswest * 60);
+       record->asic_id1 = cpu_to_le32(bp->chip_num << 16 |
+                                      bp->ver_resp.chip_rev << 8 |
+                                      bp->ver_resp.chip_metal);
+       record->asic_id2 = 0;
+       record->coredump_status = cpu_to_le32(status);
+       record->ioctl_low_version = 0;
+       record->ioctl_high_version = 0;
+}
+
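+/* Build the dump: a "sEgM" header plus the cached HWRM_VER_GET response,
+ * one header + data pair per firmware segment, and a trailing coredump
+ * record. With a NULL buf, only the required length is computed.
+ */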
+static int __bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
+{
+       u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
+       u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
+       struct coredump_segment_record *seg_record = NULL;
+       struct bnxt_coredump_segment_hdr seg_hdr;
+       struct bnxt_coredump coredump = {NULL};
+       time64_t start_time;
+       u16 start_utc;
+       int rc = 0, i;
+
+       if (buf)
+               buf_len = *dump_len;
+
+       start_time = ktime_get_real_seconds();
+       start_utc = sys_tz.tz_minuteswest * 60;
+       seg_hdr_len = sizeof(seg_hdr);
+
+       /* First segment should be hwrm_ver_get response */
+       *dump_len = seg_hdr_len + ver_get_resp_len;
+       if (buf) {
+               bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, ver_get_resp_len,
+                                          0, 0, 0);
+               memcpy(buf + offset, &seg_hdr, seg_hdr_len);
+               offset += seg_hdr_len;
+               memcpy(buf + offset, &bp->ver_resp, ver_get_resp_len);
+               offset += ver_get_resp_len;
+       }
+
+       rc = bnxt_hwrm_dbg_coredump_list(bp, &coredump);
+       if (rc) {
+               netdev_err(bp->dev, "Failed to get coredump segment list\n");
+               goto err;
+       }
+
+       *dump_len += seg_hdr_len * coredump.total_segs;
+
+       seg_record = (struct coredump_segment_record *)coredump.data;
+       seg_record_len = sizeof(*seg_record);
+
+       for (i = 0; i < coredump.total_segs; i++) {
+               u16 comp_id = le16_to_cpu(seg_record->component_id);
+               u16 seg_id = le16_to_cpu(seg_record->segment_id);
+               u32 duration = 0, seg_len = 0;
+               unsigned long start, end;
+
+               if (buf && ((offset + seg_hdr_len) >
+                           BNXT_COREDUMP_BUF_LEN(buf_len))) {
+                       rc = -ENOBUFS;
+                       goto err;
+               }
+
+               start = jiffies;
+
+               rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
+               if (rc) {
+                       netdev_err(bp->dev,
+                                  "Failed to initiate coredump for seg = %d\n",
+                                  seg_record->segment_id);
+                       goto next_seg;
+               }
+
+               /* Write segment data into the buffer */
+               rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
+                                                    &seg_len, buf, buf_len,
+                                                    offset + seg_hdr_len);
+               if (rc == -ENOBUFS)
+                       goto err;
+               else if (rc)
+                       netdev_err(bp->dev,
+                                  "Failed to retrieve coredump for seg = %d\n",
+                                  seg_record->segment_id);
+
+next_seg:
+               end = jiffies;
+               duration = jiffies_to_msecs(end - start);
+               bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, seg_record, seg_len,
+                                          rc, duration, 0);
+
+               if (buf) {
+                       /* Write segment header into the buffer */
+                       memcpy(buf + offset, &seg_hdr, seg_hdr_len);
+                       offset += seg_hdr_len + seg_len;
+               }
+
+               *dump_len += seg_len;
+               seg_record =
+                       (struct coredump_segment_record *)((u8 *)seg_record +
+                                                          seg_record_len);
+       }
+
+err:
+       if (buf)
+               bnxt_fill_coredump_record(bp, buf + offset, start_time,
+                                         start_utc, coredump.total_segs + 1,
+                                         rc);
+       kfree(coredump.data);
+       *dump_len += sizeof(struct bnxt_coredump_record);
+       if (rc == -ENOBUFS)
+               netdev_err(bp->dev, "Firmware returned large coredump buffer\n");
+       return rc;
+}
+
+int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len)
+{
+       if (dump_type == BNXT_DUMP_CRASH) {
+#ifdef CONFIG_TEE_BNXT_FW
+               return tee_bnxt_copy_coredump(buf, 0, *dump_len);
+#else
+               return -EOPNOTSUPP;
+#endif
+       } else {
+               return __bnxt_get_coredump(bp, buf, dump_len);
+       }
+}
+
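+/* Ask the firmware for the expected dump size; for live dumps the
+ * driver-added header, VER_GET segment and trailer record are counted
+ * on top.
+ */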
+static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len)
+{
+       struct hwrm_dbg_qcfg_output *resp;
+       struct hwrm_dbg_qcfg_input *req;
+       int rc, hdr_len = 0;
+
+       if (!(bp->fw_cap & BNXT_FW_CAP_DBG_QCAPS))
+               return -EOPNOTSUPP;
+
+       if (dump_type == BNXT_DUMP_CRASH &&
+           !(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR))
+               return -EOPNOTSUPP;
+
+       rc = hwrm_req_init(bp, req, HWRM_DBG_QCFG);
+       if (rc)
+               return rc;
+
+       req->fid = cpu_to_le16(0xffff);
+       if (dump_type == BNXT_DUMP_CRASH)
+               req->flags = cpu_to_le16(DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR);
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       if (rc)
+               goto get_dump_len_exit;
+
+       if (dump_type == BNXT_DUMP_CRASH) {
+               *dump_len = le32_to_cpu(resp->crashdump_size);
+       } else {
+               /* The driver adds a coredump header and an "HWRM_VER_GET
+                * response" segment on top of the firmware coredump.
+                */
+               hdr_len = sizeof(struct bnxt_coredump_segment_hdr) +
+                         sizeof(struct hwrm_ver_get_output) +
+                         sizeof(struct bnxt_coredump_record);
+               *dump_len = le32_to_cpu(resp->coredump_size) + hdr_len;
+       }
+       if (*dump_len <= hdr_len)
+               rc = -EINVAL;
+
+get_dump_len_exit:
+       hwrm_req_drop(bp, req);
+       return rc;
+}
+
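+/* Like bnxt_hwrm_get_dump_len(), but falls back to a fixed crash dump
+ * length or to a measuring pass of __bnxt_get_coredump() when the
+ * firmware cannot report a size.
+ */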
+u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type)
+{
+       u32 len = 0;
+
+       if (bnxt_hwrm_get_dump_len(bp, dump_type, &len)) {
+               if (dump_type == BNXT_DUMP_CRASH)
+                       len = BNXT_CRASH_DUMP_LEN;
+               else
+                       __bnxt_get_coredump(bp, NULL, &len);
+       }
+       return len;
+}
index 09c22f8..b1a1b2f 100644 (file)
 #ifndef BNXT_COREDUMP_H
 #define BNXT_COREDUMP_H
 
+#include <linux/utsname.h>
+#include <linux/time.h>
+#include <linux/rtc.h>
+
 struct bnxt_coredump_segment_hdr {
        __u8 signature[4];
        __le32 component_id;
@@ -63,4 +67,51 @@ struct bnxt_coredump_record {
        __u8 ioctl_high_version;
        __le16 rsvd3[313];
 };
+
+#define BNXT_CRASH_DUMP_LEN    (8 << 20)
+
+#define COREDUMP_LIST_BUF_LEN          2048
+#define COREDUMP_RETRIEVE_BUF_LEN      4096
+
+struct bnxt_coredump {
+       void            *data;
+       int             data_size;
+       u16             total_segs;
+};
+
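+/* Usable dump space once the trailing bnxt_coredump_record is reserved */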
+#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))
+
+struct bnxt_hwrm_dbg_dma_info {
+       void *dest_buf;
+       int dest_buf_size;
+       u16 dma_len;
+       u16 seq_off;
+       u16 data_len_off;
+       u16 segs;
+       u32 seg_start;
+       u32 buf_len;
+};
+
+struct hwrm_dbg_cmn_input {
+       __le16 req_type;
+       __le16 cmpl_ring;
+       __le16 seq_id;
+       __le16 target_id;
+       __le64 resp_addr;
+       __le64 host_dest_addr;
+       __le32 host_buf_len;
+};
+
+struct hwrm_dbg_cmn_output {
+       __le16 error_code;
+       __le16 req_type;
+       __le16 seq_id;
+       __le16 resp_len;
+       u8 flags;
+       #define HWRM_DBG_CMN_FLAGS_MORE 1
+};
+
+int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len);
+u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type);
+
 #endif
index 951c0c0..ce790e9 100644 (file)
 #include "bnxt_vfr.h"
 #include "bnxt_devlink.h"
 #include "bnxt_ethtool.h"
+#include "bnxt_ulp.h"
+#include "bnxt_ptp.h"
+#include "bnxt_coredump.h"
+
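+/* Use the full FW reset path when an error condition is already latched,
+ * otherwise treat this as a newly detected FW exception.
+ */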
+static void __bnxt_fw_recover(struct bnxt *bp)
+{
+       if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) ||
+           test_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state))
+               bnxt_fw_reset(bp);
+       else
+               bnxt_fw_exception(bp);
+}
 
 static int
 bnxt_dl_flash_update(struct devlink *dl,
@@ -40,146 +52,208 @@ bnxt_dl_flash_update(struct devlink *dl,
        return rc;
 }
 
-static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
-                                    struct devlink_fmsg *fmsg,
-                                    struct netlink_ext_ack *extack)
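+/* Tell the firmware whether hot reset by other functions is permitted */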
+static int bnxt_hwrm_remote_dev_reset_set(struct bnxt *bp, bool remote_reset)
+{
+       struct hwrm_func_cfg_input *req;
+       int rc;
+
+       if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF)
+               return -EOPNOTSUPP;
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+       if (rc)
+               return rc;
+
+       req->fid = cpu_to_le16(0xffff);
+       req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_HOT_RESET_IF_SUPPORT);
+       if (remote_reset)
+               req->flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_HOT_RESET_IF_EN_DIS);
+
+       return hwrm_req_send(bp, req);
+}
+
+static char *bnxt_health_severity_str(enum bnxt_health_severity severity)
+{
+       switch (severity) {
+       case SEVERITY_NORMAL: return "normal";
+       case SEVERITY_WARNING: return "warning";
+       case SEVERITY_RECOVERABLE: return "recoverable";
+       case SEVERITY_FATAL: return "fatal";
+       default: return "unknown";
+       }
+}
+
+static char *bnxt_health_remedy_str(enum bnxt_health_remedy remedy)
+{
+       switch (remedy) {
+       case REMEDY_DEVLINK_RECOVER: return "devlink recover";
+       case REMEDY_POWER_CYCLE_DEVICE: return "device power cycle";
+       case REMEDY_POWER_CYCLE_HOST: return "host power cycle";
+       case REMEDY_FW_UPDATE: return "update firmware";
+       case REMEDY_HW_REPLACE: return "replace hardware";
+       default: return "unknown";
+       }
+}
+
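+/* devlink health diagnose: emit FW status, severity and remedy, plus reset
+ * and event counters when the firmware's reset counts are reliable.
+ */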
+static int bnxt_fw_diagnose(struct devlink_health_reporter *reporter,
+                           struct devlink_fmsg *fmsg,
+                           struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = devlink_health_reporter_priv(reporter);
-       u32 val;
+       struct bnxt_fw_health *h = bp->fw_health;
+       u32 fw_status, fw_resets;
        int rc;
 
        if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
-               return 0;
+               return devlink_fmsg_string_pair_put(fmsg, "Status", "recovering");
 
-       val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+       if (!h->status_reliable)
+               return devlink_fmsg_string_pair_put(fmsg, "Status", "unknown");
 
-       if (BNXT_FW_IS_BOOTING(val)) {
-               rc = devlink_fmsg_string_pair_put(fmsg, "Description",
-                                                 "Not yet completed initialization");
+       mutex_lock(&h->lock);
+       fw_status = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+       if (BNXT_FW_IS_BOOTING(fw_status)) {
+               rc = devlink_fmsg_string_pair_put(fmsg, "Status", "initializing");
                if (rc)
-                       return rc;
-       } else if (BNXT_FW_IS_ERR(val)) {
-               rc = devlink_fmsg_string_pair_put(fmsg, "Description",
-                                                 "Encountered fatal error and cannot recover");
+                       goto unlock;
+       } else if (h->severity || fw_status != BNXT_FW_STATUS_HEALTHY) {
+               if (!h->severity) {
+                       h->severity = SEVERITY_FATAL;
+                       h->remedy = REMEDY_POWER_CYCLE_DEVICE;
+                       h->diagnoses++;
+                       devlink_health_report(h->fw_reporter,
+                                             "FW error diagnosed", h);
+               }
+               rc = devlink_fmsg_string_pair_put(fmsg, "Status", "error");
                if (rc)
-                       return rc;
+                       goto unlock;
+               rc = devlink_fmsg_u32_pair_put(fmsg, "Syndrome", fw_status);
+               if (rc)
+                       goto unlock;
+       } else {
+               rc = devlink_fmsg_string_pair_put(fmsg, "Status", "healthy");
+               if (rc)
+                       goto unlock;
        }
 
-       if (val >> 16) {
-               rc = devlink_fmsg_u32_pair_put(fmsg, "Error code", val >> 16);
+       rc = devlink_fmsg_string_pair_put(fmsg, "Severity",
+                                         bnxt_health_severity_str(h->severity));
+       if (rc)
+               goto unlock;
+
+       if (h->severity) {
+               rc = devlink_fmsg_string_pair_put(fmsg, "Remedy",
+                                                 bnxt_health_remedy_str(h->remedy));
                if (rc)
-                       return rc;
+                       goto unlock;
+               if (h->remedy == REMEDY_DEVLINK_RECOVER) {
+                       rc = devlink_fmsg_string_pair_put(fmsg, "Impact",
+                                                         "traffic+ntuple_cfg");
+                       if (rc)
+                               goto unlock;
+               }
        }
 
-       val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
-       rc = devlink_fmsg_u32_pair_put(fmsg, "Reset count", val);
-       if (rc)
+unlock:
+       mutex_unlock(&h->lock);
+       if (rc || !h->resets_reliable)
                return rc;
 
-       return 0;
+       fw_resets = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+       rc = devlink_fmsg_u32_pair_put(fmsg, "Resets", fw_resets);
+       if (rc)
+               return rc;
+       rc = devlink_fmsg_u32_pair_put(fmsg, "Arrests", h->arrests);
+       if (rc)
+               return rc;
+       rc = devlink_fmsg_u32_pair_put(fmsg, "Survivals", h->survivals);
+       if (rc)
+               return rc;
+       rc = devlink_fmsg_u32_pair_put(fmsg, "Discoveries", h->discoveries);
+       if (rc)
+               return rc;
+       rc = devlink_fmsg_u32_pair_put(fmsg, "Fatalities", h->fatalities);
+       if (rc)
+               return rc;
+       return devlink_fmsg_u32_pair_put(fmsg, "Diagnoses", h->diagnoses);
 }
 
-static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
-       .name = "fw",
-       .diagnose = bnxt_fw_reporter_diagnose,
-};
-
-static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter,
-                                void *priv_ctx,
-                                struct netlink_ext_ack *extack)
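+/* devlink health dump: capture a live coredump into the fmsg as a binary
+ * "core" pair. Dumping from the devlink_health_report() context itself is
+ * not supported.
+ */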
+static int bnxt_fw_dump(struct devlink_health_reporter *reporter,
+                       struct devlink_fmsg *fmsg, void *priv_ctx,
+                       struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = devlink_health_reporter_priv(reporter);
+       u32 dump_len;
+       void *data;
+       int rc;
 
-       if (!priv_ctx)
+       /* TODO: no firmware dump support in devlink_health_report() context */
+       if (priv_ctx)
                return -EOPNOTSUPP;
 
-       bnxt_fw_reset(bp);
-       return -EINPROGRESS;
-}
+       dump_len = bnxt_get_coredump_length(bp, BNXT_DUMP_LIVE);
+       if (!dump_len)
+               return -EIO;
 
-static const
-struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = {
-       .name = "fw_reset",
-       .recover = bnxt_fw_reset_recover,
-};
+       data = vmalloc(dump_len);
+       if (!data)
+               return -ENOMEM;
 
-static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter,
-                                void *priv_ctx,
-                                struct netlink_ext_ack *extack)
+       rc = bnxt_get_coredump(bp, BNXT_DUMP_LIVE, data, &dump_len);
+       if (!rc) {
+               rc = devlink_fmsg_pair_nest_start(fmsg, "core");
+               if (rc)
+                       goto exit;
+               rc = devlink_fmsg_binary_pair_put(fmsg, "data", data, dump_len);
+               if (rc)
+                       goto exit;
+               rc = devlink_fmsg_u32_pair_put(fmsg, "size", dump_len);
+               if (rc)
+                       goto exit;
+               rc = devlink_fmsg_pair_nest_end(fmsg);
+       }
+
+exit:
+       vfree(data);
+       return rc;
+}
+
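+/* devlink health recover: kick the recovery path unless the error was
+ * diagnosed as fatal, which needs a device power cycle instead.
+ */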
+static int bnxt_fw_recover(struct devlink_health_reporter *reporter,
+                          void *priv_ctx,
+                          struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = devlink_health_reporter_priv(reporter);
-       struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
-       unsigned long event;
 
-       if (!priv_ctx)
-               return -EOPNOTSUPP;
+       if (bp->fw_health->severity == SEVERITY_FATAL)
+               return -ENODEV;
 
-       bp->fw_health->fatal = true;
-       event = fw_reporter_ctx->sp_event;
-       if (event == BNXT_FW_RESET_NOTIFY_SP_EVENT)
-               bnxt_fw_reset(bp);
-       else if (event == BNXT_FW_EXCEPTION_SP_EVENT)
-               bnxt_fw_exception(bp);
+       set_bit(BNXT_STATE_RECOVER, &bp->state);
+       __bnxt_fw_recover(bp);
 
        return -EINPROGRESS;
 }
 
-static const
-struct devlink_health_reporter_ops bnxt_dl_fw_fatal_reporter_ops = {
-       .name = "fw_fatal",
-       .recover = bnxt_fw_fatal_recover,
+static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
+       .name = "fw",
+       .diagnose = bnxt_fw_diagnose,
+       .dump = bnxt_fw_dump,
+       .recover = bnxt_fw_recover,
 };
 
 void bnxt_dl_fw_reporters_create(struct bnxt *bp)
 {
        struct bnxt_fw_health *health = bp->fw_health;
 
-       if (!health)
+       if (!health || health->fw_reporter)
                return;
 
-       if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) || health->fw_reset_reporter)
-               goto err_recovery;
-
-       health->fw_reset_reporter =
-               devlink_health_reporter_create(bp->dl,
-                                              &bnxt_dl_fw_reset_reporter_ops,
+       health->fw_reporter =
+               devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops,
                                               0, bp);
-       if (IS_ERR(health->fw_reset_reporter)) {
-               netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
-                           PTR_ERR(health->fw_reset_reporter));
-               health->fw_reset_reporter = NULL;
-               bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
-       }
-
-err_recovery:
-       if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
-               return;
-
-       if (!health->fw_reporter) {
-               health->fw_reporter =
-                       devlink_health_reporter_create(bp->dl,
-                                                      &bnxt_dl_fw_reporter_ops,
-                                                      0, bp);
-               if (IS_ERR(health->fw_reporter)) {
-                       netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
-                                   PTR_ERR(health->fw_reporter));
-                       health->fw_reporter = NULL;
-                       bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
-                       return;
-               }
-       }
-
-       if (health->fw_fatal_reporter)
-               return;
-
-       health->fw_fatal_reporter =
-               devlink_health_reporter_create(bp->dl,
-                                              &bnxt_dl_fw_fatal_reporter_ops,
-                                              0, bp);
-       if (IS_ERR(health->fw_fatal_reporter)) {
-               netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
-                           PTR_ERR(health->fw_fatal_reporter));
-               health->fw_fatal_reporter = NULL;
+       if (IS_ERR(health->fw_reporter)) {
+               netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
+                           PTR_ERR(health->fw_reporter));
+               health->fw_reporter = NULL;
                bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
        }
 }
@@ -191,12 +265,6 @@ void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
        if (!health)
                return;
 
-       if ((all || !(bp->fw_cap & BNXT_FW_CAP_HOT_RESET)) &&
-           health->fw_reset_reporter) {
-               devlink_health_reporter_destroy(health->fw_reset_reporter);
-               health->fw_reset_reporter = NULL;
-       }
-
        if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && !all)
                return;
 
@@ -204,82 +272,319 @@ void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
                devlink_health_reporter_destroy(health->fw_reporter);
                health->fw_reporter = NULL;
        }
-
-       if (health->fw_fatal_reporter) {
-               devlink_health_reporter_destroy(health->fw_fatal_reporter);
-               health->fw_fatal_reporter = NULL;
-       }
 }
 
-void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
+void bnxt_devlink_health_fw_report(struct bnxt *bp)
 {
        struct bnxt_fw_health *fw_health = bp->fw_health;
-       struct bnxt_fw_reporter_ctx fw_reporter_ctx;
-
-       fw_reporter_ctx.sp_event = event;
-       switch (event) {
-       case BNXT_FW_RESET_NOTIFY_SP_EVENT:
-               if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
-                       if (!fw_health->fw_fatal_reporter)
-                               return;
-
-                       devlink_health_report(fw_health->fw_fatal_reporter,
-                                             "FW fatal async event received",
-                                             &fw_reporter_ctx);
-                       return;
-               }
-               if (!fw_health->fw_reset_reporter)
-                       return;
+       int rc;
 
-               devlink_health_report(fw_health->fw_reset_reporter,
-                                     "FW non-fatal reset event received",
-                                     &fw_reporter_ctx);
+       if (!fw_health)
                return;
 
-       case BNXT_FW_EXCEPTION_SP_EVENT:
-               if (!fw_health->fw_fatal_reporter)
-                       return;
-
-               devlink_health_report(fw_health->fw_fatal_reporter,
-                                     "FW fatal error reported",
-                                     &fw_reporter_ctx);
+       if (!fw_health->fw_reporter) {
+               __bnxt_fw_recover(bp);
                return;
        }
+
+       mutex_lock(&fw_health->lock);
+       fw_health->severity = SEVERITY_RECOVERABLE;
+       fw_health->remedy = REMEDY_DEVLINK_RECOVER;
+       mutex_unlock(&fw_health->lock);
+       rc = devlink_health_report(fw_health->fw_reporter, "FW error reported",
+                                  fw_health);
+       if (rc == -ECANCELED)
+               __bnxt_fw_recover(bp);
 }
 
-void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy)
+void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy)
 {
-       struct bnxt_fw_health *health = bp->fw_health;
+       struct bnxt_fw_health *fw_health = bp->fw_health;
        u8 state;
 
-       if (healthy)
+       mutex_lock(&fw_health->lock);
+       if (healthy) {
+               fw_health->severity = SEVERITY_NORMAL;
                state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
-       else
+       } else {
+               fw_health->severity = SEVERITY_FATAL;
+               fw_health->remedy = REMEDY_POWER_CYCLE_DEVICE;
                state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
-
-       if (health->fatal)
-               devlink_health_reporter_state_update(health->fw_fatal_reporter,
-                                                    state);
-       else
-               devlink_health_reporter_state_update(health->fw_reset_reporter,
-                                                    state);
-
-       health->fatal = false;
+       }
+       mutex_unlock(&fw_health->lock);
+       devlink_health_reporter_state_update(fw_health->fw_reporter, state);
 }
 
-void bnxt_dl_health_recovery_done(struct bnxt *bp)
+void bnxt_dl_health_fw_recovery_done(struct bnxt *bp)
 {
-       struct bnxt_fw_health *hlth = bp->fw_health;
+       struct bnxt_dl *dl = devlink_priv(bp->dl);
 
-       if (hlth->fatal)
-               devlink_health_reporter_recovery_done(hlth->fw_fatal_reporter);
-       else
-               devlink_health_reporter_recovery_done(hlth->fw_reset_reporter);
+       devlink_health_reporter_recovery_done(bp->fw_health->fw_reporter);
+       bnxt_hwrm_remote_dev_reset_set(bp, dl->remote_reset);
 }
 
 static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
                            struct netlink_ext_ack *extack);
 
+static void
+bnxt_dl_livepatch_report_err(struct bnxt *bp, struct netlink_ext_ack *extack,
+                            struct hwrm_fw_livepatch_output *resp)
+{
+       int err = ((struct hwrm_err_output *)resp)->cmd_err;
+
+       switch (err) {
+       case FW_LIVEPATCH_CMD_ERR_CODE_INVALID_OPCODE:
+               netdev_err(bp->dev, "Illegal live patch opcode");
+               NL_SET_ERR_MSG_MOD(extack, "Invalid opcode");
+               break;
+       case FW_LIVEPATCH_CMD_ERR_CODE_NOT_SUPPORTED:
+               NL_SET_ERR_MSG_MOD(extack, "Live patch operation not supported");
+               break;
+       case FW_LIVEPATCH_CMD_ERR_CODE_NOT_INSTALLED:
+               NL_SET_ERR_MSG_MOD(extack, "Live patch not found");
+               break;
+       case FW_LIVEPATCH_CMD_ERR_CODE_NOT_PATCHED:
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Live patch deactivation failed. Firmware not patched.");
+               break;
+       case FW_LIVEPATCH_CMD_ERR_CODE_AUTH_FAIL:
+               NL_SET_ERR_MSG_MOD(extack, "Live patch not authenticated");
+               break;
+       case FW_LIVEPATCH_CMD_ERR_CODE_INVALID_HEADER:
+               NL_SET_ERR_MSG_MOD(extack, "Incompatible live patch");
+               break;
+       case FW_LIVEPATCH_CMD_ERR_CODE_INVALID_SIZE:
+               NL_SET_ERR_MSG_MOD(extack, "Live patch has invalid size");
+               break;
+       case FW_LIVEPATCH_CMD_ERR_CODE_ALREADY_PATCHED:
+               NL_SET_ERR_MSG_MOD(extack, "Live patch already applied");
+               break;
+       default:
+               netdev_err(bp->dev, "Unexpected live patch error: %hhd\n", err);
+               NL_SET_ERR_MSG_MOD(extack, "Failed to activate live patch");
+               break;
+       }
+}
+
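+/* Walk all live patch targets and activate any installed patch that is
+ * not already running at the installed version.
+ */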
+static int
+bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack)
+{
+       struct hwrm_fw_livepatch_query_output *query_resp;
+       struct hwrm_fw_livepatch_query_input *query_req;
+       struct hwrm_fw_livepatch_output *patch_resp;
+       struct hwrm_fw_livepatch_input *patch_req;
+       u32 installed = 0;
+       u16 flags;
+       u8 target;
+       int rc;
+
+       if (~bp->fw_cap & BNXT_FW_CAP_LIVEPATCH) {
+               NL_SET_ERR_MSG_MOD(extack, "Device does not support live patch");
+               return -EOPNOTSUPP;
+       }
+
+       rc = hwrm_req_init(bp, query_req, HWRM_FW_LIVEPATCH_QUERY);
+       if (rc)
+               return rc;
+       query_resp = hwrm_req_hold(bp, query_req);
+
+       rc = hwrm_req_init(bp, patch_req, HWRM_FW_LIVEPATCH);
+       if (rc) {
+               hwrm_req_drop(bp, query_req);
+               return rc;
+       }
+       patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_ACTIVATE;
+       patch_req->loadtype = FW_LIVEPATCH_REQ_LOADTYPE_NVM_INSTALL;
+       patch_resp = hwrm_req_hold(bp, patch_req);
+
+       for (target = 1; target <= FW_LIVEPATCH_REQ_FW_TARGET_LAST; target++) {
+               query_req->fw_target = target;
+               rc = hwrm_req_send(bp, query_req);
+               if (rc) {
+                       NL_SET_ERR_MSG_MOD(extack, "Failed to query packages");
+                       break;
+               }
+
+               flags = le16_to_cpu(query_resp->status_flags);
+               if (~flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL)
+                       continue;
+               if ((flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) &&
+                   !strncmp(query_resp->active_ver, query_resp->install_ver,
+                            sizeof(query_resp->active_ver)))
+                       continue;
+
+               patch_req->fw_target = target;
+               rc = hwrm_req_send(bp, patch_req);
+               if (rc) {
+                       bnxt_dl_livepatch_report_err(bp, extack, patch_resp);
+                       break;
+               }
+               installed++;
+       }
+
+       if (!rc && !installed) {
+               NL_SET_ERR_MSG_MOD(extack, "No live patches found");
+               rc = -ENOENT;
+       }
+       hwrm_req_drop(bp, query_req);
+       hwrm_req_drop(bp, patch_req);
+       return rc;
+}
+
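+/* devlink reload_down: quiesce the device. driver_reinit closes the netdev
+ * and releases firmware resources; fw_activate applies a live patch under
+ * the no_reset limit, or else requests a graceful, activating FW reset.
+ */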
+static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change,
+                              enum devlink_reload_action action,
+                              enum devlink_reload_limit limit,
+                              struct netlink_ext_ack *extack)
+{
+       struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+       int rc = 0;
+
+       switch (action) {
+       case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: {
+               if (BNXT_PF(bp) && bp->pf.active_vfs) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "reload is unsupported when VFs are allocated\n");
+                       return -EOPNOTSUPP;
+               }
+               rtnl_lock();
+               if (bp->dev->reg_state == NETREG_UNREGISTERED) {
+                       rtnl_unlock();
+                       return -ENODEV;
+               }
+               bnxt_ulp_stop(bp);
+               if (netif_running(bp->dev)) {
+                       rc = bnxt_close_nic(bp, true, true);
+                       if (rc) {
+                               NL_SET_ERR_MSG_MOD(extack, "Failed to close");
+                               dev_close(bp->dev);
+                               rtnl_unlock();
+                               break;
+                       }
+               }
+               bnxt_vf_reps_free(bp);
+               rc = bnxt_hwrm_func_drv_unrgtr(bp);
+               if (rc) {
+                       NL_SET_ERR_MSG_MOD(extack, "Failed to deregister");
+                       if (netif_running(bp->dev))
+                               dev_close(bp->dev);
+                       rtnl_unlock();
+                       break;
+               }
+               bnxt_cancel_reservations(bp, false);
+               bnxt_free_ctx_mem(bp);
+               kfree(bp->ctx);
+               bp->ctx = NULL;
+               break;
+       }
+       case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: {
+               if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
+                       return bnxt_dl_livepatch_activate(bp, extack);
+               if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET) {
+                       NL_SET_ERR_MSG_MOD(extack, "Device not capable, requires reboot");
+                       return -EOPNOTSUPP;
+               }
+               if (!bnxt_hwrm_reset_permitted(bp)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Reset denied by firmware, it may be inhibited by remote driver");
+                       return -EPERM;
+               }
+               rtnl_lock();
+               if (bp->dev->reg_state == NETREG_UNREGISTERED) {
+                       rtnl_unlock();
+                       return -ENODEV;
+               }
+               if (netif_running(bp->dev))
+                       set_bit(BNXT_STATE_FW_ACTIVATE, &bp->state);
+               rc = bnxt_hwrm_firmware_reset(bp->dev,
+                                             FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP,
+                                             FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP,
+                                             FW_RESET_REQ_FLAGS_RESET_GRACEFUL |
+                                             FW_RESET_REQ_FLAGS_FW_ACTIVATION);
+               if (rc) {
+                       NL_SET_ERR_MSG_MOD(extack, "Failed to activate firmware");
+                       clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state);
+                       rtnl_unlock();
+               }
+               break;
+       }
+       default:
+               rc = -EOPNOTSUPP;
+       }
+
+       return rc;
+}
+
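+/* devlink reload_up: bring the device back. driver_reinit re-initializes
+ * and reopens immediately; fw_activate drops RTNL and waits for the reset
+ * task to clear BNXT_STATE_FW_ACTIVATE before declaring success.
+ */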
+static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action action,
+                            enum devlink_reload_limit limit, u32 *actions_performed,
+                            struct netlink_ext_ack *extack)
+{
+       struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+       int rc = 0;
+
+       *actions_performed = 0;
+       switch (action) {
+       case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: {
+               bnxt_fw_init_one(bp);
+               bnxt_vf_reps_alloc(bp);
+               if (netif_running(bp->dev))
+                       rc = bnxt_open_nic(bp, true, true);
+               bnxt_ulp_start(bp, rc);
+               if (!rc) {
+                       bnxt_reenable_sriov(bp);
+                       bnxt_ptp_reapply_pps(bp);
+               }
+               break;
+       }
+       case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: {
+               unsigned long start = jiffies;
+               unsigned long timeout = start + BNXT_DFLT_FW_RST_MAX_DSECS * HZ / 10;
+
+               if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
+                       break;
+               if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
+                       timeout = start + bp->fw_health->normal_func_wait_dsecs * HZ / 10;
+               if (!netif_running(bp->dev))
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Device is closed, not waiting for reset notice that will never come");
+               rtnl_unlock();
+               while (test_bit(BNXT_STATE_FW_ACTIVATE, &bp->state)) {
+                       if (time_after(jiffies, timeout)) {
+                               NL_SET_ERR_MSG_MOD(extack, "Activation incomplete");
+                               rc = -ETIMEDOUT;
+                               break;
+                       }
+                       if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) {
+                               NL_SET_ERR_MSG_MOD(extack, "Activation aborted");
+                               rc = -ENODEV;
+                               break;
+                       }
+                       msleep(50);
+               }
+               rtnl_lock();
+               if (!rc)
+                       *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
+               clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state);
+               break;
+       }
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       if (!rc) {
+               bnxt_print_device_info(bp);
+               if (netif_running(bp->dev)) {
+                       mutex_lock(&bp->link_lock);
+                       bnxt_report_link(bp);
+                       mutex_unlock(&bp->link_lock);
+               }
+               *actions_performed |= BIT(action);
+       } else if (netif_running(bp->dev)) {
+               dev_close(bp->dev);
+       }
+       rtnl_unlock();
+       return rc;
+}
+
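+/* Sample iproute2 invocations for the reload support added below (the
+ * device name is only illustrative):
+ *   devlink dev reload pci/0000:01:00.0 action driver_reinit
+ *   devlink dev reload pci/0000:01:00.0 action fw_activate
+ *   devlink dev reload pci/0000:01:00.0 action fw_activate limit no_reset
+ */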
 static const struct devlink_ops bnxt_dl_ops = {
 #ifdef CONFIG_BNXT_SRIOV
        .eswitch_mode_set = bnxt_dl_eswitch_mode_set,
@@ -287,6 +592,11 @@ static const struct devlink_ops bnxt_dl_ops = {
 #endif /* CONFIG_BNXT_SRIOV */
        .info_get         = bnxt_dl_info_get,
        .flash_update     = bnxt_dl_flash_update,
+       .reload_actions   = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+                           BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
+       .reload_limits    = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET),
+       .reload_down      = bnxt_dl_reload_down,
+       .reload_up        = bnxt_dl_reload_up,
 };
 
 static const struct devlink_ops bnxt_vf_dl_ops;
@@ -430,6 +740,57 @@ static int bnxt_dl_info_put(struct bnxt *bp, struct devlink_info_req *req,
        return 0;
 }
 
+#define BNXT_FW_SRT_PATCH      "fw.srt.patch"
+#define BNXT_FW_CRT_PATCH      "fw.crt.patch"
+
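+/* Report the running (active) and stored (installed) live patch versions
+ * for one firmware target via devlink info.
+ */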
+static int bnxt_dl_livepatch_info_put(struct bnxt *bp,
+                                     struct devlink_info_req *req,
+                                     const char *key)
+{
+       struct hwrm_fw_livepatch_query_input *query;
+       struct hwrm_fw_livepatch_query_output *resp;
+       u16 flags;
+       int rc;
+
+       if (~bp->fw_cap & BNXT_FW_CAP_LIVEPATCH)
+               return 0;
+
+       rc = hwrm_req_init(bp, query, HWRM_FW_LIVEPATCH_QUERY);
+       if (rc)
+               return rc;
+
+       if (!strcmp(key, BNXT_FW_SRT_PATCH))
+               query->fw_target = FW_LIVEPATCH_QUERY_REQ_FW_TARGET_SECURE_FW;
+       else if (!strcmp(key, BNXT_FW_CRT_PATCH))
+               query->fw_target = FW_LIVEPATCH_QUERY_REQ_FW_TARGET_COMMON_FW;
+       else
+               goto exit;
+
+       resp = hwrm_req_hold(bp, query);
+       rc = hwrm_req_send(bp, query);
+       if (rc)
+               goto exit;
+
+       flags = le16_to_cpu(resp->status_flags);
+       if (flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) {
+               resp->active_ver[sizeof(resp->active_ver) - 1] = '\0';
+               rc = devlink_info_version_running_put(req, key, resp->active_ver);
+               if (rc)
+                       goto exit;
+       }
+
+       if (flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL) {
+               resp->install_ver[sizeof(resp->install_ver) - 1] = '\0';
+               rc = devlink_info_version_stored_put(req, key, resp->install_ver);
+               if (rc)
+                       goto exit;
+       }
+
+exit:
+       hwrm_req_drop(bp, query);
+       return rc;
+}
+
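bnxt_dl_livepatch_info_put() follows the driver's standard HWRM request lifecycle: hwrm_req_init() prepares the request, hwrm_req_hold() keeps the response buffer valid across hwrm_req_send(), and hwrm_req_drop() releases both. The same pattern reduced to a sketch; HWRM_VER_GET and its structures are real, but this helper itself is illustrative only:

/* Sketch of the hwrm_req_* lifecycle, error handling trimmed to the
 * essentials.
 */
static int example_query_fw_version(struct bnxt *bp)
{
        struct hwrm_ver_get_input *req;
        struct hwrm_ver_get_output *resp;
        int rc;

        rc = hwrm_req_init(bp, req, HWRM_VER_GET);
        if (rc)
                return rc;

        resp = hwrm_req_hold(bp, req);          /* pin resp across send */
        rc = hwrm_req_send(bp, req);
        if (!rc)
                netdev_info(bp->dev, "HWRM interface %u.%u\n",
                            resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b);
        hwrm_req_drop(bp, req);                 /* releases req and resp */
        return rc;
}
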
 #define HWRM_FW_VER_STR_LEN    16
 
 static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
@@ -554,8 +915,13 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 
        rc = bnxt_hwrm_nvm_get_dev_info(bp, &nvm_dev_info);
        if (rc ||
-           !(nvm_dev_info.flags & NVM_GET_DEV_INFO_RESP_FLAGS_FW_VER_VALID))
+           !(nvm_dev_info.flags & NVM_GET_DEV_INFO_RESP_FLAGS_FW_VER_VALID)) {
+               if (!bnxt_get_pkginfo(bp->dev, buf, sizeof(buf)))
+                       return bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED,
+                                               DEVLINK_INFO_VERSION_GENERIC_FW,
+                                               buf);
                return 0;
+       }
 
        buf[0] = 0;
        strncat(buf, nvm_dev_info.pkg_name, HWRM_FW_VER_STR_LEN);
@@ -583,8 +949,16 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
        snprintf(roce_ver, FW_VER_STR_LEN, "%d.%d.%d.%d",
                 nvm_dev_info.roce_fw_major, nvm_dev_info.roce_fw_minor,
                 nvm_dev_info.roce_fw_build, nvm_dev_info.roce_fw_patch);
-       return bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED,
-                               DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver);
+       rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED,
+                             DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver);
+       if (rc)
+               return rc;
+
+       rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH);
+       if (rc)
+               return rc;
+       return bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_CRT_PATCH);
 }
 
 static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
@@ -712,6 +1086,32 @@ static int bnxt_dl_msix_validate(struct devlink *dl, u32 id,
        return 0;
 }
 
+static int bnxt_remote_dev_reset_get(struct devlink *dl, u32 id,
+                                    struct devlink_param_gset_ctx *ctx)
+{
+       struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+
+       if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF)
+               return -EOPNOTSUPP;
+
+       ctx->val.vbool = bnxt_dl_get_remote_reset(dl);
+       return 0;
+}
+
+static int bnxt_remote_dev_reset_set(struct devlink *dl, u32 id,
+                                    struct devlink_param_gset_ctx *ctx)
+{
+       struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+       int rc;
+
+       rc = bnxt_hwrm_remote_dev_reset_set(bp, ctx->val.vbool);
+       if (rc)
+               return rc;
+
+       bnxt_dl_set_remote_reset(dl, ctx->val.vbool);
+       return rc;
+}
+
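The pair above backs the runtime remote-reset parameter registered in bnxt_dl_params[] just below: the getter mirrors the flag cached in struct bnxt_dl, and the setter pushes the value to firmware before caching it, so a firmware rejection leaves the cached state untouched. The shape in isolation, with hypothetical helpers:

/* Sketch of a boolean runtime devlink parameter setter; both helpers
 * below are hypothetical stand-ins.
 */
int my_push_to_device(struct devlink *dl, bool val);    /* hypothetical */
void my_cache_value(struct devlink *dl, bool val);      /* hypothetical */

static int example_param_set(struct devlink *dl, u32 id,
                             struct devlink_param_gset_ctx *ctx)
{
        int rc = my_push_to_device(dl, ctx->val.vbool);

        if (!rc)
                my_cache_value(dl, ctx->val.vbool);     /* device accepted it */
        return rc;
}

From userspace this is typically exercised with something like "devlink dev param set pci/0000:af:00.0 name enable_remote_dev_reset value false cmode runtime"; the parameter name is inferred from the DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, ...) entry below and is an assumption here.
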
 static const struct devlink_param bnxt_dl_params[] = {
        DEVLINK_PARAM_GENERIC(ENABLE_SRIOV,
                              BIT(DEVLINK_PARAM_CMODE_PERMANENT),
@@ -734,17 +1134,25 @@ static const struct devlink_param bnxt_dl_params[] = {
                             BIT(DEVLINK_PARAM_CMODE_PERMANENT),
                             bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
                             NULL),
+       /* keep REMOTE_DEV_RESET last, it is excluded based on caps */
+       DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET,
+                             BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+                             bnxt_remote_dev_reset_get,
+                             bnxt_remote_dev_reset_set, NULL),
 };
 
 static int bnxt_dl_params_register(struct bnxt *bp)
 {
+       int num_params = ARRAY_SIZE(bnxt_dl_params);
        int rc;
 
        if (bp->hwrm_spec_code < 0x10600)
                return 0;
 
-       rc = devlink_params_register(bp->dl, bnxt_dl_params,
-                                    ARRAY_SIZE(bnxt_dl_params));
+       if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF)
+               num_params--;
+
+       rc = devlink_params_register(bp->dl, bnxt_dl_params, num_params);
        if (rc)
                netdev_warn(bp->dev, "devlink_params_register failed. rc=%d\n",
                            rc);
@@ -753,11 +1161,15 @@ static int bnxt_dl_params_register(struct bnxt *bp)
 
 static void bnxt_dl_params_unregister(struct bnxt *bp)
 {
+       int num_params = ARRAY_SIZE(bnxt_dl_params);
+
        if (bp->hwrm_spec_code < 0x10600)
                return;
 
-       devlink_params_unregister(bp->dl, bnxt_dl_params,
-                                 ARRAY_SIZE(bnxt_dl_params));
+       if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF)
+               num_params--;
+
+       devlink_params_unregister(bp->dl, bnxt_dl_params, num_params);
 }
 
 int bnxt_dl_register(struct bnxt *bp)
@@ -782,6 +1194,7 @@ int bnxt_dl_register(struct bnxt *bp)
        bp->dl = dl;
        bp_dl = devlink_priv(dl);
        bp_dl->bp = bp;
+       bnxt_dl_set_remote_reset(dl, true);
 
        /* Add switchdev eswitch mode setting, if SRIOV supported */
        if (pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV) &&
index 406dc65..a715458 100644 (file)
@@ -13,6 +13,7 @@
 /* Struct to hold housekeeping info needed by devlink interface */
 struct bnxt_dl {
        struct bnxt *bp;        /* back ptr to the controlling dev */
+       bool remote_reset;
 };
 
 static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
@@ -20,6 +21,23 @@ static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
        return ((struct bnxt_dl *)devlink_priv(dl))->bp;
 }
 
+static inline void bnxt_dl_remote_reload(struct bnxt *bp)
+{
+       devlink_remote_reload_actions_performed(bp->dl, 0,
+                                               BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+                                               BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
+}
+
+static inline bool bnxt_dl_get_remote_reset(struct devlink *dl)
+{
+       return ((struct bnxt_dl *)devlink_priv(dl))->remote_reset;
+}
+
+static inline void bnxt_dl_set_remote_reset(struct devlink *dl, bool value)
+{
+       ((struct bnxt_dl *)devlink_priv(dl))->remote_reset = value;
+}
+
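bnxt_dl_remote_reload() is presumably called from the firmware-reset servicing path when the reset was initiated by the other function, letting devlink record that a driver reinit and firmware activation happened underneath this host; devlink_remote_reload_actions_performed() is the core API for exactly that. A hypothetical call site, since the real one is outside this hunk:

/* Hypothetical: after servicing a reset that a remote function
 * initiated, tell devlink which reload actions effectively took place.
 */
static void example_fw_reset_done(struct bnxt *bp, bool remote_initiated)
{
        if (remote_initiated)
                bnxt_dl_remote_reload(bp);
}
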
 #define NVM_OFF_MSIX_VEC_PER_PF_MAX    108
 #define NVM_OFF_MSIX_VEC_PER_PF_MIN    114
 #define NVM_OFF_IGNORE_ARI             164
@@ -53,9 +71,9 @@ enum bnxt_dl_version_type {
        BNXT_VERSION_STORED,
 };
 
-void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
-void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy);
-void bnxt_dl_health_recovery_done(struct bnxt *bp);
+void bnxt_devlink_health_fw_report(struct bnxt *bp);
+void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy);
+void bnxt_dl_health_fw_recovery_done(struct bnxt *bp);
 void bnxt_dl_fw_reporters_create(struct bnxt *bp);
 void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all);
 int bnxt_dl_register(struct bnxt *bp);
index fbb56b1..8188d55 100644 (file)
@@ -427,6 +427,8 @@ static const struct {
        BNXT_RX_STATS_EXT_ENTRY(rx_pcs_symbol_err),
        BNXT_RX_STATS_EXT_ENTRY(rx_corrected_bits),
        BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES,
+       BNXT_RX_STATS_EXT_ENTRY(rx_fec_corrected_blocks),
+       BNXT_RX_STATS_EXT_ENTRY(rx_fec_uncorrectable_blocks),
 };
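
The two new entries expose the FEC block counters (added to struct rx_port_stats_ext later in this patch) as "ethtool -S" statistics of the same names. The entry macro is assumed to have the usual offset-plus-string shape:

/* Assumed shape of the entry macro (not part of this patch): map a
 * struct rx_port_stats_ext field to an ethtool -S string of the same
 * name.
 */
#define BNXT_RX_STATS_EXT_ENTRY(counter)                                \
        { BNXT_RX_STATS_EXT_OFFSET(counter), __stringify(counter) }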
 
 static const struct {
@@ -2180,13 +2182,18 @@ static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
        return rc;
 }
 
-static int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
-                                   u8 self_reset, u8 flags)
+int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
+                            u8 self_reset, u8 flags)
 {
        struct bnxt *bp = netdev_priv(dev);
        struct hwrm_fw_reset_input *req;
        int rc;
 
+       if (!bnxt_hwrm_reset_permitted(bp)) {
+                       netdev_warn(bp->dev, "Reset denied by firmware, it may be inhibited by the remote driver\n");
+               return -EPERM;
+       }
+
        rc = hwrm_req_init(bp, req, HWRM_FW_RESET);
        if (rc)
                return rc;
@@ -2825,39 +2832,56 @@ static char *bnxt_parse_pkglog(int desired_field, u8 *data, size_t datalen)
        return retval;
 }
 
-static void bnxt_get_pkgver(struct net_device *dev)
+int bnxt_get_pkginfo(struct net_device *dev, char *ver, int size)
 {
        struct bnxt *bp = netdev_priv(dev);
        u16 index = 0;
        char *pkgver;
        u32 pkglen;
        u8 *pkgbuf;
-       int len;
+       int rc;
 
-       if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_PKG_LOG,
-                                BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE,
-                                &index, NULL, &pkglen) != 0)
-               return;
+       rc = bnxt_find_nvram_item(dev, BNX_DIR_TYPE_PKG_LOG,
+                                 BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE,
+                                 &index, NULL, &pkglen);
+       if (rc)
+               return rc;
 
        pkgbuf = kzalloc(pkglen, GFP_KERNEL);
        if (!pkgbuf) {
                dev_err(&bp->pdev->dev, "Unable to allocate memory for pkg version, length = %u\n",
                        pkglen);
-               return;
+               return -ENOMEM;
        }
 
-       if (bnxt_get_nvram_item(dev, index, 0, pkglen, pkgbuf))
+       rc = bnxt_get_nvram_item(dev, index, 0, pkglen, pkgbuf);
+       if (rc)
                goto err;
 
        pkgver = bnxt_parse_pkglog(BNX_PKG_LOG_FIELD_IDX_PKG_VERSION, pkgbuf,
                                   pkglen);
-       if (pkgver && *pkgver != 0 && isdigit(*pkgver)) {
+       if (pkgver && *pkgver != 0 && isdigit(*pkgver))
+               strscpy(ver, pkgver, size);
+       else
+               rc = -ENOENT;
+
+err:
+       kfree(pkgbuf);
+
+       return rc;
+}
+
+static void bnxt_get_pkgver(struct net_device *dev)
+{
+       struct bnxt *bp = netdev_priv(dev);
+       char buf[FW_VER_STR_LEN];
+       int len;
+
+       if (!bnxt_get_pkginfo(dev, buf, sizeof(buf))) {
                len = strlen(bp->fw_ver_str);
                snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1,
-                        "/pkg %s", pkgver);
+                        "/pkg %s", buf);
        }
-err:
-       kfree(pkgbuf);
 }
 
 static int bnxt_get_eeprom(struct net_device *dev,
@@ -3609,337 +3633,6 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
        return 0;
 }
 
-static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg,
-                                 struct bnxt_hwrm_dbg_dma_info *info)
-{
-       struct hwrm_dbg_cmn_input *cmn_req = msg;
-       __le16 *seq_ptr = msg + info->seq_off;
-       struct hwrm_dbg_cmn_output *cmn_resp;
-       u16 seq = 0, len, segs_off;
-       dma_addr_t dma_handle;
-       void *dma_buf, *resp;
-       int rc, off = 0;
-
-       dma_buf = hwrm_req_dma_slice(bp, msg, info->dma_len, &dma_handle);
-       if (!dma_buf) {
-               hwrm_req_drop(bp, msg);
-               return -ENOMEM;
-       }
-
-       hwrm_req_timeout(bp, msg, HWRM_COREDUMP_TIMEOUT);
-       cmn_resp = hwrm_req_hold(bp, msg);
-       resp = cmn_resp;
-
-       segs_off = offsetof(struct hwrm_dbg_coredump_list_output,
-                           total_segments);
-       cmn_req->host_dest_addr = cpu_to_le64(dma_handle);
-       cmn_req->host_buf_len = cpu_to_le32(info->dma_len);
-       while (1) {
-               *seq_ptr = cpu_to_le16(seq);
-               rc = hwrm_req_send(bp, msg);
-               if (rc)
-                       break;
-
-               len = le16_to_cpu(*((__le16 *)(resp + info->data_len_off)));
-               if (!seq &&
-                   cmn_req->req_type == cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) {
-                       info->segs = le16_to_cpu(*((__le16 *)(resp +
-                                                             segs_off)));
-                       if (!info->segs) {
-                               rc = -EIO;
-                               break;
-                       }
-
-                       info->dest_buf_size = info->segs *
-                                       sizeof(struct coredump_segment_record);
-                       info->dest_buf = kmalloc(info->dest_buf_size,
-                                                GFP_KERNEL);
-                       if (!info->dest_buf) {
-                               rc = -ENOMEM;
-                               break;
-                       }
-               }
-
-               if (info->dest_buf) {
-                       if ((info->seg_start + off + len) <=
-                           BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
-                               memcpy(info->dest_buf + off, dma_buf, len);
-                       } else {
-                               rc = -ENOBUFS;
-                               break;
-                       }
-               }
-
-               if (cmn_req->req_type ==
-                               cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
-                       info->dest_buf_size += len;
-
-               if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE))
-                       break;
-
-               seq++;
-               off += len;
-       }
-       hwrm_req_drop(bp, msg);
-       return rc;
-}
-
-static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp,
-                                      struct bnxt_coredump *coredump)
-{
-       struct bnxt_hwrm_dbg_dma_info info = {NULL};
-       struct hwrm_dbg_coredump_list_input *req;
-       int rc;
-
-       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_LIST);
-       if (rc)
-               return rc;
-
-       info.dma_len = COREDUMP_LIST_BUF_LEN;
-       info.seq_off = offsetof(struct hwrm_dbg_coredump_list_input, seq_no);
-       info.data_len_off = offsetof(struct hwrm_dbg_coredump_list_output,
-                                    data_len);
-
-       rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
-       if (!rc) {
-               coredump->data = info.dest_buf;
-               coredump->data_size = info.dest_buf_size;
-               coredump->total_segs = info.segs;
-       }
-       return rc;
-}
-
-static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,
-                                          u16 segment_id)
-{
-       struct hwrm_dbg_coredump_initiate_input *req;
-       int rc;
-
-       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_INITIATE);
-       if (rc)
-               return rc;
-
-       hwrm_req_timeout(bp, req, HWRM_COREDUMP_TIMEOUT);
-       req->component_id = cpu_to_le16(component_id);
-       req->segment_id = cpu_to_le16(segment_id);
-
-       return hwrm_req_send(bp, req);
-}
-
-static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
-                                          u16 segment_id, u32 *seg_len,
-                                          void *buf, u32 buf_len, u32 offset)
-{
-       struct hwrm_dbg_coredump_retrieve_input *req;
-       struct bnxt_hwrm_dbg_dma_info info = {NULL};
-       int rc;
-
-       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_RETRIEVE);
-       if (rc)
-               return rc;
-
-       req->component_id = cpu_to_le16(component_id);
-       req->segment_id = cpu_to_le16(segment_id);
-
-       info.dma_len = COREDUMP_RETRIEVE_BUF_LEN;
-       info.seq_off = offsetof(struct hwrm_dbg_coredump_retrieve_input,
-                               seq_no);
-       info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
-                                    data_len);
-       if (buf) {
-               info.dest_buf = buf + offset;
-               info.buf_len = buf_len;
-               info.seg_start = offset;
-       }
-
-       rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
-       if (!rc)
-               *seg_len = info.dest_buf_size;
-
-       return rc;
-}
-
-static void
-bnxt_fill_coredump_seg_hdr(struct bnxt *bp,
-                          struct bnxt_coredump_segment_hdr *seg_hdr,
-                          struct coredump_segment_record *seg_rec, u32 seg_len,
-                          int status, u32 duration, u32 instance)
-{
-       memset(seg_hdr, 0, sizeof(*seg_hdr));
-       memcpy(seg_hdr->signature, "sEgM", 4);
-       if (seg_rec) {
-               seg_hdr->component_id = (__force __le32)seg_rec->component_id;
-               seg_hdr->segment_id = (__force __le32)seg_rec->segment_id;
-               seg_hdr->low_version = seg_rec->version_low;
-               seg_hdr->high_version = seg_rec->version_hi;
-       } else {
-               /* For hwrm_ver_get response Component id = 2
-                * and Segment id = 0
-                */
-               seg_hdr->component_id = cpu_to_le32(2);
-               seg_hdr->segment_id = 0;
-       }
-       seg_hdr->function_id = cpu_to_le16(bp->pdev->devfn);
-       seg_hdr->length = cpu_to_le32(seg_len);
-       seg_hdr->status = cpu_to_le32(status);
-       seg_hdr->duration = cpu_to_le32(duration);
-       seg_hdr->data_offset = cpu_to_le32(sizeof(*seg_hdr));
-       seg_hdr->instance = cpu_to_le32(instance);
-}
-
-static void
-bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record,
-                         time64_t start, s16 start_utc, u16 total_segs,
-                         int status)
-{
-       time64_t end = ktime_get_real_seconds();
-       u32 os_ver_major = 0, os_ver_minor = 0;
-       struct tm tm;
-
-       time64_to_tm(start, 0, &tm);
-       memset(record, 0, sizeof(*record));
-       memcpy(record->signature, "cOrE", 4);
-       record->flags = 0;
-       record->low_version = 0;
-       record->high_version = 1;
-       record->asic_state = 0;
-       strlcpy(record->system_name, utsname()->nodename,
-               sizeof(record->system_name));
-       record->year = cpu_to_le16(tm.tm_year + 1900);
-       record->month = cpu_to_le16(tm.tm_mon + 1);
-       record->day = cpu_to_le16(tm.tm_mday);
-       record->hour = cpu_to_le16(tm.tm_hour);
-       record->minute = cpu_to_le16(tm.tm_min);
-       record->second = cpu_to_le16(tm.tm_sec);
-       record->utc_bias = cpu_to_le16(start_utc);
-       strcpy(record->commandline, "ethtool -w");
-       record->total_segments = cpu_to_le32(total_segs);
-
-       sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor);
-       record->os_ver_major = cpu_to_le32(os_ver_major);
-       record->os_ver_minor = cpu_to_le32(os_ver_minor);
-
-       strlcpy(record->os_name, utsname()->sysname, 32);
-       time64_to_tm(end, 0, &tm);
-       record->end_year = cpu_to_le16(tm.tm_year + 1900);
-       record->end_month = cpu_to_le16(tm.tm_mon + 1);
-       record->end_day = cpu_to_le16(tm.tm_mday);
-       record->end_hour = cpu_to_le16(tm.tm_hour);
-       record->end_minute = cpu_to_le16(tm.tm_min);
-       record->end_second = cpu_to_le16(tm.tm_sec);
-       record->end_utc_bias = cpu_to_le16(sys_tz.tz_minuteswest * 60);
-       record->asic_id1 = cpu_to_le32(bp->chip_num << 16 |
-                                      bp->ver_resp.chip_rev << 8 |
-                                      bp->ver_resp.chip_metal);
-       record->asic_id2 = 0;
-       record->coredump_status = cpu_to_le32(status);
-       record->ioctl_low_version = 0;
-       record->ioctl_high_version = 0;
-}
-
-static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
-{
-       u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
-       u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
-       struct coredump_segment_record *seg_record = NULL;
-       struct bnxt_coredump_segment_hdr seg_hdr;
-       struct bnxt_coredump coredump = {NULL};
-       time64_t start_time;
-       u16 start_utc;
-       int rc = 0, i;
-
-       if (buf)
-               buf_len = *dump_len;
-
-       start_time = ktime_get_real_seconds();
-       start_utc = sys_tz.tz_minuteswest * 60;
-       seg_hdr_len = sizeof(seg_hdr);
-
-       /* First segment should be hwrm_ver_get response */
-       *dump_len = seg_hdr_len + ver_get_resp_len;
-       if (buf) {
-               bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, ver_get_resp_len,
-                                          0, 0, 0);
-               memcpy(buf + offset, &seg_hdr, seg_hdr_len);
-               offset += seg_hdr_len;
-               memcpy(buf + offset, &bp->ver_resp, ver_get_resp_len);
-               offset += ver_get_resp_len;
-       }
-
-       rc = bnxt_hwrm_dbg_coredump_list(bp, &coredump);
-       if (rc) {
-               netdev_err(bp->dev, "Failed to get coredump segment list\n");
-               goto err;
-       }
-
-       *dump_len += seg_hdr_len * coredump.total_segs;
-
-       seg_record = (struct coredump_segment_record *)coredump.data;
-       seg_record_len = sizeof(*seg_record);
-
-       for (i = 0; i < coredump.total_segs; i++) {
-               u16 comp_id = le16_to_cpu(seg_record->component_id);
-               u16 seg_id = le16_to_cpu(seg_record->segment_id);
-               u32 duration = 0, seg_len = 0;
-               unsigned long start, end;
-
-               if (buf && ((offset + seg_hdr_len) >
-                           BNXT_COREDUMP_BUF_LEN(buf_len))) {
-                       rc = -ENOBUFS;
-                       goto err;
-               }
-
-               start = jiffies;
-
-               rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
-               if (rc) {
-                       netdev_err(bp->dev,
-                                  "Failed to initiate coredump for seg = %d\n",
-                                  seg_record->segment_id);
-                       goto next_seg;
-               }
-
-               /* Write segment data into the buffer */
-               rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
-                                                    &seg_len, buf, buf_len,
-                                                    offset + seg_hdr_len);
-               if (rc && rc == -ENOBUFS)
-                       goto err;
-               else if (rc)
-                       netdev_err(bp->dev,
-                                  "Failed to retrieve coredump for seg = %d\n",
-                                  seg_record->segment_id);
-
-next_seg:
-               end = jiffies;
-               duration = jiffies_to_msecs(end - start);
-               bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, seg_record, seg_len,
-                                          rc, duration, 0);
-
-               if (buf) {
-                       /* Write segment header into the buffer */
-                       memcpy(buf + offset, &seg_hdr, seg_hdr_len);
-                       offset += seg_hdr_len + seg_len;
-               }
-
-               *dump_len += seg_len;
-               seg_record =
-                       (struct coredump_segment_record *)((u8 *)seg_record +
-                                                          seg_record_len);
-       }
-
-err:
-       if (buf)
-               bnxt_fill_coredump_record(bp, buf + offset, start_time,
-                                         start_utc, coredump.total_segs + 1,
-                                         rc);
-       kfree(coredump.data);
-       *dump_len += sizeof(struct bnxt_coredump_record);
-       if (rc == -ENOBUFS)
-               netdev_err(bp->dev, "Firmware returned large coredump buffer\n");
-       return rc;
-}
-
 static int bnxt_set_dump(struct net_device *dev, struct ethtool_dump *dump)
 {
        struct bnxt *bp = netdev_priv(dev);
@@ -3971,10 +3664,7 @@ static int bnxt_get_dump_flag(struct net_device *dev, struct ethtool_dump *dump)
                        bp->ver_resp.hwrm_fw_rsvd_8b;
 
        dump->flag = bp->dump_flag;
-       if (bp->dump_flag == BNXT_DUMP_CRASH)
-               dump->len = BNXT_CRASH_DUMP_LEN;
-       else
-               bnxt_get_coredump(bp, NULL, &dump->len);
+       dump->len = bnxt_get_coredump_length(bp, bp->dump_flag);
        return 0;
 }
 
@@ -3989,15 +3679,7 @@ static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump,
        memset(buf, 0, dump->len);
 
        dump->flag = bp->dump_flag;
-       if (dump->flag == BNXT_DUMP_CRASH) {
-#ifdef CONFIG_TEE_BNXT_FW
-               return tee_bnxt_copy_coredump(buf, 0, dump->len);
-#endif
-       } else {
-               return bnxt_get_coredump(bp, buf, &dump->len);
-       }
-
-       return 0;
+       return bnxt_get_coredump(bp, dump->flag, buf, &dump->len);
 }
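
With the coredump machinery moved out of this file, the ethtool dump entry points reduce to a thin contract: set_dump records the requested flag, get_dump_flag reports the flag and total length via bnxt_get_coredump_length(), and get_dump_data fills the user buffer via bnxt_get_coredump(). A sketch of how the three hang off ethtool_ops:

/* Sketch: the ethtool dump contract.  Userspace sizes its buffer from
 * ETHTOOL_GET_DUMP_FLAG, then fetches with ETHTOOL_GET_DUMP_DATA.
 */
static const struct ethtool_ops example_ethtool_ops = {
        .set_dump       = bnxt_set_dump,        /* choose live vs. crash dump */
        .get_dump_flag  = bnxt_get_dump_flag,   /* report flag + total length */
        .get_dump_data  = bnxt_get_dump_data,   /* copy the dump into buf */
};

Userspace drives this with "ethtool -W <dev> N" to pick the dump flag and "ethtool -w <dev> data <file>" to retrieve it.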
 
 static int bnxt_get_ts_info(struct net_device *dev,
index 0a57cb6..6aa4484 100644 (file)
@@ -22,49 +22,6 @@ struct bnxt_led_cfg {
        u8 rsvd;
 };
 
-#define COREDUMP_LIST_BUF_LEN          2048
-#define COREDUMP_RETRIEVE_BUF_LEN      4096
-
-struct bnxt_coredump {
-       void            *data;
-       int             data_size;
-       u16             total_segs;
-};
-
-#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))
-
-struct bnxt_hwrm_dbg_dma_info {
-       void *dest_buf;
-       int dest_buf_size;
-       u16 dma_len;
-       u16 seq_off;
-       u16 data_len_off;
-       u16 segs;
-       u32 seg_start;
-       u32 buf_len;
-};
-
-struct hwrm_dbg_cmn_input {
-       __le16 req_type;
-       __le16 cmpl_ring;
-       __le16 seq_id;
-       __le16 target_id;
-       __le64 resp_addr;
-       __le64 host_dest_addr;
-       __le32 host_buf_len;
-};
-
-struct hwrm_dbg_cmn_output {
-       __le16 error_code;
-       __le16 req_type;
-       __le16 seq_id;
-       __le16 resp_len;
-       u8 flags;
-       #define HWRM_DBG_CMN_FLAGS_MORE 1
-};
-
-#define BNXT_CRASH_DUMP_LEN    (8 << 20)
-
 #define BNXT_LED_DFLT_ENA                              \
        (PORT_LED_CFG_REQ_ENABLES_LED0_ID |             \
         PORT_LED_CFG_REQ_ENABLES_LED0_STATE |          \
@@ -94,8 +51,11 @@ u32 bnxt_fw_to_ethtool_speed(u16);
 u16 bnxt_get_fw_auto_link_speeds(u32);
 int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp,
                               struct hwrm_nvm_get_dev_info_output *nvm_dev_info);
+int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
+                            u8 self_reset, u8 flags);
 int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware *fw,
                                   u32 install_type);
+int bnxt_get_pkginfo(struct net_device *dev, char *ver, int size);
 void bnxt_ethtool_init(struct bnxt *bp);
 void bnxt_ethtool_free(struct bnxt *bp);
 
index 94d07a9..ea86c54 100644 (file)
@@ -532,8 +532,8 @@ struct hwrm_err_output {
 #define HWRM_VERSION_MAJOR 1
 #define HWRM_VERSION_MINOR 10
 #define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 52
-#define HWRM_VERSION_STR "1.10.2.52"
+#define HWRM_VERSION_RSVD 63
+#define HWRM_VERSION_STR "1.10.2.63"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -1587,6 +1587,8 @@ struct hwrm_func_qcaps_output {
        #define FUNC_QCAPS_RESP_FLAGS_EXT_DFLT_VLAN_TPID_PCP_SUPPORTED           0x200000UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_KTLS_SUPPORTED                         0x400000UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL                        0x800000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED                       0x1000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP                       0x2000000UL
        u8      max_schqs;
        u8      mpc_chnls_cap;
        #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE         0x1UL
@@ -1956,6 +1958,18 @@ struct hwrm_func_cfg_output {
        u8      valid;
 };
 
+/* hwrm_func_cfg_cmd_err (size:64b/8B) */
+struct hwrm_func_cfg_cmd_err {
+       u8      code;
+       #define FUNC_CFG_CMD_ERR_CODE_UNKNOWN                      0x0UL
+       #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_RANGE       0x1UL
+       #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_MORE_THAN_MAX  0x2UL
+       #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_UNSUPPORTED 0x3UL
+       #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT         0x4UL
+       #define FUNC_CFG_CMD_ERR_CODE_LAST                        FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT
+       u8      unused_0[7];
+};
+
 /* hwrm_func_qstats_input (size:192b/24B) */
 struct hwrm_func_qstats_input {
        __le16  req_type;
@@ -3601,7 +3615,15 @@ struct hwrm_port_phy_qcfg_output {
        #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASESR4     0x1dUL
        #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASELR4     0x1eUL
        #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER4     0x1fUL
-       #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST            PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER4
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASECR       0x20UL
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASESR       0x21UL
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASELR       0x22UL
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASEER       0x23UL
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASECR2     0x24UL
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR2     0x25UL
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASELR2     0x26UL
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER2     0x27UL
+       #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST            PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER2
        u8      media_type;
        #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL
        #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP      0x1UL
@@ -4040,7 +4062,7 @@ struct tx_port_stats_ext {
        __le64  pfc_pri7_tx_transitions;
 };
 
-/* rx_port_stats_ext (size:3648b/456B) */
+/* rx_port_stats_ext (size:3776b/472B) */
 struct rx_port_stats_ext {
        __le64  link_down_events;
        __le64  continuous_pause_events;
@@ -4099,6 +4121,8 @@ struct rx_port_stats_ext {
        __le64  rx_discard_packets_cos5;
        __le64  rx_discard_packets_cos6;
        __le64  rx_discard_packets_cos7;
+       __le64  rx_fec_corrected_blocks;
+       __le64  rx_fec_uncorrectable_blocks;
 };
 
 /* hwrm_port_qstats_ext_input (size:320b/40B) */
@@ -4372,7 +4396,10 @@ struct hwrm_port_phy_qcaps_output {
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_50G      0x1UL
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_100G     0x2UL
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_200G     0x4UL
-       u8      unused_0[3];
+       __le16  flags2;
+       #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED     0x1UL
+       #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED       0x2UL
+       u8      unused_0[1];
        u8      valid;
 };
 
@@ -6076,6 +6103,11 @@ struct hwrm_vnic_qcaps_output {
        #define VNIC_QCAPS_RESP_FLAGS_VIRTIO_NET_VNIC_ALLOC_CAP           0x800UL
        #define VNIC_QCAPS_RESP_FLAGS_METADATA_FORMAT_CAP                 0x1000UL
        #define VNIC_QCAPS_RESP_FLAGS_RSS_STRICT_HASH_TYPE_CAP            0x2000UL
+       #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_TYPE_DELTA_CAP             0x4000UL
+       #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_FUNCTION_TOEPLITZ_CAP      0x8000UL
+       #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_FUNCTION_XOR_CAP           0x10000UL
+       #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_FUNCTION_CHKSM_CAP         0x20000UL
+       #define VNIC_QCAPS_RESP_FLAGS_RSS_IPV6_FLOW_LABEL_CAP             0x40000UL
        __le16  max_aggs_supported;
        u8      unused_1[5];
        u8      valid;
@@ -6206,7 +6238,15 @@ struct hwrm_vnic_rss_cfg_input {
        __le64  ring_grp_tbl_addr;
        __le64  hash_key_tbl_addr;
        __le16  rss_ctx_idx;
-       u8      unused_1[6];
+       u8      flags;
+       #define VNIC_RSS_CFG_REQ_FLAGS_HASH_TYPE_INCLUDE     0x1UL
+       #define VNIC_RSS_CFG_REQ_FLAGS_HASH_TYPE_EXCLUDE     0x2UL
+       u8      rss_hash_function;
+       #define VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_TOEPLITZ 0x0UL
+       #define VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_XOR      0x1UL
+       #define VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_CHECKSUM 0x2UL
+       #define VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_LAST    VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_CHECKSUM
+       u8      unused_1[4];
 };
 
 /* hwrm_vnic_rss_cfg_output (size:128b/16B) */
@@ -6331,7 +6371,24 @@ struct hwrm_ring_alloc_input {
        #define RING_ALLOC_REQ_RING_TYPE_RX_AGG    0x4UL
        #define RING_ALLOC_REQ_RING_TYPE_NQ        0x5UL
        #define RING_ALLOC_REQ_RING_TYPE_LAST     RING_ALLOC_REQ_RING_TYPE_NQ
-       u8      unused_0;
+       u8      cmpl_coal_cnt;
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_OFF 0x0UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_4   0x1UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_8   0x2UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_12  0x3UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_16  0x4UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_24  0x5UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_32  0x6UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_48  0x7UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_64  0x8UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_96  0x9UL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_128 0xaUL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_192 0xbUL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_256 0xcUL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_320 0xdUL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_384 0xeUL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_MAX 0xfUL
+       #define RING_ALLOC_REQ_CMPL_COAL_CNT_LAST    RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_MAX
        __le16  flags;
        #define RING_ALLOC_REQ_FLAGS_RX_SOP_PAD     0x1UL
        __le64  page_tbl_addr;
@@ -7099,6 +7156,7 @@ struct hwrm_cfa_ntuple_filter_alloc_input {
        #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_FID              0x8UL
        #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_ARP_REPLY             0x10UL
        #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_RFS_RING_IDX     0x20UL
+       #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_NO_L2_CONTEXT         0x40UL
        __le32  enables;
        #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_L2_FILTER_ID         0x1UL
        #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE            0x2UL
@@ -7234,6 +7292,7 @@ struct hwrm_cfa_ntuple_filter_cfg_input {
        __le32  flags;
        #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_DEST_FID              0x1UL
        #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_DEST_RFS_RING_IDX     0x2UL
+       #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_NO_L2_CONTEXT         0x4UL
        __le64  ntuple_filter_id;
        __le32  new_dst_id;
        __le32  new_mirror_vnic_id;
@@ -7834,11 +7893,11 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output {
        #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TRUFLOW_CAPABLE                              0x8000UL
        #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_FILTER_TRAFFIC_TYPE_L2_ROCE_SUPPORTED     0x10000UL
        #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_LAG_SUPPORTED                                0x20000UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_NO_L2CTX_SUPPORTED               0x40000UL
        u8      unused_0[3];
        u8      valid;
 };
 
-/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
 struct hwrm_tunnel_dst_port_query_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -8414,6 +8473,86 @@ struct hwrm_fw_get_structured_data_cmd_err {
        u8      unused_0[7];
 };
 
+/* hwrm_fw_livepatch_query_input (size:192b/24B) */
+struct hwrm_fw_livepatch_query_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       u8      fw_target;
+       #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_COMMON_FW 0x1UL
+       #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_SECURE_FW 0x2UL
+       #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_LAST     FW_LIVEPATCH_QUERY_REQ_FW_TARGET_SECURE_FW
+       u8      unused_0[7];
+};
+
+/* hwrm_fw_livepatch_query_output (size:640b/80B) */
+struct hwrm_fw_livepatch_query_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       char    install_ver[32];
+       char    active_ver[32];
+       __le16  status_flags;
+       #define FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL     0x1UL
+       #define FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE      0x2UL
+       u8      unused_0[5];
+       u8      valid;
+};
+
+/* hwrm_fw_livepatch_input (size:256b/32B) */
+struct hwrm_fw_livepatch_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       u8      opcode;
+       #define FW_LIVEPATCH_REQ_OPCODE_ACTIVATE   0x1UL
+       #define FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE 0x2UL
+       #define FW_LIVEPATCH_REQ_OPCODE_LAST      FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE
+       u8      fw_target;
+       #define FW_LIVEPATCH_REQ_FW_TARGET_COMMON_FW 0x1UL
+       #define FW_LIVEPATCH_REQ_FW_TARGET_SECURE_FW 0x2UL
+       #define FW_LIVEPATCH_REQ_FW_TARGET_LAST     FW_LIVEPATCH_REQ_FW_TARGET_SECURE_FW
+       u8      loadtype;
+       #define FW_LIVEPATCH_REQ_LOADTYPE_NVM_INSTALL   0x1UL
+       #define FW_LIVEPATCH_REQ_LOADTYPE_MEMORY_DIRECT 0x2UL
+       #define FW_LIVEPATCH_REQ_LOADTYPE_LAST         FW_LIVEPATCH_REQ_LOADTYPE_MEMORY_DIRECT
+       u8      flags;
+       __le32  patch_len;
+       __le64  host_addr;
+};
+
+/* hwrm_fw_livepatch_output (size:128b/16B) */
+struct hwrm_fw_livepatch_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      unused_0[7];
+       u8      valid;
+};
+
+/* hwrm_fw_livepatch_cmd_err (size:64b/8B) */
+struct hwrm_fw_livepatch_cmd_err {
+       u8      code;
+       #define FW_LIVEPATCH_CMD_ERR_CODE_UNKNOWN         0x0UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_OPCODE  0x1UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_TARGET  0x2UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_SUPPORTED   0x3UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_INSTALLED   0x4UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_PATCHED     0x5UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_AUTH_FAIL       0x6UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_HEADER  0x7UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_SIZE    0x8UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_ALREADY_PATCHED 0x9UL
+       #define FW_LIVEPATCH_CMD_ERR_CODE_LAST           FW_LIVEPATCH_CMD_ERR_CODE_ALREADY_PATCHED
+       u8      unused_0[7];
+};
+
 /* hwrm_exec_fwd_resp_input (size:1024b/128B) */
 struct hwrm_exec_fwd_resp_input {
        __le16  req_type;
index f0aa480..8388be1 100644 (file)
@@ -11,9 +11,7 @@
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-#include <linux/ptp_clock_kernel.h>
 #include <linux/net_tstamp.h>
-#include <linux/timecounter.h>
 #include <linux/timekeeping.h>
 #include <linux/ptp_classify.h>
 #include "bnxt_hsi.h"
index fa5f057..7c528e1 100644 (file)
@@ -10,6 +10,9 @@
 #ifndef BNXT_PTP_H
 #define BNXT_PTP_H
 
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
+
 #define BNXT_PTP_GRC_WIN       6
 #define BNXT_PTP_GRC_WIN_BASE  0x6000
 
index 309371a..ffce528 100644 (file)
@@ -901,7 +901,7 @@ static int macb_mdiobus_register(struct macb *bp)
         * directly under the MAC node
         */
        child = of_get_child_by_name(np, "mdio");
-       if (np) {
+       if (child) {
                int ret = of_mdiobus_register(bp->mii_bus, child);
 
                of_node_put(child);
index 1a1bebd..67364ab 100644 (file)
@@ -137,7 +137,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
                .name = "uc",
                .cmd = HNAE3_DBG_CMD_MAC_UC,
                .dentry = HNS3_DBG_DENTRY_MAC,
-               .buf_len = HNS3_DBG_READ_LEN,
+               .buf_len = HNS3_DBG_READ_LEN_128KB,
                .init = hns3_dbg_common_file_init,
        },
        {
@@ -256,7 +256,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
                .name = "tqp",
                .cmd = HNAE3_DBG_CMD_REG_TQP,
                .dentry = HNS3_DBG_DENTRY_REG,
-               .buf_len = HNS3_DBG_READ_LEN,
+               .buf_len = HNS3_DBG_READ_LEN_128KB,
                .init = hns3_dbg_common_file_init,
        },
        {
@@ -298,7 +298,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
                .name = "fd_tcam",
                .cmd = HNAE3_DBG_CMD_FD_TCAM,
                .dentry = HNS3_DBG_DENTRY_FD,
-               .buf_len = HNS3_DBG_READ_LEN,
+               .buf_len = HNS3_DBG_READ_LEN_1MB,
                .init = hns3_dbg_common_file_init,
        },
        {
@@ -584,7 +584,7 @@ static const struct hns3_dbg_item rx_queue_info_items[] = {
        { "TAIL", 2 },
        { "HEAD", 2 },
        { "FBDNUM", 2 },
-       { "PKTNUM", 2 },
+       { "PKTNUM", 5 },
        { "COPYBREAK", 2 },
        { "RING_EN", 2 },
        { "RX_RING_EN", 2 },
@@ -687,7 +687,7 @@ static const struct hns3_dbg_item tx_queue_info_items[] = {
        { "HEAD", 2 },
        { "FBDNUM", 2 },
        { "OFFSET", 2 },
-       { "PKTNUM", 2 },
+       { "PKTNUM", 5 },
        { "RING_EN", 2 },
        { "TX_RING_EN", 2 },
        { "BASE_ADDR", 10 },
@@ -912,13 +912,13 @@ static int hns3_dbg_rx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
 }
 
 static const struct hns3_dbg_item tx_bd_info_items[] = {
-       { "BD_IDX", 5 },
-       { "ADDRESS", 2 },
+       { "BD_IDX", 2 },
+       { "ADDRESS", 13 },
        { "VLAN_TAG", 2 },
        { "SIZE", 2 },
        { "T_CS_VLAN_TSO", 2 },
        { "OT_VLAN_TAG", 3 },
-       { "TV", 2 },
+       { "TV", 5 },
        { "OLT_VLAN_LEN", 2 },
        { "PAYLEN_OL4CS", 2 },
        { "BD_FE_SC_VLD", 2 },
index f0aa4fb..4e0a8c2 100644 (file)
@@ -391,7 +391,7 @@ static int hclge_dbg_dump_mac(struct hclge_dev *hdev, char *buf, int len)
 static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len,
                                   int *pos)
 {
-       struct hclge_dbg_bitmap_cmd *bitmap;
+       struct hclge_dbg_bitmap_cmd req;
        struct hclge_desc desc;
        u16 qset_id, qset_num;
        int ret;
@@ -408,12 +408,12 @@ static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len,
                if (ret)
                        return ret;
 
-               bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1];
+               req.bitmap = (u8)le32_to_cpu(desc.data[1]);
 
                *pos += scnprintf(buf + *pos, len - *pos,
                                  "%04u           %#x            %#x             %#x               %#x\n",
-                                 qset_id, bitmap->bit0, bitmap->bit1,
-                                 bitmap->bit2, bitmap->bit3);
+                                 qset_id, req.bit0, req.bit1, req.bit2,
+                                 req.bit3);
        }
 
        return 0;
@@ -422,7 +422,7 @@ static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len,
 static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len,
                                  int *pos)
 {
-       struct hclge_dbg_bitmap_cmd *bitmap;
+       struct hclge_dbg_bitmap_cmd req;
        struct hclge_desc desc;
        u8 pri_id, pri_num;
        int ret;
@@ -439,12 +439,11 @@ static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len,
                if (ret)
                        return ret;
 
-               bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1];
+               req.bitmap = (u8)le32_to_cpu(desc.data[1]);
 
                *pos += scnprintf(buf + *pos, len - *pos,
                                  "%03u       %#x           %#x                %#x\n",
-                                 pri_id, bitmap->bit0, bitmap->bit1,
-                                 bitmap->bit2);
+                                 pri_id, req.bit0, req.bit1, req.bit2);
        }
 
        return 0;
@@ -453,7 +452,7 @@ static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len,
 static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len,
                                 int *pos)
 {
-       struct hclge_dbg_bitmap_cmd *bitmap;
+       struct hclge_dbg_bitmap_cmd req;
        struct hclge_desc desc;
        u8 pg_id;
        int ret;
@@ -466,12 +465,11 @@ static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len,
                if (ret)
                        return ret;
 
-               bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1];
+               req.bitmap = (u8)le32_to_cpu(desc.data[1]);
 
                *pos += scnprintf(buf + *pos, len - *pos,
                                  "%03u      %#x           %#x               %#x\n",
-                                 pg_id, bitmap->bit0, bitmap->bit1,
-                                 bitmap->bit2);
+                                 pg_id, req.bit0, req.bit1, req.bit2);
        }
 
        return 0;
@@ -511,7 +509,7 @@ static int hclge_dbg_dump_dcb_queue(struct hclge_dev *hdev, char *buf, int len,
 static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len,
                                   int *pos)
 {
-       struct hclge_dbg_bitmap_cmd *bitmap;
+       struct hclge_dbg_bitmap_cmd req;
        struct hclge_desc desc;
        u8 port_id = 0;
        int ret;
@@ -521,12 +519,12 @@ static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len,
        if (ret)
                return ret;
 
-       bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1];
+       req.bitmap = (u8)le32_to_cpu(desc.data[1]);
 
        *pos += scnprintf(buf + *pos, len - *pos, "port_mask: %#x\n",
-                        bitmap->bit0);
+                        req.bit0);
        *pos += scnprintf(buf + *pos, len - *pos, "port_shaping_pass: %#x\n",
-                        bitmap->bit1);
+                        req.bit1);
 
        return 0;
 }
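
The hclge changes in this file all fix the same portability bug: desc.data[] holds __le32 words, so casting a pointer into it to a host-endian bitfield struct only happens to work on little-endian CPUs. Copying through le32_to_cpu() into a local, as above, is correct everywhere. In isolation, with an illustrative helper name:

/* Sketch: extract the low byte of a little-endian descriptor word
 * portably.
 */
static u8 example_bitmap_from_desc(const struct hclge_desc *desc)
{
        /* convert to host order before touching individual bits */
        return (u8)le32_to_cpu(desc->data[1]);
}
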
index f1db669..2e41aa2 100644 (file)
@@ -2930,33 +2930,29 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
 {
        if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
            !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
-               mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
-                                   hclge_wq, &hdev->service_task, 0);
+               mod_delayed_work(hclge_wq, &hdev->service_task, 0);
 }
 
 static void hclge_reset_task_schedule(struct hclge_dev *hdev)
 {
        if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
+           test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state) &&
            !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
-               mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
-                                   hclge_wq, &hdev->service_task, 0);
+               mod_delayed_work(hclge_wq, &hdev->service_task, 0);
 }
 
 static void hclge_errhand_task_schedule(struct hclge_dev *hdev)
 {
        if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
            !test_and_set_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state))
-               mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
-                                   hclge_wq, &hdev->service_task, 0);
+               mod_delayed_work(hclge_wq, &hdev->service_task, 0);
 }
 
 void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time)
 {
        if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
            !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state))
-               mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
-                                   hclge_wq, &hdev->service_task,
-                                   delay_time);
+               mod_delayed_work(hclge_wq, &hdev->service_task, delay_time);
 }
 
 static int hclge_get_mac_link_status(struct hclge_dev *hdev, int *link_status)
@@ -3650,33 +3646,14 @@ static void hclge_get_misc_vector(struct hclge_dev *hdev)
        hdev->num_msi_used += 1;
 }
 
-static void hclge_irq_affinity_notify(struct irq_affinity_notify *notify,
-                                     const cpumask_t *mask)
-{
-       struct hclge_dev *hdev = container_of(notify, struct hclge_dev,
-                                             affinity_notify);
-
-       cpumask_copy(&hdev->affinity_mask, mask);
-}
-
-static void hclge_irq_affinity_release(struct kref *ref)
-{
-}
-
 static void hclge_misc_affinity_setup(struct hclge_dev *hdev)
 {
        irq_set_affinity_hint(hdev->misc_vector.vector_irq,
                              &hdev->affinity_mask);
-
-       hdev->affinity_notify.notify = hclge_irq_affinity_notify;
-       hdev->affinity_notify.release = hclge_irq_affinity_release;
-       irq_set_affinity_notifier(hdev->misc_vector.vector_irq,
-                                 &hdev->affinity_notify);
 }
 
 static void hclge_misc_affinity_teardown(struct hclge_dev *hdev)
 {
-       irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL);
        irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL);
 }
 
@@ -13233,7 +13210,7 @@ static int hclge_init(void)
 {
        pr_info("%s is initializing\n", HCLGE_NAME);
 
-       hclge_wq = alloc_workqueue("%s", 0, 0, HCLGE_NAME);
+       hclge_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGE_NAME);
        if (!hclge_wq) {
                pr_err("%s: failed to create workqueue\n", HCLGE_NAME);
                return -ENOMEM;
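
The scheduling changes above drop the manual CPU placement (mod_delayed_work_on() steered by an irq_affinity_notify callback) in favour of an unbound workqueue, which lets the scheduler run the work on any CPU and removes the notifier bookkeeping. The pattern in isolation; names other than the core APIs are examples:

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static void example_service_task(struct work_struct *work) { }
static DECLARE_DELAYED_WORK(example_task, example_service_task);

static int example_init(void)
{
        example_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "example");
        if (!example_wq)
                return -ENOMEM;
        /* any CPU may run the work; no affinity tracking required */
        mod_delayed_work(example_wq, &example_task, HZ);
        return 0;
}
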
index 4f8403a..9e1eede 100644 (file)
@@ -974,7 +974,6 @@ struct hclge_dev {
 
        /* affinity mask and notify for misc interrupt */
        cpumask_t affinity_mask;
-       struct irq_affinity_notify affinity_notify;
        struct hclge_ptp *ptp;
        struct devlink *devlink;
 };
index 3306050..645b2c0 100644 (file)
@@ -2232,6 +2232,7 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
 void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
 {
        if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) &&
+           test_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state) &&
            !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED,
                              &hdev->state))
                mod_delayed_work(hclgevf_wq, &hdev->service_task, 0);
@@ -3449,6 +3450,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 
        hclgevf_init_rxd_adv_layout(hdev);
 
+       set_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state);
+
        hdev->last_reset_time = jiffies;
        dev_info(&hdev->pdev->dev, "finished initializing %s driver\n",
                 HCLGEVF_DRIVER_NAME);
@@ -3899,7 +3902,7 @@ static int hclgevf_init(void)
 {
        pr_info("%s is initializing\n", HCLGEVF_NAME);
 
-       hclgevf_wq = alloc_workqueue("%s", 0, 0, HCLGEVF_NAME);
+       hclgevf_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGEVF_NAME);
        if (!hclgevf_wq) {
                pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME);
                return -ENOMEM;
index 883130a..28288d7 100644 (file)
@@ -146,6 +146,7 @@ enum hclgevf_states {
        HCLGEVF_STATE_REMOVING,
        HCLGEVF_STATE_NIC_REGISTERED,
        HCLGEVF_STATE_ROCE_REGISTERED,
+       HCLGEVF_STATE_SERVICE_INITED,
        /* task states */
        HCLGEVF_STATE_RST_SERVICE_SCHED,
        HCLGEVF_STATE_RST_HANDLING,
index 967a90e..bf4ecd9 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/if_bridge.h>
 #include <linux/ctype.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/auxiliary_bus.h>
 #include <linux/avf/virtchnl.h>
 #include <linux/cpu_rmap.h>
@@ -479,6 +480,7 @@ enum ice_pf_flags {
        ICE_FLAG_NO_MEDIA,
        ICE_FLAG_FW_LLDP_AGENT,
        ICE_FLAG_MOD_POWER_UNSUPPORTED,
+       ICE_FLAG_PHY_FW_LOAD_FAILED,
        ICE_FLAG_ETHTOOL_CTXT,          /* set when ethtool holds RTNL lock */
        ICE_FLAG_LEGACY_RX,
        ICE_FLAG_VF_TRUE_PROMISC_ENA,
@@ -610,6 +612,13 @@ struct ice_pf {
 struct ice_netdev_priv {
        struct ice_vsi *vsi;
        struct ice_repr *repr;
+       /* indirect block callbacks on registered higher level devices
+        * (e.g. tunnel devices)
+        *
+        * tc_indr_block_cb_priv_list is used to look up indirect callback
+        * private data
+        */
+       struct list_head tc_indr_block_priv_list;
 };
 
 /**
index a5425f0..4eef348 100644 (file)
@@ -1185,6 +1185,7 @@ struct ice_aqc_get_link_status_data {
 #define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA  BIT(7)
        u8 link_cfg_err;
 #define ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED   BIT(5)
+#define ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE  BIT(6)
 #define ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT      BIT(7)
        u8 link_info;
 #define ICE_AQ_LINK_UP                 BIT(0)  /* Link Status */
@@ -1268,6 +1269,7 @@ struct ice_aqc_set_event_mask {
 #define ICE_AQ_LINK_EVENT_AN_COMPLETED         BIT(7)
 #define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL     BIT(8)
 #define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED    BIT(9)
+#define ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL     BIT(12)
        u8      reserved1[6];
 };
 
index e731b46..23cfcce 100644 (file)
@@ -1566,6 +1566,30 @@ static struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw)
 }
 
 /**
+ * ice_get_sw_prof_type - determine switch profile type
+ * @hw: pointer to the HW structure
+ * @fv: pointer to the switch field vector
+ */
+static enum ice_prof_type
+ice_get_sw_prof_type(struct ice_hw *hw, struct ice_fv *fv)
+{
+       u16 i;
+
+       for (i = 0; i < hw->blk[ICE_BLK_SW].es.fvw; i++) {
+               /* UDP tunnel will have UDP_OF protocol ID and VNI offset */
+               if (fv->ew[i].prot_id == (u8)ICE_PROT_UDP_OF &&
+                   fv->ew[i].off == ICE_VNI_OFFSET)
+                       return ICE_PROF_TUN_UDP;
+
+               /* GRE tunnel will have GRE protocol */
+               if (fv->ew[i].prot_id == (u8)ICE_PROT_GRE_OF)
+                       return ICE_PROF_TUN_GRE;
+       }
+
+       return ICE_PROF_NON_TUN;
+}
+
+/**
  * ice_get_sw_fv_bitmap - Get switch field vector bitmap based on profile type
  * @hw: pointer to hardware structure
  * @req_profs: type of profiles requested
@@ -1588,6 +1612,7 @@ ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs,
        bitmap_zero(bm, ICE_MAX_NUM_PROFILES);
        ice_seg = hw->seg;
        do {
+               enum ice_prof_type prof_type;
                u32 offset;
 
                fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
@@ -1595,7 +1620,10 @@ ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs,
                ice_seg = NULL;
 
                if (fv) {
-                       if (req_profs & ICE_PROF_NON_TUN)
+                       /* Determine field vector type */
+                       prof_type = ice_get_sw_prof_type(hw, fv);
+
+                       if (req_profs & prof_type)
                                set_bit((u16)offset, bm);
                }
        } while (fv);
index 120bceb..0f572a3 100644 (file)
@@ -373,6 +373,7 @@ struct ice_pkg_enum {
 enum ice_tunnel_type {
        TNL_VXLAN = 0,
        TNL_GENEVE,
+       TNL_GRETAP,
        __TNL_TYPE_CNT,
        TNL_LAST = 0xFF,
        TNL_ALL = 0xFF,
@@ -614,6 +615,9 @@ struct ice_chs_chg {
 
 enum ice_prof_type {
        ICE_PROF_NON_TUN = 0x1,
+       ICE_PROF_TUN_UDP = 0x2,
+       ICE_PROF_TUN_GRE = 0x4,
+       ICE_PROF_TUN_ALL = 0x6,
        ICE_PROF_ALL = 0xFF,
 };
 #endif /* _ICE_FLEX_TYPE_H_ */
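
Note that ice_prof_type is a bitmask rather than a list of exclusive values: ICE_PROF_TUN_ALL (0x6) is simply ICE_PROF_TUN_UDP | ICE_PROF_TUN_GRE, which is what lets the req_profs & prof_type test in ice_get_sw_fv_bitmap() select any subset of profile types in one AND:

/* Sketch: one AND answers "does this profile match any requested
 * type?", exactly as the test in ice_get_sw_fv_bitmap() does.
 */
static bool example_prof_matches(enum ice_prof_type req,
                                 enum ice_prof_type prof)
{
        return (req & prof) != 0;
}
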
index 37c18c6..e375ac8 100644 (file)
@@ -100,9 +100,9 @@ static void ice_display_lag_info(struct ice_lag *lag)
  */
 static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
 {
-       struct net_device *event_netdev, *netdev_tmp;
        struct netdev_notifier_bonding_info *info;
        struct netdev_bonding_info *bonding_info;
+       struct net_device *event_netdev;
        const char *lag_netdev_name;
 
        event_netdev = netdev_notifier_info_to_dev(ptr);
@@ -123,19 +123,6 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
                goto lag_out;
        }
 
-       rcu_read_lock();
-       for_each_netdev_in_bond_rcu(lag->upper_netdev, netdev_tmp) {
-               if (!netif_is_ice(netdev_tmp))
-                       continue;
-
-               if (netdev_tmp && netdev_tmp != lag->netdev &&
-                   lag->peer_netdev != netdev_tmp) {
-                       dev_hold(netdev_tmp);
-                       lag->peer_netdev = netdev_tmp;
-               }
-       }
-       rcu_read_unlock();
-
        if (bonding_info->slave.state)
                ice_lag_set_backup(lag);
        else
@@ -319,6 +306,9 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
        case NETDEV_BONDING_INFO:
                ice_lag_info_event(lag, ptr);
                break;
+       case NETDEV_UNREGISTER:
+               ice_lag_unlink(lag, ptr);
+               break;
        default:
                break;
        }
index 77dceab..159c52b 100644 (file)
@@ -1983,6 +1983,7 @@ static struct ice_q_vector *ice_pull_qvec_from_rc(struct ice_ring_container *rc)
        case ICE_TX_CONTAINER:
                if (rc->tx_ring)
                        return rc->tx_ring->q_vector;
+               break;
        default:
                break;
        }
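
The added break deserves a note: with a NULL tx_ring the old code fell through into the default label, which also just breaks, so the change appears to be about making the intent explicit and keeping implicit-fallthrough warnings quiet rather than altering behavior. A standalone sketch of the control flow, with illustrative names:

    #include <stddef.h>

    struct q_vector;
    struct ring { struct q_vector *q_vector; };

    static struct q_vector *pull_qvec(int type, struct ring *tx)
    {
            switch (type) {
            case 1:                         /* think ICE_TX_CONTAINER */
                    if (tx)
                            return tx->q_vector;
                    break;                  /* without this, control falls into default */
            default:
                    break;
            }
            return NULL;                    /* NULL ring and unknown types land here */
    }

    int main(void)
    {
            return pull_qvec(1, NULL) == NULL ? 0 : 1;
    }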
index 9ba2277..66112ad 100644 (file)
@@ -58,6 +58,12 @@ static void ice_vsi_release_all(struct ice_pf *pf);
 static int ice_rebuild_channels(struct ice_pf *pf);
 static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
 
+static int
+ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
+                    void *cb_priv, enum tc_setup_type type, void *type_data,
+                    void *data,
+                    void (*cleanup)(struct flow_block_cb *block_cb));
+
 bool netif_is_ice(struct net_device *dev)
 {
        return dev && (dev->netdev_ops == &ice_netdev_ops);
@@ -931,6 +937,29 @@ static void ice_set_dflt_mib(struct ice_pf *pf)
 }
 
 /**
+ * ice_check_phy_fw_load - check if PHY FW load failed
+ * @pf: pointer to PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * check if external PHY FW load failed and print an error message if it did
+ */
+static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
+{
+       if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
+               clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
+               return;
+       }
+
+       if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
+               return;
+
+       if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
+               dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
+               set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
+       }
+}
+
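
The function above latches the failure so the error is logged once per occurrence: the flag is cleared whenever the failure bit is absent, and set again on the first report after a recovery. A minimal userspace sketch of the same pattern, with a plain bool standing in for the pf->flags bit:

    #include <stdbool.h>
    #include <stdio.h>

    #define PHY_LOAD_FAILURE 0x01   /* stand-in for ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE */

    static bool phy_fw_load_failed; /* stand-in for the ICE_FLAG_PHY_FW_LOAD_FAILED bit */

    static void check_phy_fw_load(unsigned int link_cfg_err)
    {
            if (!(link_cfg_err & PHY_LOAD_FAILURE)) {
                    phy_fw_load_failed = false;     /* condition gone: re-arm the warning */
                    return;
            }
            if (phy_fw_load_failed)
                    return;                         /* already reported this occurrence */
            printf("external PHY FW load failed\n");
            phy_fw_load_failed = true;
    }

    int main(void)
    {
            check_phy_fw_load(PHY_LOAD_FAILURE);    /* prints */
            check_phy_fw_load(PHY_LOAD_FAILURE);    /* silent */
            check_phy_fw_load(0);                   /* clears the latch */
            check_phy_fw_load(PHY_LOAD_FAILURE);    /* prints again */
            return 0;
    }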
+/**
  * ice_check_module_power
  * @pf: pointer to PF struct
  * @link_cfg_err: bitmap from the link info structure
@@ -963,6 +992,20 @@ static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
 }
 
 /**
+ * ice_check_link_cfg_err - check if link configuration failed
+ * @pf: pointer to the PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * Check the link_cfg_err bitmap reported in the link info structure and
+ * print an error message for any link configuration failure it indicates
+ */
+static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
+{
+       ice_check_module_power(pf, link_cfg_err);
+       ice_check_phy_fw_load(pf, link_cfg_err);
+}
+
+/**
  * ice_link_event - process the link event
  * @pf: PF that the link event is associated with
  * @pi: port_info for the port that the link event is associated with
@@ -997,7 +1040,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
                        pi->lport, ice_stat_str(status),
                        ice_aq_str(pi->hw->adminq.sq_last_status));
 
-       ice_check_module_power(pf, pi->phy.link_info.link_cfg_err);
+       ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
 
        /* Check if the link state is up after updating link info, and treat
         * this event as an UP event since the link is actually UP now.
@@ -1075,7 +1118,8 @@ static int ice_init_link_events(struct ice_port_info *pi)
        u16 mask;
 
        mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
-                      ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
+                      ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL |
+                      ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));
 
        if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
                dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
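
The mask construction ORs together the events of interest and hands the firmware the bitwise complement; the inversion suggests that set bits mask events off, leaving only the listed events reported. A sketch with made-up bit positions (the real ICE_AQ_LINK_EVENT_* values are firmware-defined):

    #include <stdint.h>
    #include <stdio.h>

    #define EVT_UPDOWN           (1u << 0)      /* hypothetical bit positions */
    #define EVT_MEDIA_NA         (1u << 1)
    #define EVT_MODULE_QUAL_FAIL (1u << 2)
    #define EVT_PHY_FW_LOAD_FAIL (1u << 3)

    int main(void)
    {
            uint16_t wanted = EVT_UPDOWN | EVT_MEDIA_NA |
                              EVT_MODULE_QUAL_FAIL | EVT_PHY_FW_LOAD_FAIL;
            uint16_t mask = (uint16_t)~wanted;

            /* every bit set except the four events of interest */
            printf("mask = 0x%04x\n", mask);
            return 0;
    }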
@@ -2146,7 +2190,7 @@ static void ice_check_media_subtask(struct ice_pf *pf)
        if (err)
                return;
 
-       ice_check_module_power(pf, pi->phy.link_info.link_cfg_err);
+       ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
 
        if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
                if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
@@ -3394,6 +3438,63 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
 }
 
 /**
+ * ice_rep_indr_tc_block_unbind
+ * @cb_priv: indirection block private data
+ */
+static void ice_rep_indr_tc_block_unbind(void *cb_priv)
+{
+       struct ice_indr_block_priv *indr_priv = cb_priv;
+
+       list_del(&indr_priv->list);
+       kfree(indr_priv);
+}
+
+/**
+ * ice_tc_indir_block_unregister - Unregister TC indirect block notifications
+ * @vsi: VSI struct which has the netdev
+ */
+static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
+{
+       struct ice_netdev_priv *np = netdev_priv(vsi->netdev);
+
+       flow_indr_dev_unregister(ice_indr_setup_tc_cb, np,
+                                ice_rep_indr_tc_block_unbind);
+}
+
+/**
+ * ice_tc_indir_block_remove - clean indirect TC block notifications
+ * @pf: PF structure
+ */
+static void ice_tc_indir_block_remove(struct ice_pf *pf)
+{
+       struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
+
+       if (!pf_vsi)
+               return;
+
+       ice_tc_indir_block_unregister(pf_vsi);
+}
+
+/**
+ * ice_tc_indir_block_register - Register TC indirect block notifications
+ * @vsi: VSI struct which has the netdev
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_tc_indir_block_register(struct ice_vsi *vsi)
+{
+       struct ice_netdev_priv *np;
+
+       if (!vsi || !vsi->netdev)
+               return -EINVAL;
+
+       np = netdev_priv(vsi->netdev);
+
+       INIT_LIST_HEAD(&np->tc_indr_block_priv_list);
+       return flow_indr_dev_register(ice_indr_setup_tc_cb, np);
+}
+
+/**
  * ice_setup_pf_sw - Setup the HW switch on startup or after reset
  * @pf: board private structure
  *
@@ -3401,6 +3502,7 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
  */
 static int ice_setup_pf_sw(struct ice_pf *pf)
 {
+       struct device *dev = ice_pf_to_dev(pf);
        struct ice_vsi *vsi;
        int status = 0;
 
@@ -3422,6 +3524,13 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
        /* netdev has to be configured before setting frame size */
        ice_vsi_cfg_frame_size(vsi);
 
+       /* init indirect block notifications */
+       status = ice_tc_indir_block_register(vsi);
+       if (status) {
+               dev_err(dev, "Failed to register netdev notifier\n");
+               goto unroll_cfg_netdev;
+       }
+
        /* Setup DCB netlink interface */
        ice_dcbnl_setup(vsi);
 
@@ -3433,7 +3542,7 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 
        status = ice_set_cpu_rx_rmap(vsi);
        if (status) {
-               dev_err(ice_pf_to_dev(pf), "Failed to set CPU Rx map VSI %d error %d\n",
+               dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n",
                        vsi->vsi_num, status);
                status = -EINVAL;
                goto unroll_napi_add;
@@ -3446,8 +3555,9 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 
 free_cpu_rx_map:
        ice_free_cpu_rx_rmap(vsi);
-
 unroll_napi_add:
+       ice_tc_indir_block_unregister(vsi);
+unroll_cfg_netdev:
        if (vsi) {
                ice_napi_del(vsi);
                if (vsi->netdev) {
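
The new unroll_cfg_netdev label extends the usual goto-based unwind: resources acquired later are released first, and each failure jumps to the label that undoes everything acquired so far (which is why ice_tc_indir_block_unregister() now sits on the unroll_napi_add path). A compact sketch of that ordering, with illustrative names:

    #include <stdio.h>

    static int setup(void)
    {
            int err;

            err = 0;                        /* pretend: configure the netdev */
            if (err)
                    goto out;

            err = 0;                        /* pretend: register indirect TC blocks */
            if (err)
                    goto unroll_cfg_netdev;

            err = -1;                       /* pretend: setting the CPU Rx map fails */
            if (err)
                    goto unroll_indir_blocks;

            return 0;

    unroll_indir_blocks:
            puts("unregister indirect TC blocks");
    unroll_cfg_netdev:
            puts("undo netdev configuration");
    out:
            return err;
    }

    int main(void)
    {
            return setup() ? 1 : 0;
    }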
@@ -4528,7 +4638,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
        ice_init_link_dflt_override(pf->hw.port_info);
 
-       ice_check_module_power(pf, pf->hw.port_info->phy.link_info.link_cfg_err);
+       ice_check_link_cfg_err(pf,
+                              pf->hw.port_info->phy.link_info.link_cfg_err);
 
        /* if media available, initialize PHY settings */
        if (pf->hw.port_info->phy.link_info.link_info &
@@ -4721,6 +4832,8 @@ static void ice_remove(struct pci_dev *pdev)
                msleep(100);
        }
 
+       ice_tc_indir_block_remove(pf);
+
        if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
                set_bit(ICE_VF_RESETS_DISABLED, pf->state);
                ice_free_vfs(pf);
@@ -8155,6 +8268,121 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
        return -EOPNOTSUPP;
 }
 
+static struct ice_indr_block_priv *
+ice_indr_block_priv_lookup(struct ice_netdev_priv *np,
+                          struct net_device *netdev)
+{
+       struct ice_indr_block_priv *cb_priv;
+
+       list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) {
+               if (!cb_priv->netdev)
+                       return NULL;
+               if (cb_priv->netdev == netdev)
+                       return cb_priv;
+       }
+       return NULL;
+}
+
+static int
+ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
+                       void *indr_priv)
+{
+       struct ice_indr_block_priv *priv = indr_priv;
+       struct ice_netdev_priv *np = priv->np;
+
+       switch (type) {
+       case TC_SETUP_CLSFLOWER:
+               return ice_setup_tc_cls_flower(np, priv->netdev,
+                                              (struct flow_cls_offload *)
+                                              type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int
+ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch,
+                       struct ice_netdev_priv *np,
+                       struct flow_block_offload *f, void *data,
+                       void (*cleanup)(struct flow_block_cb *block_cb))
+{
+       struct ice_indr_block_priv *indr_priv;
+       struct flow_block_cb *block_cb;
+
+       if (!ice_is_tunnel_supported(netdev) &&
+           !(is_vlan_dev(netdev) &&
+             vlan_dev_real_dev(netdev) == np->vsi->netdev))
+               return -EOPNOTSUPP;
+
+       if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+
+       switch (f->command) {
+       case FLOW_BLOCK_BIND:
+               indr_priv = ice_indr_block_priv_lookup(np, netdev);
+               if (indr_priv)
+                       return -EEXIST;
+
+               indr_priv = kzalloc(sizeof(*indr_priv), GFP_KERNEL);
+               if (!indr_priv)
+                       return -ENOMEM;
+
+               indr_priv->netdev = netdev;
+               indr_priv->np = np;
+               list_add(&indr_priv->list, &np->tc_indr_block_priv_list);
+
+               block_cb =
+                       flow_indr_block_cb_alloc(ice_indr_setup_block_cb,
+                                                indr_priv, indr_priv,
+                                                ice_rep_indr_tc_block_unbind,
+                                                f, netdev, sch, data, np,
+                                                cleanup);
+
+               if (IS_ERR(block_cb)) {
+                       list_del(&indr_priv->list);
+                       kfree(indr_priv);
+                       return PTR_ERR(block_cb);
+               }
+               flow_block_cb_add(block_cb, f);
+               list_add_tail(&block_cb->driver_list, &ice_block_cb_list);
+               break;
+       case FLOW_BLOCK_UNBIND:
+               indr_priv = ice_indr_block_priv_lookup(np, netdev);
+               if (!indr_priv)
+                       return -ENOENT;
+
+               block_cb = flow_block_cb_lookup(f->block,
+                                               ice_indr_setup_block_cb,
+                                               indr_priv);
+               if (!block_cb)
+                       return -ENOENT;
+
+               flow_indr_block_cb_remove(block_cb, f);
+
+               list_del(&block_cb->driver_list);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static int
+ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
+                    void *cb_priv, enum tc_setup_type type, void *type_data,
+                    void *data,
+                    void (*cleanup)(struct flow_block_cb *block_cb))
+{
+       switch (type) {
+       case TC_SETUP_BLOCK:
+               return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data,
+                                              data, cleanup);
+
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
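
Taken together: flow_indr_dev_register() hands ice_indr_setup_tc_cb() every indirect offload request, BIND allocates one private entry per tunnel netdev and refuses duplicates, and UNBIND tears the entry down. A userspace sketch of just the duplicate-rejecting bookkeeping, with a hand-rolled singly linked list standing in for the kernel's list_head:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct priv {
            const char *netdev;     /* stands in for struct net_device * */
            struct priv *next;
    };

    static struct priv *priv_list;

    static struct priv *lookup(const char *netdev)
    {
            struct priv *p;

            for (p = priv_list; p; p = p->next)
                    if (p->netdev == netdev)
                            return p;
            return NULL;
    }

    static int bind_dev(const char *netdev)
    {
            struct priv *p;

            if (lookup(netdev))
                    return -EEXIST;         /* already bound, as in FLOW_BLOCK_BIND */
            p = malloc(sizeof(*p));
            if (!p)
                    return -ENOMEM;
            p->netdev = netdev;
            p->next = priv_list;
            priv_list = p;
            return 0;
    }

    int main(void)
    {
            const char *vxlan0 = "vxlan0";

            printf("%d\n", bind_dev(vxlan0));       /* 0 */
            printf("%d\n", bind_dev(vxlan0));       /* -EEXIST (-17) */
            return 0;
    }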
 /**
  * ice_open - Called when a network interface becomes active
  * @netdev: network interface device structure
@@ -8213,7 +8441,7 @@ int ice_open_internal(struct net_device *netdev)
                return -EIO;
        }
 
-       ice_check_module_power(pf, pi->phy.link_info.link_cfg_err);
+       ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
 
        /* Set PHY if there is media, otherwise, turn off PHY */
        if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
index 0b220df..dc1b0e9 100644 (file)
@@ -37,10 +37,22 @@ enum ice_protocol_type {
        ICE_TCP_IL,
        ICE_UDP_OF,
        ICE_UDP_ILOS,
+       ICE_VXLAN,
+       ICE_GENEVE,
+       ICE_NVGRE,
+       ICE_VXLAN_GPE,
        ICE_SCTP_IL,
        ICE_PROTOCOL_LAST
 };
 
+enum ice_sw_tunnel_type {
+       ICE_NON_TUN = 0,
+       ICE_SW_TUN_VXLAN,
+       ICE_SW_TUN_GENEVE,
+       ICE_SW_TUN_NVGRE,
+       ICE_ALL_TUNNELS /* All tunnel types including NVGRE */
+};
+
 /* Decoders for ice_prot_id:
  * - F: First
  * - I: Inner
@@ -74,6 +86,8 @@ enum ice_prot_id {
        ICE_PROT_INVALID        = 255  /* when offset == ICE_FV_OFFSET_INVAL */
 };
 
+#define ICE_VNI_OFFSET         12 /* offset of VNI from ICE_PROT_UDP_OF */
+
 #define ICE_MAC_OFOS_HW                1
 #define ICE_MAC_IL_HW          4
 #define ICE_ETYPE_OL_HW                9
@@ -85,8 +99,15 @@ enum ice_prot_id {
 #define ICE_IPV6_IL_HW         41
 #define ICE_TCP_IL_HW          49
 #define ICE_UDP_ILOS_HW                53
+#define ICE_GRE_OF_HW          64
 
 #define ICE_UDP_OF_HW  52 /* UDP Tunnels */
+#define ICE_META_DATA_ID_HW 255 /* this is used for tunnel type */
+
+#define ICE_MDID_SIZE 2
+#define ICE_TUN_FLAG_MDID 21
+#define ICE_TUN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_TUN_FLAG_MDID)
+#define ICE_TUN_FLAG_MASK 0xFF
 
 #define ICE_TUN_FLAG_FV_IND 2
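
Worked example of the metadata offset arithmetic above: metadata IDs address 2-byte words, so flag word 21 begins 42 bytes into the metadata region, and the 0xFF mask selects the low byte of that word:

    #include <stdio.h>

    #define MDID_SIZE     2                         /* mirrors ICE_MDID_SIZE */
    #define TUN_FLAG_MDID 21                        /* mirrors ICE_TUN_FLAG_MDID */
    #define TUN_FLAG_OFF  (MDID_SIZE * TUN_FLAG_MDID)
    #define TUN_FLAG_MASK 0xFF                      /* low byte of the 16-bit word */

    int main(void)
    {
            /* prints: tunnel flag word at byte offset 42, mask 0xFF */
            printf("tunnel flag word at byte offset %d, mask 0x%02X\n",
                   TUN_FLAG_OFF, TUN_FLAG_MASK);
            return 0;
    }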
 
@@ -152,6 +173,18 @@ struct ice_l4_hdr {
        __be16 check;
 };
 
+struct ice_udp_tnl_hdr {
+       __be16 field;
+       __be16 proto_type;
+       __be32 vni;     /* only use lower 24-bits */
+};
+
+struct ice_nvgre_hdr {
+       __be16 flags;
+       __be16 protocol;
+       __be32 tni_flow;
+};
+
 union ice_prot_hdr {
        struct ice_ether_hdr eth_hdr;
        struct ice_ethtype_hdr ethertype;
@@ -160,6 +193,8 @@ union ice_prot_hdr {
        struct ice_ipv6_hdr ipv6_hdr;
        struct ice_l4_hdr l4_hdr;
        struct ice_sctp_hdr sctp_hdr;
+       struct ice_udp_tnl_hdr tnl_hdr;
+       struct ice_nvgre_hdr nvgre_hdr;
 };
 
 /* This is mapping table entry that maps every word within a given protocol
index a1be0d0..bf7247c 100644 (file)
@@ -1929,6 +1929,9 @@ err_kworker:
  */
 void ice_ptp_release(struct ice_pf *pf)
 {
+       if (!test_bit(ICE_FLAG_PTP, pf->flags))
+               return;
+
        /* Disable timestamping for both Tx and Rx */
        ice_ptp_cfg_timestamp(pf, false);
 
index c49eeea..af8e6ef 100644 (file)
@@ -267,6 +267,9 @@ static int ice_repr_add(struct ice_vf *vf)
        if (err)
                goto err_devlink;
 
+       repr->netdev->min_mtu = ETH_MIN_MTU;
+       repr->netdev->max_mtu = ICE_MAX_MTU;
+
        err = ice_repr_reg_netdev(repr->netdev);
        if (err)
                goto err_netdev;
index 2742e1c..793f4a9 100644 (file)
@@ -35,6 +35,192 @@ struct ice_dummy_pkt_offsets {
        u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */
 };
 
+static const struct ice_dummy_pkt_offsets dummy_gre_tcp_packet_offsets[] = {
+       { ICE_MAC_OFOS,         0 },
+       { ICE_ETYPE_OL,         12 },
+       { ICE_IPV4_OFOS,        14 },
+       { ICE_NVGRE,            34 },
+       { ICE_MAC_IL,           42 },
+       { ICE_IPV4_IL,          56 },
+       { ICE_TCP_IL,           76 },
+       { ICE_PROTOCOL_LAST,    0 },
+};
+
+static const u8 dummy_gre_tcp_packet[] = {
+       0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x08, 0x00,             /* ICE_ETYPE_OL 12 */
+
+       0x45, 0x00, 0x00, 0x3E, /* ICE_IPV4_OFOS 14 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x2F, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x08, 0x00,
+
+       0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 56 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x06, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 76 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x50, 0x02, 0x20, 0x00,
+       0x00, 0x00, 0x00, 0x00
+};
+
+static const struct ice_dummy_pkt_offsets dummy_gre_udp_packet_offsets[] = {
+       { ICE_MAC_OFOS,         0 },
+       { ICE_ETYPE_OL,         12 },
+       { ICE_IPV4_OFOS,        14 },
+       { ICE_NVGRE,            34 },
+       { ICE_MAC_IL,           42 },
+       { ICE_IPV4_IL,          56 },
+       { ICE_UDP_ILOS,         76 },
+       { ICE_PROTOCOL_LAST,    0 },
+};
+
+static const u8 dummy_gre_udp_packet[] = {
+       0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x08, 0x00,             /* ICE_ETYPE_OL 12 */
+
+       0x45, 0x00, 0x00, 0x3E, /* ICE_IPV4_OFOS 14 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x2F, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x08, 0x00,
+
+       0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 56 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x11, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 76 */
+       0x00, 0x08, 0x00, 0x00,
+};
+
+static const struct ice_dummy_pkt_offsets dummy_udp_tun_tcp_packet_offsets[] = {
+       { ICE_MAC_OFOS,         0 },
+       { ICE_ETYPE_OL,         12 },
+       { ICE_IPV4_OFOS,        14 },
+       { ICE_UDP_OF,           34 },
+       { ICE_VXLAN,            42 },
+       { ICE_GENEVE,           42 },
+       { ICE_VXLAN_GPE,        42 },
+       { ICE_MAC_IL,           50 },
+       { ICE_IPV4_IL,          64 },
+       { ICE_TCP_IL,           84 },
+       { ICE_PROTOCOL_LAST,    0 },
+};
+
+static const u8 dummy_udp_tun_tcp_packet[] = {
+       0x00, 0x00, 0x00, 0x00,  /* ICE_MAC_OFOS 0 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x08, 0x00,             /* ICE_ETYPE_OL 12 */
+
+       0x45, 0x00, 0x00, 0x5a, /* ICE_IPV4_OFOS 14 */
+       0x00, 0x01, 0x00, 0x00,
+       0x40, 0x11, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+       0x00, 0x46, 0x00, 0x00,
+
+       0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x08, 0x00,
+
+       0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_IL 64 */
+       0x00, 0x01, 0x00, 0x00,
+       0x40, 0x06, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 84 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x50, 0x02, 0x20, 0x00,
+       0x00, 0x00, 0x00, 0x00
+};
+
+static const struct ice_dummy_pkt_offsets dummy_udp_tun_udp_packet_offsets[] = {
+       { ICE_MAC_OFOS,         0 },
+       { ICE_ETYPE_OL,         12 },
+       { ICE_IPV4_OFOS,        14 },
+       { ICE_UDP_OF,           34 },
+       { ICE_VXLAN,            42 },
+       { ICE_GENEVE,           42 },
+       { ICE_VXLAN_GPE,        42 },
+       { ICE_MAC_IL,           50 },
+       { ICE_IPV4_IL,          64 },
+       { ICE_UDP_ILOS,         84 },
+       { ICE_PROTOCOL_LAST,    0 },
+};
+
+static const u8 dummy_udp_tun_udp_packet[] = {
+       0x00, 0x00, 0x00, 0x00,  /* ICE_MAC_OFOS 0 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x08, 0x00,             /* ICE_ETYPE_OL 12 */
+
+       0x45, 0x00, 0x00, 0x4e, /* ICE_IPV4_OFOS 14 */
+       0x00, 0x01, 0x00, 0x00,
+       0x00, 0x11, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+       0x00, 0x3a, 0x00, 0x00,
+
+       0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x08, 0x00,
+
+       0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_IL 64 */
+       0x00, 0x01, 0x00, 0x00,
+       0x00, 0x11, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+
+       0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 84 */
+       0x00, 0x08, 0x00, 0x00,
+};
+
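
The offset tables and the byte arrays above have to agree, and the fixed header sizes make that easy to check: 14-byte Ethernet, 20-byte IPv4, 8-byte UDP, 8-byte VXLAN/Geneve. The outer IPv4 total-length bytes are consistent too (0x5a = 90 and 0x4e = 78, i.e. the packet sizes minus the 14-byte Ethernet header). A standalone verification of the tunnel-packet offsets:

    #include <assert.h>

    int main(void)
    {
            const int eth = 14, ip = 20, udp = 8, tun = 8;

            assert(eth + ip == 34);                         /* ICE_UDP_OF */
            assert(eth + ip + udp == 42);                   /* ICE_VXLAN / ICE_GENEVE */
            assert(eth + ip + udp + tun == 50);             /* ICE_MAC_IL */
            assert(eth + ip + udp + tun + eth == 64);       /* ICE_IPV4_IL */
            assert(eth + ip + udp + tun + eth + ip == 84);  /* ICE_TCP_IL / ICE_UDP_ILOS */
            return 0;
    }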
 /* offset info for MAC + IPv4 + UDP dummy packet */
 static const struct ice_dummy_pkt_offsets dummy_udp_packet_offsets[] = {
        { ICE_MAC_OFOS,         0 },
@@ -1177,8 +1363,10 @@ ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid,
        recps[rid].root_buf = devm_kmemdup(ice_hw_to_dev(hw), tmp,
                                           recps[rid].n_grp_count * sizeof(*recps[rid].root_buf),
                                           GFP_KERNEL);
-       if (!recps[rid].root_buf)
+       if (!recps[rid].root_buf) {
+               status = ICE_ERR_NO_MEMORY;
                goto err_unroll;
+       }
 
        /* Copy result indexes */
        bitmap_copy(recps[rid].res_idxs, result_bm, ICE_MAX_FV_WORDS);
@@ -3582,6 +3770,9 @@ static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = {
        { ICE_TCP_IL,           { 0, 2 } },
        { ICE_UDP_OF,           { 0, 2 } },
        { ICE_UDP_ILOS,         { 0, 2 } },
+       { ICE_VXLAN,            { 8, 10, 12, 14 } },
+       { ICE_GENEVE,           { 8, 10, 12, 14 } },
+       { ICE_NVGRE,            { 0, 2, 4, 6 } },
 };
 
 static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = {
@@ -3596,6 +3787,9 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = {
        { ICE_TCP_IL,           ICE_TCP_IL_HW },
        { ICE_UDP_OF,           ICE_UDP_OF_HW },
        { ICE_UDP_ILOS,         ICE_UDP_ILOS_HW },
+       { ICE_VXLAN,            ICE_UDP_OF_HW },
+       { ICE_GENEVE,           ICE_UDP_OF_HW },
+       { ICE_NVGRE,            ICE_GRE_OF_HW },
 };
 
 /**
@@ -3915,12 +4109,11 @@ ice_find_free_recp_res_idx(struct ice_hw *hw, const unsigned long *profiles,
  * ice_add_sw_recipe - function to call AQ calls to create switch recipe
  * @hw: pointer to hardware structure
  * @rm: recipe management list entry
- * @match_tun_mask: tunnel mask that needs to be programmed
  * @profiles: bitmap of profiles that will be associated.
  */
 static enum ice_status
 ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
-                 u16 match_tun_mask, unsigned long *profiles)
+                 unsigned long *profiles)
 {
        DECLARE_BITMAP(result_idx_bm, ICE_MAX_FV_WORDS);
        struct ice_aqc_recipe_data_elem *tmp;
@@ -4128,15 +4321,6 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
                }
                buf[recps].content.act_ctrl_fwd_priority = rm->priority;
 
-               /* To differentiate among different UDP tunnels, a meta data ID
-                * flag is used.
-                */
-               if (match_tun_mask) {
-                       buf[recps].content.lkup_indx[i] = ICE_TUN_FLAG_FV_IND;
-                       buf[recps].content.mask[i] =
-                               cpu_to_le16(match_tun_mask);
-               }
-
                recps++;
                rm->root_rid = (u8)rid;
        }
@@ -4199,6 +4383,7 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
                recp->chain_idx = entry->chain_idx;
                recp->priority = buf[buf_idx].content.act_ctrl_fwd_priority;
                recp->n_grp_count = rm->n_grp_count;
+               recp->tun_type = rm->tun_type;
                recp->recp_created = true;
        }
        rm->root_buf = buf;
@@ -4279,6 +4464,55 @@ free_mem:
        return status;
 }
 
+/**
+ * ice_tun_type_match_word - determine if tun type needs a match mask
+ * @tun_type: tunnel type
+ * @mask: mask to be used for the tunnel
+ */
+static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask)
+{
+       switch (tun_type) {
+       case ICE_SW_TUN_GENEVE:
+       case ICE_SW_TUN_VXLAN:
+       case ICE_SW_TUN_NVGRE:
+               *mask = ICE_TUN_FLAG_MASK;
+               return true;
+
+       default:
+               *mask = 0;
+               return false;
+       }
+}
+
+/**
+ * ice_add_special_words - Add words that are not protocols, such as metadata
+ * @rinfo: other information regarding the rule e.g. priority and action info
+ * @lkup_exts: lookup word structure
+ */
+static enum ice_status
+ice_add_special_words(struct ice_adv_rule_info *rinfo,
+                     struct ice_prot_lkup_ext *lkup_exts)
+{
+       u16 mask;
+
+       /* If this is a tunneled packet, then add a lookup word to match the
+        * tunnel bit in the packet metadata flags.
+        */
+       if (ice_tun_type_match_word(rinfo->tun_type, &mask)) {
+               if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) {
+                       u8 word = lkup_exts->n_val_words++;
+
+                       lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW;
+                       lkup_exts->fv_words[word].off = ICE_TUN_FLAG_MDID_OFF;
+                       lkup_exts->field_mask[word] = mask;
+               } else {
+                       return ICE_ERR_MAX_LIMIT;
+               }
+       }
+
+       return 0;
+}
+
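
Only the three offloaded tunnel types request the metadata mask, so ice_add_special_words() consumes one of the ICE_MAX_CHAIN_WORDS lookup words for tunneled rules and none otherwise. A small sketch of that contract (enum values illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum tun { NON_TUN, TUN_VXLAN, TUN_GENEVE, TUN_NVGRE };

    static bool match_word(enum tun t, uint16_t *mask)
    {
            switch (t) {
            case TUN_VXLAN:
            case TUN_GENEVE:
            case TUN_NVGRE:
                    *mask = 0xFF;           /* like ICE_TUN_FLAG_MASK */
                    return true;
            default:
                    *mask = 0;
                    return false;
            }
    }

    int main(void)
    {
            enum tun t;

            for (t = NON_TUN; t <= TUN_NVGRE; t++) {
                    uint16_t mask;

                    printf("type %d -> %s (mask 0x%02X)\n", (int)t,
                           match_word(t, &mask) ? "metadata word" : "no extra word",
                           (unsigned int)mask);
            }
            return 0;
    }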
 /* ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule
  * @hw: pointer to hardware structure
  * @rinfo: other information regarding the rule e.g. priority and action info
@@ -4288,9 +4522,30 @@ static void
 ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo,
                         unsigned long *bm)
 {
+       enum ice_prof_type prof_type;
+
        bitmap_zero(bm, ICE_MAX_NUM_PROFILES);
 
-       ice_get_sw_fv_bitmap(hw, ICE_PROF_NON_TUN, bm);
+       switch (rinfo->tun_type) {
+       case ICE_NON_TUN:
+               prof_type = ICE_PROF_NON_TUN;
+               break;
+       case ICE_ALL_TUNNELS:
+               prof_type = ICE_PROF_TUN_ALL;
+               break;
+       case ICE_SW_TUN_GENEVE:
+       case ICE_SW_TUN_VXLAN:
+               prof_type = ICE_PROF_TUN_UDP;
+               break;
+       case ICE_SW_TUN_NVGRE:
+               prof_type = ICE_PROF_TUN_GRE;
+               break;
+       default:
+               prof_type = ICE_PROF_ALL;
+               break;
+       }
+
+       ice_get_sw_fv_bitmap(hw, prof_type, bm);
 }
 
 /**
@@ -4315,7 +4570,6 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
        struct ice_sw_fv_list_entry *tmp;
        enum ice_status status = 0;
        struct ice_sw_recipe *rm;
-       u16 match_tun_mask = 0;
        u8 i;
 
        if (!lkups_cnt)
@@ -4365,6 +4619,13 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
        if (status)
                goto err_unroll;
 
+       /* Create any special protocol/offset pairs, such as looking at tunnel
+        * bits by extracting metadata
+        */
+       status = ice_add_special_words(rinfo, lkup_exts);
+       if (status)
+               goto err_free_lkup_exts;
+
        /* Group match words into recipes using preferred recipe grouping
         * criteria.
         */
@@ -4396,7 +4657,7 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
                goto err_unroll;
 
        /* Recipe we need does not exist, add a recipe */
-       status = ice_add_sw_recipe(hw, rm, match_tun_mask, profiles);
+       status = ice_add_sw_recipe(hw, rm, profiles);
        if (status)
                goto err_unroll;
 
@@ -4466,12 +4727,14 @@ err_free_lkup_exts:
  * @lkups: lookup elements or match criteria for the advanced recipe, one
  *        structure per protocol header
  * @lkups_cnt: number of protocols
+ * @tun_type: tunnel type
  * @pkt: dummy packet to fill according to filter match criteria
  * @pkt_len: packet length of dummy packet
  * @offsets: pointer to receive the pointer to the offsets for the packet
  */
 static void
 ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
+                     enum ice_sw_tunnel_type tun_type,
                      const u8 **pkt, u16 *pkt_len,
                      const struct ice_dummy_pkt_offsets **offsets)
 {
@@ -4495,6 +4758,35 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
                        ipv6 = true;
        }
 
+       if (tun_type == ICE_SW_TUN_NVGRE) {
+               if (tcp) {
+                       *pkt = dummy_gre_tcp_packet;
+                       *pkt_len = sizeof(dummy_gre_tcp_packet);
+                       *offsets = dummy_gre_tcp_packet_offsets;
+                       return;
+               }
+
+               *pkt = dummy_gre_udp_packet;
+               *pkt_len = sizeof(dummy_gre_udp_packet);
+               *offsets = dummy_gre_udp_packet_offsets;
+               return;
+       }
+
+       if (tun_type == ICE_SW_TUN_VXLAN ||
+           tun_type == ICE_SW_TUN_GENEVE) {
+               if (tcp) {
+                       *pkt = dummy_udp_tun_tcp_packet;
+                       *pkt_len = sizeof(dummy_udp_tun_tcp_packet);
+                       *offsets = dummy_udp_tun_tcp_packet_offsets;
+                       return;
+               }
+
+               *pkt = dummy_udp_tun_udp_packet;
+               *pkt_len = sizeof(dummy_udp_tun_udp_packet);
+               *offsets = dummy_udp_tun_udp_packet_offsets;
+               return;
+       }
+
        if (udp && !ipv6) {
                if (vlan) {
                        *pkt = dummy_vlan_udp_packet;
@@ -4615,6 +4907,13 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
                case ICE_SCTP_IL:
                        len = sizeof(struct ice_sctp_hdr);
                        break;
+               case ICE_NVGRE:
+                       len = sizeof(struct ice_nvgre_hdr);
+                       break;
+               case ICE_VXLAN:
+               case ICE_GENEVE:
+                       len = sizeof(struct ice_udp_tnl_hdr);
+                       break;
                default:
                        return ICE_ERR_PARAM;
                }
@@ -4645,6 +4944,48 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
 }
 
 /**
+ * ice_fill_adv_packet_tun - fill dummy packet with udp tunnel port
+ * @hw: pointer to the hardware structure
+ * @tun_type: tunnel type
+ * @pkt: dummy packet to fill in
+ * @offsets: offset info for the dummy packet
+ */
+static enum ice_status
+ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type,
+                       u8 *pkt, const struct ice_dummy_pkt_offsets *offsets)
+{
+       u16 open_port, i;
+
+       switch (tun_type) {
+       case ICE_SW_TUN_VXLAN:
+       case ICE_SW_TUN_GENEVE:
+               if (!ice_get_open_tunnel_port(hw, &open_port))
+                       return ICE_ERR_CFG;
+               break;
+
+       default:
+               /* Nothing needs to be done for this tunnel type */
+               return 0;
+       }
+
+       /* Find the outer UDP protocol header and insert the port number */
+       for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) {
+               if (offsets[i].type == ICE_UDP_OF) {
+                       struct ice_l4_hdr *hdr;
+                       u16 offset;
+
+                       offset = offsets[i].offset;
+                       hdr = (struct ice_l4_hdr *)&pkt[offset];
+                       hdr->dst_port = cpu_to_be16(open_port);
+
+                       return 0;
+               }
+       }
+
+       return ICE_ERR_CFG;
+}
+
+/**
  * ice_find_adv_rule_entry - Search a rule entry
  * @hw: pointer to the hardware structure
  * @lkups: lookup elements or match criteria for the advanced recipe, one
@@ -4678,6 +5019,7 @@ ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
                                break;
                        }
                if (rinfo->sw_act.flag == list_itr->rule_info.sw_act.flag &&
+                   rinfo->tun_type == list_itr->rule_info.tun_type &&
                    lkups_matched)
                        return list_itr;
        }
@@ -4852,7 +5194,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
                return ICE_ERR_PARAM;
 
        /* make sure that we can locate a dummy packet */
-       ice_find_dummy_packet(lkups, lkups_cnt, &pkt, &pkt_len,
+       ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type, &pkt, &pkt_len,
                              &pkt_offsets);
        if (!pkt) {
                status = ICE_ERR_PARAM;
@@ -4963,6 +5305,14 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
        if (status)
                goto err_ice_add_adv_rule;
 
+       if (rinfo->tun_type != ICE_NON_TUN) {
+               status = ice_fill_adv_packet_tun(hw, rinfo->tun_type,
+                                                s_rule->pdata.lkup_tx_rx.hdr,
+                                                pkt_offsets);
+               if (status)
+                       goto err_ice_add_adv_rule;
+       }
+
        status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule,
                                 rule_buf_sz, 1, ice_aqc_opc_add_sw_rules,
                                 NULL);
@@ -5198,6 +5548,13 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
                        return ICE_ERR_CFG;
        }
 
+       /* Create any special protocol/offset pairs, such as looking at tunnel
+        * bits by extracting metadata
+        */
+       status = ice_add_special_words(rinfo, &lkup_exts);
+       if (status)
+               return status;
+
        rid = ice_find_recp(hw, &lkup_exts);
        /* If did not find a recipe that match the existing criteria */
        if (rid == ICE_MAX_NUM_RECIPES)
index c4dd206..d8a3890 100644 (file)
@@ -171,6 +171,7 @@ struct ice_adv_rule_flags_info {
 };
 
 struct ice_adv_rule_info {
+       enum ice_sw_tunnel_type tun_type;
        struct ice_sw_act_ctrl sw_act;
        u32 priority;
        u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */
@@ -211,6 +212,8 @@ struct ice_sw_recipe {
        /* Bit map specifying the IDs associated with this group of recipe */
        DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
 
+       enum ice_sw_tunnel_type tun_type;
+
        /* List of type ice_fltr_mgmt_list_entry or adv_rule */
        u8 adv_rule;
        struct list_head filt_rules;
index 725caa1..e5d23fe 100644 (file)
@@ -3,8 +3,9 @@
 
 #include "ice.h"
 #include "ice_tc_lib.h"
-#include "ice_lib.h"
 #include "ice_fltr.h"
+#include "ice_lib.h"
+#include "ice_protocol_type.h"
 
 /**
  * ice_tc_count_lkups - determine lookup count for switch filter
@@ -20,7 +21,21 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
 {
        int lkups_cnt = 0;
 
-       if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID)
+       if (flags & ICE_TC_FLWR_FIELD_TENANT_ID)
+               lkups_cnt++;
+
+       if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+                    ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 |
+                    ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+                    ICE_TC_FLWR_FIELD_ENC_DEST_IPV6))
+               lkups_cnt++;
+
+       if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT)
+               lkups_cnt++;
+
+       /* currently inner etype filter isn't supported */
+       if ((flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) &&
+           fltr->tunnel_type == TNL_LAST)
                lkups_cnt++;
 
        /* are MAC fields specified? */
@@ -32,10 +47,8 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
                lkups_cnt++;
 
        /* are IPv[4|6] fields specified? */
-       if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4))
-               lkups_cnt++;
-       else if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV6 |
-                         ICE_TC_FLWR_FIELD_SRC_IPV6))
+       if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4 |
+                    ICE_TC_FLWR_FIELD_DEST_IPV6 | ICE_TC_FLWR_FIELD_SRC_IPV6))
                lkups_cnt++;
 
        /* is L4 (TCP/UDP/any other L4 protocol fields) specified? */
@@ -46,6 +59,148 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
        return lkups_cnt;
 }
 
+static enum ice_protocol_type ice_proto_type_from_mac(bool inner)
+{
+       return inner ? ICE_MAC_IL : ICE_MAC_OFOS;
+}
+
+static enum ice_protocol_type ice_proto_type_from_ipv4(bool inner)
+{
+       return inner ? ICE_IPV4_IL : ICE_IPV4_OFOS;
+}
+
+static enum ice_protocol_type ice_proto_type_from_ipv6(bool inner)
+{
+       return inner ? ICE_IPV6_IL : ICE_IPV6_OFOS;
+}
+
+static enum ice_protocol_type
+ice_proto_type_from_l4_port(bool inner, u16 ip_proto)
+{
+       if (inner) {
+               switch (ip_proto) {
+               case IPPROTO_UDP:
+                       return ICE_UDP_ILOS;
+               }
+       } else {
+               switch (ip_proto) {
+               case IPPROTO_TCP:
+                       return ICE_TCP_IL;
+               case IPPROTO_UDP:
+                       return ICE_UDP_OF;
+               }
+       }
+
+       return 0;
+}
+
+static enum ice_protocol_type
+ice_proto_type_from_tunnel(enum ice_tunnel_type type)
+{
+       switch (type) {
+       case TNL_VXLAN:
+               return ICE_VXLAN;
+       case TNL_GENEVE:
+               return ICE_GENEVE;
+       case TNL_GRETAP:
+               return ICE_NVGRE;
+       default:
+               return 0;
+       }
+}
+
+static enum ice_sw_tunnel_type
+ice_sw_type_from_tunnel(enum ice_tunnel_type type)
+{
+       switch (type) {
+       case TNL_VXLAN:
+               return ICE_SW_TUN_VXLAN;
+       case TNL_GENEVE:
+               return ICE_SW_TUN_GENEVE;
+       case TNL_GRETAP:
+               return ICE_SW_TUN_NVGRE;
+       default:
+               return ICE_NON_TUN;
+       }
+}
+
+static int
+ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr,
+                        struct ice_adv_lkup_elem *list)
+{
+       struct ice_tc_flower_lyr_2_4_hdrs *hdr = &fltr->outer_headers;
+       int i = 0;
+
+       if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) {
+               u32 tenant_id;
+
+               list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type);
+               switch (fltr->tunnel_type) {
+               case TNL_VXLAN:
+               case TNL_GENEVE:
+                       tenant_id = be32_to_cpu(fltr->tenant_id) << 8;
+                       list[i].h_u.tnl_hdr.vni = cpu_to_be32(tenant_id);
+                       memcpy(&list[i].m_u.tnl_hdr.vni, "\xff\xff\xff\x00", 4);
+                       i++;
+                       break;
+               case TNL_GRETAP:
+                       list[i].h_u.nvgre_hdr.tni_flow = fltr->tenant_id;
+                       memcpy(&list[i].m_u.nvgre_hdr.tni_flow, "\xff\xff\xff\xff", 4);
+                       i++;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+                    ICE_TC_FLWR_FIELD_ENC_DEST_IPV4)) {
+               list[i].type = ice_proto_type_from_ipv4(false);
+
+               if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV4) {
+                       list[i].h_u.ipv4_hdr.src_addr = hdr->l3_key.src_ipv4;
+                       list[i].m_u.ipv4_hdr.src_addr = hdr->l3_mask.src_ipv4;
+               }
+               if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV4) {
+                       list[i].h_u.ipv4_hdr.dst_addr = hdr->l3_key.dst_ipv4;
+                       list[i].m_u.ipv4_hdr.dst_addr = hdr->l3_mask.dst_ipv4;
+               }
+               i++;
+       }
+
+       if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+                    ICE_TC_FLWR_FIELD_ENC_DEST_IPV6)) {
+               list[i].type = ice_proto_type_from_ipv6(false);
+
+               if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV6) {
+                       memcpy(&list[i].h_u.ipv6_hdr.src_addr,
+                              &hdr->l3_key.src_ipv6_addr,
+                              sizeof(hdr->l3_key.src_ipv6_addr));
+                       memcpy(&list[i].m_u.ipv6_hdr.src_addr,
+                              &hdr->l3_mask.src_ipv6_addr,
+                              sizeof(hdr->l3_mask.src_ipv6_addr));
+               }
+               if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV6) {
+                       memcpy(&list[i].h_u.ipv6_hdr.dst_addr,
+                              &hdr->l3_key.dst_ipv6_addr,
+                              sizeof(hdr->l3_key.dst_ipv6_addr));
+                       memcpy(&list[i].m_u.ipv6_hdr.dst_addr,
+                              &hdr->l3_mask.dst_ipv6_addr,
+                              sizeof(hdr->l3_mask.dst_ipv6_addr));
+               }
+               i++;
+       }
+
+       if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) {
+               list[i].type = ice_proto_type_from_l4_port(false, hdr->l3_key.ip_proto);
+               list[i].h_u.l4_hdr.dst_port = hdr->l4_key.dst_port;
+               list[i].m_u.l4_hdr.dst_port = hdr->l4_mask.dst_port;
+               i++;
+       }
+
+       return i;
+}
+
 /**
  * ice_tc_fill_rules - fill filter rules based on TC fltr
  * @hw: pointer to HW structure
@@ -67,9 +222,16 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
                  u16 *l4_proto)
 {
        struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers;
+       bool inner = false;
        int i = 0;
 
-       if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) {
+       rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type);
+       if (tc_fltr->tunnel_type != TNL_LAST) {
+               i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list);
+
+               headers = &tc_fltr->inner_headers;
+               inner = true;
+       } else if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) {
                list[i].type = ICE_ETYPE_OL;
                list[i].h_u.ethertype.ethtype_id = headers->l2_key.n_proto;
                list[i].m_u.ethertype.ethtype_id = headers->l2_mask.n_proto;
@@ -83,7 +245,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
                l2_key = &headers->l2_key;
                l2_mask = &headers->l2_mask;
 
-               list[i].type = ICE_MAC_OFOS;
+               list[i].type = ice_proto_type_from_mac(inner);
                if (flags & ICE_TC_FLWR_FIELD_DST_MAC) {
                        ether_addr_copy(list[i].h_u.eth_hdr.dst_addr,
                                        l2_key->dst_mac);
@@ -112,7 +274,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
                     ICE_TC_FLWR_FIELD_SRC_IPV4)) {
                struct ice_tc_l3_hdr *l3_key, *l3_mask;
 
-               list[i].type = ICE_IPV4_OFOS;
+               list[i].type = ice_proto_type_from_ipv4(inner);
                l3_key = &headers->l3_key;
                l3_mask = &headers->l3_mask;
                if (flags & ICE_TC_FLWR_FIELD_DEST_IPV4) {
@@ -129,7 +291,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
                struct ice_ipv6_hdr *ipv6_hdr, *ipv6_mask;
                struct ice_tc_l3_hdr *l3_key, *l3_mask;
 
-               list[i].type = ICE_IPV6_OFOS;
+               list[i].type = ice_proto_type_from_ipv6(inner);
                ipv6_hdr = &list[i].h_u.ipv6_hdr;
                ipv6_mask = &list[i].m_u.ipv6_hdr;
                l3_key = &headers->l3_key;
@@ -155,19 +317,10 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
                     ICE_TC_FLWR_FIELD_SRC_L4_PORT)) {
                struct ice_tc_l4_hdr *l4_key, *l4_mask;
 
+               list[i].type = ice_proto_type_from_l4_port(inner, headers->l3_key.ip_proto);
                l4_key = &headers->l4_key;
                l4_mask = &headers->l4_mask;
-               if (headers->l3_key.ip_proto == IPPROTO_TCP) {
-                       list[i].type = ICE_TCP_IL;
-                       /* detected L4 proto is TCP */
-                       if (l4_proto)
-                               *l4_proto = IPPROTO_TCP;
-               } else if (headers->l3_key.ip_proto == IPPROTO_UDP) {
-                       list[i].type = ICE_UDP_ILOS;
-                       /* detected L4 proto is UDP */
-                       if (l4_proto)
-                               *l4_proto = IPPROTO_UDP;
-               }
+
                if (flags & ICE_TC_FLWR_FIELD_DEST_L4_PORT) {
                        list[i].h_u.l4_hdr.dst_port = l4_key->dst_port;
                        list[i].m_u.l4_hdr.dst_port = l4_mask->dst_port;
@@ -182,6 +335,30 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
        return i;
 }
 
+/**
+ * ice_tc_tun_get_type - get the tunnel type
+ * @tunnel_dev: ptr to tunnel device
+ *
+ * This function detects the appropriate tunnel type if the specified device
+ * is a tunnel device such as VXLAN, Geneve, or GRETAP
+ */
+static int ice_tc_tun_get_type(struct net_device *tunnel_dev)
+{
+       if (netif_is_vxlan(tunnel_dev))
+               return TNL_VXLAN;
+       if (netif_is_geneve(tunnel_dev))
+               return TNL_GENEVE;
+       if (netif_is_gretap(tunnel_dev) ||
+           netif_is_ip6gretap(tunnel_dev))
+               return TNL_GRETAP;
+       return TNL_LAST;
+}
+
+bool ice_is_tunnel_supported(struct net_device *dev)
+{
+       return ice_tc_tun_get_type(dev) != TNL_LAST;
+}
+
 static int
 ice_eswitch_tc_parse_action(struct ice_tc_flower_fltr *fltr,
                            struct flow_action_entry *act)
@@ -201,10 +378,8 @@ ice_eswitch_tc_parse_action(struct ice_tc_flower_fltr *fltr,
 
                        fltr->dest_vsi = repr->src_vsi;
                        fltr->direction = ICE_ESWITCH_FLTR_INGRESS;
-               } else if (netif_is_ice(act->dev)) {
-                       struct ice_netdev_priv *np = netdev_priv(act->dev);
-
-                       fltr->dest_vsi = np->vsi;
+               } else if (netif_is_ice(act->dev) ||
+                          ice_is_tunnel_supported(act->dev)) {
                        fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
                } else {
                        NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported netdevice in switchdev mode");
@@ -235,11 +410,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
        int ret = 0;
        int i;
 
-       if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 |
-                               ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
-                               ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 |
-                               ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
-                               ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) {
+       if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) {
                NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)");
                return -EOPNOTSUPP;
        }
@@ -255,6 +426,10 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
                goto exit;
        }
 
+       /* egress traffic is always redirected to the uplink */
+       if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS)
+               fltr->dest_vsi = vsi->back->switchdev.uplink_vsi;
+
        rule_info.sw_act.fltr_act = fltr->action.fltr_act;
        if (fltr->action.fltr_act != ICE_DROP_PACKET)
                rule_info.sw_act.vsi_handle = fltr->dest_vsi->idx;
@@ -438,19 +613,26 @@ exit:
  * @match: Pointer to flow match structure
  * @fltr: Pointer to filter structure
  * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel IPv4 address
  */
 static int
 ice_tc_set_ipv4(struct flow_match_ipv4_addrs *match,
                struct ice_tc_flower_fltr *fltr,
-               struct ice_tc_flower_lyr_2_4_hdrs *headers)
+               struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
 {
        if (match->key->dst) {
-               fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV4;
+               if (is_encap)
+                       fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV4;
+               else
+                       fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV4;
                headers->l3_key.dst_ipv4 = match->key->dst;
                headers->l3_mask.dst_ipv4 = match->mask->dst;
        }
        if (match->key->src) {
-               fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV4;
+               if (is_encap)
+                       fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV4;
+               else
+                       fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV4;
                headers->l3_key.src_ipv4 = match->key->src;
                headers->l3_mask.src_ipv4 = match->mask->src;
        }
@@ -462,11 +644,12 @@ ice_tc_set_ipv4(struct flow_match_ipv4_addrs *match,
  * @match: Pointer to flow match structure
  * @fltr: Pointer to filter structure
  * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel IPv6 address
  */
 static int
 ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match,
                struct ice_tc_flower_fltr *fltr,
-               struct ice_tc_flower_lyr_2_4_hdrs *headers)
+               struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
 {
        struct ice_tc_l3_hdr *l3_key, *l3_mask;
 
@@ -484,21 +667,31 @@ ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match,
                NL_SET_ERR_MSG_MOD(fltr->extack, "Bad src/dest IPv6, addr is any");
                return -EINVAL;
        }
-       if (!ipv6_addr_any(&match->mask->dst))
-               fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV6;
-       if (!ipv6_addr_any(&match->mask->src))
-               fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV6;
+       if (!ipv6_addr_any(&match->mask->dst)) {
+               if (is_encap)
+                       fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV6;
+               else
+                       fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV6;
+       }
+       if (!ipv6_addr_any(&match->mask->src)) {
+               if (is_encap)
+                       fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV6;
+               else
+                       fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV6;
+       }
 
        l3_key = &headers->l3_key;
        l3_mask = &headers->l3_mask;
 
-       if (fltr->flags & ICE_TC_FLWR_FIELD_SRC_IPV6) {
+       if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+                          ICE_TC_FLWR_FIELD_SRC_IPV6)) {
                memcpy(&l3_key->src_ipv6_addr, &match->key->src.s6_addr,
                       sizeof(match->key->src.s6_addr));
                memcpy(&l3_mask->src_ipv6_addr, &match->mask->src.s6_addr,
                       sizeof(match->mask->src.s6_addr));
        }
-       if (fltr->flags & ICE_TC_FLWR_FIELD_DEST_IPV6) {
+       if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 |
+                          ICE_TC_FLWR_FIELD_DEST_IPV6)) {
                memcpy(&l3_key->dst_ipv6_addr, &match->key->dst.s6_addr,
                       sizeof(match->key->dst.s6_addr));
                memcpy(&l3_mask->dst_ipv6_addr, &match->mask->dst.s6_addr,
@@ -513,18 +706,27 @@ ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match,
  * @match: Flow match structure
  * @fltr: Pointer to filter structure
  * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel port
  */
 static int
 ice_tc_set_port(struct flow_match_ports match,
                struct ice_tc_flower_fltr *fltr,
-               struct ice_tc_flower_lyr_2_4_hdrs *headers)
+               struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
 {
        if (match.key->dst) {
-               fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT;
+               if (is_encap)
+                       fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT;
+               else
+                       fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT;
                headers->l4_key.dst_port = match.key->dst;
                headers->l4_mask.dst_port = match.mask->dst;
        }
        if (match.key->src) {
-               fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT;
+               if (is_encap)
+                       fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT;
+               else
+                       fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT;
                headers->l4_key.src_port = match.key->src;
                headers->l4_mask.src_port = match.mask->src;
@@ -532,6 +734,85 @@ ice_tc_set_port(struct flow_match_ports match,
        return 0;
 }
 
+static struct net_device *
+ice_get_tunnel_device(struct net_device *dev, struct flow_rule *rule)
+{
+       struct flow_action_entry *act;
+       int i;
+
+       if (ice_is_tunnel_supported(dev))
+               return dev;
+
+       flow_action_for_each(i, act, &rule->action) {
+               if (act->id == FLOW_ACTION_REDIRECT &&
+                   ice_is_tunnel_supported(act->dev))
+                       return act->dev;
+       }
+
+       return NULL;
+}
+
+static int
+ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule,
+                     struct ice_tc_flower_fltr *fltr)
+{
+       struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
+       struct flow_match_control enc_control;
+
+       fltr->tunnel_type = ice_tc_tun_get_type(dev);
+       headers->l3_key.ip_proto = IPPROTO_UDP;
+
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+               struct flow_match_enc_keyid enc_keyid;
+
+               flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+               if (!enc_keyid.mask->keyid ||
+                   enc_keyid.mask->keyid != cpu_to_be32(ICE_TC_FLOWER_MASK_32))
+                       return -EINVAL;
+
+               fltr->flags |= ICE_TC_FLWR_FIELD_TENANT_ID;
+               fltr->tenant_id = enc_keyid.key->keyid;
+       }
+
+       flow_rule_match_enc_control(rule, &enc_control);
+
+       if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+               struct flow_match_ipv4_addrs match;
+
+               flow_rule_match_enc_ipv4_addrs(rule, &match);
+               if (ice_tc_set_ipv4(&match, fltr, headers, true))
+                       return -EINVAL;
+       } else if (enc_control.key->addr_type ==
+                                       FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+               struct flow_match_ipv6_addrs match;
+
+               flow_rule_match_enc_ipv6_addrs(rule, &match);
+               if (ice_tc_set_ipv6(&match, fltr, headers, true))
+                       return -EINVAL;
+       }
+
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+               struct flow_match_ip match;
+
+               flow_rule_match_enc_ip(rule, &match);
+               headers->l3_key.tos = match.key->tos;
+               headers->l3_key.ttl = match.key->ttl;
+               headers->l3_mask.tos = match.mask->tos;
+               headers->l3_mask.ttl = match.mask->ttl;
+       }
+
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+               struct flow_match_ports match;
+
+               flow_rule_match_enc_ports(rule, &match);
+               if (ice_tc_set_port(match, fltr, headers, true))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
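
Note the key-ID rule: only an exact match is offloadable, so the mask must be all ones (ICE_TC_FLOWER_MASK_32, added in the header diff below) and anything partial fails early with -EINVAL. A small sketch of that validation:

    #include <arpa/inet.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MASK_32 0xFFFFFFFF          /* mirrors ICE_TC_FLOWER_MASK_32 */

    static int check_keyid_mask(uint32_t mask_be)
    {
            if (!mask_be || mask_be != htonl(MASK_32))
                    return -EINVAL;     /* partial key-ID masks are not offloadable */
            return 0;
    }

    int main(void)
    {
            printf("%d\n", check_keyid_mask(htonl(0xFFFFFFFF)));    /* 0 */
            printf("%d\n", check_keyid_mask(htonl(0x00FFFFFF)));    /* -EINVAL */
            return 0;
    }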
 /**
  * ice_parse_cls_flower - Parse TC flower filters provided by kernel
  * @vsi: Pointer to the VSI
@@ -548,6 +829,7 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
        struct flow_dissector *dissector;
+       struct net_device *tunnel_dev;
 
        dissector = rule->match.dissector;
 
@@ -559,12 +841,43 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
              BIT(FLOW_DISSECTOR_KEY_PORTS))) {
                NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported key used");
                return -EOPNOTSUPP;
        }
 
+       tunnel_dev = ice_get_tunnel_device(filter_dev, rule);
+       if (tunnel_dev) {
+               int err;
+
+               filter_dev = tunnel_dev;
+
+               err = ice_parse_tunnel_attr(filter_dev, rule, fltr);
+               if (err) {
+                       NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to parse TC flower tunnel attributes");
+                       return err;
+               }
+
+               /* header pointers should point to the inner headers; outer
+                * headers were already set by ice_parse_tunnel_attr
+                */
+               headers = &fltr->inner_headers;
+       } else if (dissector->used_keys &
+                 (BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+                  BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+                  BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+                  BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
+               NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel");
+               return -EOPNOTSUPP;
+       } else {
+               fltr->tunnel_type = TNL_LAST;
+       }
+
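The dispatch above is a two-level whitelist: dissector->used_keys is first checked against the set of keys the driver understands, and enc_* keys are then only accepted when a tunnel device was actually found. A small stand-alone sketch of the same bitmask gating, with made-up key numbering rather than the real FLOW_DISSECTOR_KEY_* values:

#include <stdio.h>

#define BIT(n) (1u << (n))

/* made-up key numbering, standing in for FLOW_DISSECTOR_KEY_* */
enum { KEY_BASIC, KEY_VLAN, KEY_ENC_KEYID, KEY_MPLS };

int main(void)
{
        unsigned int allowed = BIT(KEY_BASIC) | BIT(KEY_VLAN) |
                               BIT(KEY_ENC_KEYID);
        unsigned int used = BIT(KEY_BASIC) | BIT(KEY_ENC_KEYID);
        int have_tunnel_dev = 0;   /* no tunnel netdev was found */

        if (used & ~allowed) {
                printf("unsupported key used: -EOPNOTSUPP\n");
                return 1;
        }
        if (!have_tunnel_dev && (used & BIT(KEY_ENC_KEYID))) {
                printf("tunnel key used, but device isn't a tunnel: -EOPNOTSUPP\n");
                return 1;
        }
        printf("keys accepted\n");
        return 0;
}
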
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_match_basic match;
 
@@ -651,7 +964,7 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
                struct flow_match_ipv4_addrs match;
 
                flow_rule_match_ipv4_addrs(rule, &match);
-               if (ice_tc_set_ipv4(&match, fltr, headers))
+               if (ice_tc_set_ipv4(&match, fltr, headers, false))
                        return -EINVAL;
        }
 
@@ -659,7 +972,7 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
                struct flow_match_ipv6_addrs match;
 
                flow_rule_match_ipv6_addrs(rule, &match);
-               if (ice_tc_set_ipv6(&match, fltr, headers))
+               if (ice_tc_set_ipv6(&match, fltr, headers, false))
                        return -EINVAL;
        }
 
@@ -667,7 +980,7 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
                struct flow_match_ports match;
 
                flow_rule_match_ports(rule, &match);
-               if (ice_tc_set_port(match, fltr, headers))
+               if (ice_tc_set_port(match, fltr, headers, false))
                        return -EINVAL;
                switch (headers->l3_key.ip_proto) {
                case IPPROTO_TCP:
index ee9b284..3190494 100644 (file)
 #define ICE_TC_FLWR_FIELD_ENC_DST_MAC          BIT(16)
 #define ICE_TC_FLWR_FIELD_ETH_TYPE_ID          BIT(17)
 
+#define ICE_TC_FLOWER_MASK_32   0xFFFFFFFF
+
+struct ice_indr_block_priv {
+       struct net_device *netdev;
+       struct ice_netdev_priv *np;
+       struct list_head list;
+};
+
 struct ice_tc_flower_action {
        u32 tc_class;
        enum ice_sw_fwd_act_type fltr_act;
@@ -112,6 +120,7 @@ struct ice_tc_flower_fltr {
        struct ice_vsi *src_vsi;
        __be32 tenant_id;
        u32 flags;
+       u8 tunnel_type;
        struct ice_tc_flower_action     action;
 
        /* cache ptr which is used wherever needed to communicate netlink
@@ -148,5 +157,6 @@ ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
 int
 ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower);
 void ice_replay_tc_fltrs(struct ice_pf *pf);
+bool ice_is_tunnel_supported(struct net_device *dev);
 
 #endif /* _ICE_TC_LIB_H_ */
index a42eaf6..6a74344 100644 (file)
@@ -4499,13 +4499,6 @@ void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops)
        *ops = ice_vc_vf_dflt_ops;
 }
 
-static int
-ice_vc_repr_no_action_msg(struct ice_vf __always_unused *vf,
-                         u8 __always_unused *msg)
-{
-       return 0;
-}
-
 /**
  * ice_vc_repr_add_mac
  * @vf: pointer to VF
@@ -4581,20 +4574,62 @@ ice_vc_repr_del_mac(struct ice_vf __always_unused *vf, u8 __always_unused *msg)
                                     VIRTCHNL_STATUS_SUCCESS, NULL, 0);
 }
 
-static int ice_vc_repr_no_action(struct ice_vf __always_unused *vf)
+static int ice_vc_repr_add_vlan(struct ice_vf *vf, u8 __always_unused *msg)
 {
-       return 0;
+       dev_dbg(ice_pf_to_dev(vf->pf),
+               "Can't add VLAN in switchdev mode for VF %d\n", vf->vf_id);
+       return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN,
+                                    VIRTCHNL_STATUS_SUCCESS, NULL, 0);
+}
+
+static int ice_vc_repr_del_vlan(struct ice_vf *vf, u8 __always_unused *msg)
+{
+       dev_dbg(ice_pf_to_dev(vf->pf),
+               "Can't delete VLAN in switchdev mode for VF %d\n", vf->vf_id);
+       return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN,
+                                    VIRTCHNL_STATUS_SUCCESS, NULL, 0);
+}
+
+static int ice_vc_repr_ena_vlan_stripping(struct ice_vf *vf)
+{
+       dev_dbg(ice_pf_to_dev(vf->pf),
+               "Can't enable VLAN stripping in switchdev mode for VF %d\n",
+               vf->vf_id);
+       return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
+                                    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+                                    NULL, 0);
+}
+
+static int ice_vc_repr_dis_vlan_stripping(struct ice_vf *vf)
+{
+       dev_dbg(ice_pf_to_dev(vf->pf),
+               "Can't disable VLAN stripping in switchdev mode for VF %d\n",
+               vf->vf_id);
+       return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+                                    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+                                    NULL, 0);
+}
+
+static int
+ice_vc_repr_cfg_promiscuous_mode(struct ice_vf *vf, u8 __always_unused *msg)
+{
+       dev_dbg(ice_pf_to_dev(vf->pf),
+               "Can't config promiscuous mode in switchdev mode for VF %d\n",
+               vf->vf_id);
+       return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+                                    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+                                    NULL, 0);
 }
 
 void ice_vc_change_ops_to_repr(struct ice_vc_vf_ops *ops)
 {
        ops->add_mac_addr_msg = ice_vc_repr_add_mac;
        ops->del_mac_addr_msg = ice_vc_repr_del_mac;
-       ops->add_vlan_msg = ice_vc_repr_no_action_msg;
-       ops->remove_vlan_msg = ice_vc_repr_no_action_msg;
-       ops->ena_vlan_stripping = ice_vc_repr_no_action;
-       ops->dis_vlan_stripping = ice_vc_repr_no_action;
-       ops->cfg_promiscuous_mode_msg = ice_vc_repr_no_action_msg;
+       ops->add_vlan_msg = ice_vc_repr_add_vlan;
+       ops->remove_vlan_msg = ice_vc_repr_del_vlan;
+       ops->ena_vlan_stripping = ice_vc_repr_ena_vlan_stripping;
+       ops->dis_vlan_stripping = ice_vc_repr_dis_vlan_stripping;
+       ops->cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode;
 }
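
The hunk above replaces the silent no-op callbacks with per-op handlers: VLAN add/delete still reply VIRTCHNL_STATUS_SUCCESS (so VF initialization does not fail) but now log the skip, while stripping and promiscuous mode reply VIRTCHNL_STATUS_ERR_NOT_SUPPORTED. A minimal sketch of the ops-table override pattern itself; vf_ops and the demo_* handlers are hypothetical stand-ins, not driver API:

#include <stdio.h>

struct vf_ops {
        int (*add_vlan)(int vf_id);
};

static int demo_add_vlan(int vf_id)
{
        printf("VF %d: VLAN added\n", vf_id);
        return 0;
}

static int demo_repr_add_vlan(int vf_id)
{
        /* ACK success so the VF does not fail init, but log the skip */
        printf("VF %d: can't add VLAN in switchdev mode\n", vf_id);
        return 0;
}

int main(void)
{
        struct vf_ops ops = { .add_vlan = demo_add_vlan };

        ops.add_vlan = demo_repr_add_vlan;   /* enter representor mode */
        return ops.add_vlan(3);
}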
 
 /**
index ecf1e11..0da09ea 100644 (file)
@@ -25,6 +25,7 @@
 #define XRX200_DMA_DATA_LEN    (SZ_64K - 1)
 #define XRX200_DMA_RX          0
 #define XRX200_DMA_TX          1
+#define XRX200_DMA_BURST_LEN   8
 
 /* cpu port mac */
 #define PMAC_RX_IPG            0x0024
@@ -73,9 +74,6 @@ struct xrx200_priv {
        struct net_device *net_dev;
        struct device *dev;
 
-       int tx_burst_len;
-       int rx_burst_len;
-
        __iomem void *pmac_reg;
 };
 
@@ -323,7 +321,7 @@ static netdev_tx_t xrx200_start_xmit(struct sk_buff *skb,
                goto err_drop;
 
        /* dma needs to start on a burst length value aligned address */
-       byte_offset = mapping % (priv->tx_burst_len * 4);
+       byte_offset = mapping % (XRX200_DMA_BURST_LEN * 4);
 
        desc->addr = mapping - byte_offset;
        /* Make sure the address is written before we give it to HW */
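
With the burst length now fixed at XRX200_DMA_BURST_LEN == 8, the descriptor base must be aligned to 8 * 4 = 32 bytes, and the remainder of the mapping is carried as a byte offset. A runnable sketch of that arithmetic (the mapping value is an arbitrary example):

#include <stdint.h>
#include <stdio.h>

#define XRX200_DMA_BURST_LEN 8

int main(void)
{
        uint32_t mapping = 0x1000002c;   /* arbitrary example bus address */
        uint32_t byte_offset = mapping % (XRX200_DMA_BURST_LEN * 4);

        /* descriptor base rounded down to 32 bytes, offset kept aside */
        printf("desc->addr = 0x%x, byte_offset = %u\n",
               mapping - byte_offset, byte_offset);   /* 0x10000020, 12 */
        return 0;
}
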
@@ -422,7 +420,8 @@ static int xrx200_dma_init(struct xrx200_priv *priv)
        int ret = 0;
        int i;
 
-       ltq_dma_init_port(DMA_PORT_ETOP, priv->tx_burst_len, rx_burst_len);
+       ltq_dma_init_port(DMA_PORT_ETOP, XRX200_DMA_BURST_LEN,
+                         XRX200_DMA_BURST_LEN);
 
        ch_rx->dma.nr = XRX200_DMA_RX;
        ch_rx->dma.dev = priv->dev;
@@ -531,18 +530,6 @@ static int xrx200_probe(struct platform_device *pdev)
        if (err)
                eth_hw_addr_random(net_dev);
 
-       err = device_property_read_u32(dev, "lantiq,tx-burst-length", &priv->tx_burst_len);
-       if (err < 0) {
-               dev_err(dev, "unable to read tx-burst-length property\n");
-               return err;
-       }
-
-       err = device_property_read_u32(dev, "lantiq,rx-burst-length", &priv->rx_burst_len);
-       if (err < 0) {
-               dev_err(dev, "unable to read rx-burst-length property\n");
-               return err;
-       }
-
        /* bring up the dma engine and IP core */
        err = xrx200_dma_init(priv);
        if (err)
index b6c6365..5a7bdca 100644 (file)
@@ -3823,8 +3823,6 @@ static void mvneta_validate(struct phylink_config *config,
                            unsigned long *supported,
                            struct phylink_link_state *state)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct mvneta_port *pp = netdev_priv(ndev);
        __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
        /* We only support QSGMII, SGMII, 802.3z and RGMII modes.
@@ -3832,15 +3830,8 @@ static void mvneta_validate(struct phylink_config *config,
         * "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
         * When <PortType> = 1 (1000BASE-X) this field must be set to 1."
         */
-       if (phy_interface_mode_is_8023z(state->interface)) {
-               if (!phylink_test(state->advertising, Autoneg)) {
-                       linkmode_zero(supported);
-                       return;
-               }
-       } else if (state->interface != PHY_INTERFACE_MODE_NA &&
-                  state->interface != PHY_INTERFACE_MODE_QSGMII &&
-                  state->interface != PHY_INTERFACE_MODE_SGMII &&
-                  !phy_interface_mode_is_rgmii(state->interface)) {
+       if (phy_interface_mode_is_8023z(state->interface) &&
+           !phylink_test(state->advertising, Autoneg)) {
                linkmode_zero(supported);
                return;
        }
@@ -3853,11 +3844,12 @@ static void mvneta_validate(struct phylink_config *config,
        phylink_set(mask, Pause);
 
        /* Half-duplex at speeds higher than 100Mbit is unsupported */
-       if (pp->comphy || state->interface != PHY_INTERFACE_MODE_2500BASEX) {
+       if (state->interface != PHY_INTERFACE_MODE_2500BASEX) {
                phylink_set(mask, 1000baseT_Full);
                phylink_set(mask, 1000baseX_Full);
        }
-       if (pp->comphy || state->interface == PHY_INTERFACE_MODE_2500BASEX) {
+
+       if (state->interface == PHY_INTERFACE_MODE_2500BASEX) {
                phylink_set(mask, 2500baseT_Full);
                phylink_set(mask, 2500baseX_Full);
        }
@@ -3872,11 +3864,6 @@ static void mvneta_validate(struct phylink_config *config,
 
        linkmode_and(supported, supported, mask);
        linkmode_and(state->advertising, state->advertising, mask);
-
-       /* We can only operate at 2500BaseX or 1000BaseX.  If requested
-        * to advertise both, only report advertising at 2500BaseX.
-        */
-       phylink_helper_basex_speed(state);
 }
 
 static void mvneta_mac_pcs_get_state(struct phylink_config *config,
@@ -5179,6 +5166,31 @@ static int mvneta_probe(struct platform_device *pdev)
 
        pp->phylink_config.dev = &dev->dev;
        pp->phylink_config.type = PHYLINK_NETDEV;
+       phy_interface_set_rgmii(pp->phylink_config.supported_interfaces);
+       __set_bit(PHY_INTERFACE_MODE_QSGMII,
+                 pp->phylink_config.supported_interfaces);
+       if (comphy) {
+               /* If a COMPHY is present, we can support any of the serdes
+                * modes and switch between them.
+                */
+               __set_bit(PHY_INTERFACE_MODE_SGMII,
+                         pp->phylink_config.supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+                         pp->phylink_config.supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_2500BASEX,
+                         pp->phylink_config.supported_interfaces);
+       } else if (phy_mode == PHY_INTERFACE_MODE_2500BASEX) {
+               /* No COMPHY, with only 2500BASE-X mode supported */
+               __set_bit(PHY_INTERFACE_MODE_2500BASEX,
+                         pp->phylink_config.supported_interfaces);
+       } else if (phy_mode == PHY_INTERFACE_MODE_1000BASEX ||
+                  phy_mode == PHY_INTERFACE_MODE_SGMII) {
+               /* No COMPHY, we can switch between 1000BASE-X and SGMII */
+               __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+                         pp->phylink_config.supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_SGMII,
+                         pp->phylink_config.supported_interfaces);
+       }
 
        phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode,
                                 phy_mode, &mvneta_phylink_ops);
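
The probe-time block above moves interface-mode policy out of mvneta_validate(): instead of rejecting modes on every validate() call, the driver declares once which modes the port can use, and phylink filters against the supported_interfaces bitmap. A plain-C sketch of the serdes-mode selection; the enum values and bit operations are stand-ins for PHY_INTERFACE_MODE_* and the kernel bitmap helpers, and the real code additionally sets RGMII/QSGMII unconditionally:

#include <stdio.h>

enum { MODE_SGMII, MODE_1000BASEX, MODE_2500BASEX, MODE_MAX };

static const char *names[MODE_MAX] = {
        "sgmii", "1000base-x", "2500base-x"
};

int main(void)
{
        unsigned long supported = 0;
        int has_comphy = 0;              /* assumed probe result */
        int phy_mode = MODE_2500BASEX;   /* assumed DT phy-mode */
        int i;

        if (has_comphy)   /* any serdes mode, switchable at runtime */
                supported = (1ul << MODE_SGMII) | (1ul << MODE_1000BASEX) |
                            (1ul << MODE_2500BASEX);
        else if (phy_mode == MODE_2500BASEX)
                supported = 1ul << MODE_2500BASEX;
        else if (phy_mode == MODE_1000BASEX || phy_mode == MODE_SGMII)
                /* 1000BASE-X and SGMII can swap without a COMPHY */
                supported = (1ul << MODE_SGMII) | (1ul << MODE_1000BASEX);

        for (i = 0; i < MODE_MAX; i++)
                if (supported & (1ul << i))
                        printf("supported: %s\n", names[i]);
        return 0;
}
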
index 8ddf58f..587def6 100644 (file)
@@ -6261,32 +6261,13 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
        struct mvpp2_port *port = mvpp2_phylink_to_port(config);
        __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
-       /* Invalid combinations */
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_10GBASER:
-       case PHY_INTERFACE_MODE_XAUI:
-               if (!mvpp2_port_supports_xlg(port))
-                       goto empty_set;
-               break;
-       case PHY_INTERFACE_MODE_RGMII:
-       case PHY_INTERFACE_MODE_RGMII_ID:
-       case PHY_INTERFACE_MODE_RGMII_RXID:
-       case PHY_INTERFACE_MODE_RGMII_TXID:
-               if (!mvpp2_port_supports_rgmii(port))
-                       goto empty_set;
-               break;
-       case PHY_INTERFACE_MODE_1000BASEX:
-       case PHY_INTERFACE_MODE_2500BASEX:
-               /* When in 802.3z mode, we must have AN enabled:
-                * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
-                * When <PortType> = 1 (1000BASE-X) this field must be set to 1.
-                */
-               if (!phylink_test(state->advertising, Autoneg))
-                       goto empty_set;
-               break;
-       default:
-               break;
-       }
+       /* When in 802.3z mode, we must have AN enabled:
+        * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
+        * When <PortType> = 1 (1000BASE-X) this field must be set to 1.
+        */
+       if (phy_interface_mode_is_8023z(state->interface) &&
+           !phylink_test(state->advertising, Autoneg))
+               goto empty_set;
 
        phylink_set(mask, Autoneg);
        phylink_set_port_modes(mask);
@@ -6299,14 +6280,12 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
        switch (state->interface) {
        case PHY_INTERFACE_MODE_10GBASER:
        case PHY_INTERFACE_MODE_XAUI:
-       case PHY_INTERFACE_MODE_NA:
                if (mvpp2_port_supports_xlg(port)) {
                        phylink_set_10g_modes(mask);
                        phylink_set(mask, 10000baseKR_Full);
                }
-               if (state->interface != PHY_INTERFACE_MODE_NA)
-                       break;
-               fallthrough;
+               break;
+
        case PHY_INTERFACE_MODE_RGMII:
        case PHY_INTERFACE_MODE_RGMII_ID:
        case PHY_INTERFACE_MODE_RGMII_RXID:
@@ -6318,30 +6297,24 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
                phylink_set(mask, 100baseT_Full);
                phylink_set(mask, 1000baseT_Full);
                phylink_set(mask, 1000baseX_Full);
-               if (state->interface != PHY_INTERFACE_MODE_NA)
-                       break;
-               fallthrough;
+               break;
+
        case PHY_INTERFACE_MODE_1000BASEX:
+               phylink_set(mask, 1000baseT_Full);
+               phylink_set(mask, 1000baseX_Full);
+               break;
+
        case PHY_INTERFACE_MODE_2500BASEX:
-               if (port->comphy ||
-                   state->interface != PHY_INTERFACE_MODE_2500BASEX) {
-                       phylink_set(mask, 1000baseT_Full);
-                       phylink_set(mask, 1000baseX_Full);
-               }
-               if (port->comphy ||
-                   state->interface == PHY_INTERFACE_MODE_2500BASEX) {
-                       phylink_set(mask, 2500baseT_Full);
-                       phylink_set(mask, 2500baseX_Full);
-               }
+               phylink_set(mask, 2500baseT_Full);
+               phylink_set(mask, 2500baseX_Full);
                break;
+
        default:
                goto empty_set;
        }
 
        linkmode_and(supported, supported, mask);
        linkmode_and(state->advertising, state->advertising, mask);
-
-       phylink_helper_basex_speed(state);
        return;
 
 empty_set:
@@ -6937,6 +6910,40 @@ static int mvpp2_port_probe(struct platform_device *pdev,
                port->phylink_config.dev = &dev->dev;
                port->phylink_config.type = PHYLINK_NETDEV;
 
+               if (mvpp2_port_supports_xlg(port)) {
+                       __set_bit(PHY_INTERFACE_MODE_10GBASER,
+                                 port->phylink_config.supported_interfaces);
+                       __set_bit(PHY_INTERFACE_MODE_XAUI,
+                                 port->phylink_config.supported_interfaces);
+               }
+
+               if (mvpp2_port_supports_rgmii(port))
+                       phy_interface_set_rgmii(port->phylink_config.supported_interfaces);
+
+               if (comphy) {
+                       /* If a COMPHY is present, we can support any of the
+                        * serdes modes and switch between them.
+                        */
+                       __set_bit(PHY_INTERFACE_MODE_SGMII,
+                                 port->phylink_config.supported_interfaces);
+                       __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+                                 port->phylink_config.supported_interfaces);
+                       __set_bit(PHY_INTERFACE_MODE_2500BASEX,
+                                 port->phylink_config.supported_interfaces);
+               } else if (phy_mode == PHY_INTERFACE_MODE_2500BASEX) {
+                       /* No COMPHY, with only 2500BASE-X mode supported */
+                       __set_bit(PHY_INTERFACE_MODE_2500BASEX,
+                                 port->phylink_config.supported_interfaces);
+               } else if (phy_mode == PHY_INTERFACE_MODE_1000BASEX ||
+                          phy_mode == PHY_INTERFACE_MODE_SGMII) {
+                       /* No COMPHY, we can switch between 1000BASE-X and SGMII
+                        */
+                       __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+                                 port->phylink_config.supported_interfaces);
+                       __set_bit(PHY_INTERFACE_MODE_SGMII,
+                                 port->phylink_config.supported_interfaces);
+               }
+
                phylink = phylink_create(&port->phylink_config, port_fwnode,
                                         phy_mode, &mvpp2_phylink_ops);
                if (IS_ERR(phylink)) {
index 3144d30..77fd39e 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef NPC_H
 #define NPC_H
 
+#define NPC_KEX_CHAN_MASK      0xFFFULL
+
 enum NPC_LID_E {
        NPC_LID_LA = 0,
        NPC_LID_LB,
@@ -591,6 +593,8 @@ struct rvu_npc_mcam_rule {
        u8 default_rule;
        bool enable;
        bool vfvlan_cfg;
+       u16 chan;
+       u16 chan_mask;
 };
 
 #endif /* NPC_H */
index 9338765..c7fd466 100644 (file)
@@ -95,7 +95,7 @@ static char *cgx_tx_stats_fields[] = {
        [CGX_STAT5]     = "Total frames sent on the interface",
        [CGX_STAT6]     = "Packets sent with an octet count < 64",
        [CGX_STAT7]     = "Packets sent with an octet count == 64",
-       [CGX_STAT8]     = "Packets sent with an octet count of 65–127",
+       [CGX_STAT8]     = "Packets sent with an octet count of 65-127",
        [CGX_STAT9]     = "Packets sent with an octet count of 128-255",
        [CGX_STAT10]    = "Packets sent with an octet count of 256-511",
        [CGX_STAT11]    = "Packets sent with an octet count of 512-1023",
@@ -125,7 +125,7 @@ static char *rpm_rx_stats_fields[] = {
        "Total frames received on interface",
        "Packets received with an octet count < 64",
        "Packets received with an octet count == 64",
-       "Packets received with an octet count of 65–127",
+       "Packets received with an octet count of 65-127",
        "Packets received with an octet count of 128-255",
        "Packets received with an octet count of 256-511",
        "Packets received with an octet count of 512-1023",
@@ -164,7 +164,7 @@ static char *rpm_tx_stats_fields[] = {
        "Packets sent to the multicast DMAC",
        "Packets sent to a broadcast DMAC",
        "Packets sent with an octet count == 64",
-       "Packets sent with an octet count of 65–127",
+       "Packets sent with an octet count of 65-127",
        "Packets sent with an octet count of 128-255",
        "Packets sent with an octet count of 256-511",
        "Packets sent with an octet count of 512-1023",
@@ -226,18 +226,175 @@ static const struct file_operations rvu_dbg_##name##_fops = { \
 
 static void print_nix_qsize(struct seq_file *filp, struct rvu_pfvf *pfvf);
 
+#define LMT_MAPTBL_ENTRY_SIZE 16
+/* Dump LMTST map table */
+static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
+                                              char __user *buffer,
+                                              size_t count, loff_t *ppos)
+{
+       struct rvu *rvu = filp->private_data;
+       u64 lmt_addr, val, tbl_base;
+       int pf, vf, num_vfs, hw_vfs;
+       void __iomem *lmt_map_base;
+       int index = 0, off = 0;
+       int bytes_not_copied;
+       int buf_size = 10240;
+       char *buf;
+
+       /* don't allow partial reads */
+       if (*ppos != 0)
+               return 0;
+
+       buf = kzalloc(buf_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
+
+       lmt_map_base = ioremap_wc(tbl_base, 128 * 1024);
+       if (!lmt_map_base) {
+               dev_err(rvu->dev, "Failed to set up lmt map table mapping!\n");
+               kfree(buf);
+               return -ENOMEM;
+       }
+
+       off +=  scnprintf(&buf[off], buf_size - 1 - off,
+                         "\n\t\t\t\t\tLmtst Map Table Entries");
+       off +=  scnprintf(&buf[off], buf_size - 1 - off,
+                         "\n\t\t\t\t\t=======================");
+       off +=  scnprintf(&buf[off], buf_size - 1 - off, "\nPcifunc\t\t\t");
+       off +=  scnprintf(&buf[off], buf_size - 1 - off, "Table Index\t\t");
+       off +=  scnprintf(&buf[off], buf_size - 1 - off,
+                         "Lmtline Base (word 0)\t\t");
+       off +=  scnprintf(&buf[off], buf_size - 1 - off,
+                         "Lmt Map Entry (word 1)");
+       off += scnprintf(&buf[off], buf_size - 1 - off, "\n");
+       for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
+               off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d  \t\t\t",
+                                   pf);
+
+               index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE;
+               off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t",
+                                (tbl_base + index));
+               lmt_addr = readq(lmt_map_base + index);
+               off += scnprintf(&buf[off], buf_size - 1 - off,
+                                " 0x%016llx\t\t", lmt_addr);
+               index += 8;
+               val = readq(lmt_map_base + index);
+               off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%016llx\n",
+                                val);
+               /* Reading num of VFs per PF */
+               rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs);
+               for (vf = 0; vf < num_vfs; vf++) {
+                       index = (pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE) +
+                               ((vf + 1) * LMT_MAPTBL_ENTRY_SIZE);
+                       off += scnprintf(&buf[off], buf_size - 1 - off,
+                                           "PF%d:VF%d  \t\t", pf, vf);
+                       off += scnprintf(&buf[off], buf_size - 1 - off,
+                                        " 0x%llx\t\t", (tbl_base + index));
+                       lmt_addr = readq(lmt_map_base + index);
+                       off += scnprintf(&buf[off], buf_size - 1 - off,
+                                        " 0x%016llx\t\t", lmt_addr);
+                       index += 8;
+                       val = readq(lmt_map_base + index);
+                       off += scnprintf(&buf[off], buf_size - 1 - off,
+                                        " 0x%016llx\n", val);
+               }
+       }
+       off +=  scnprintf(&buf[off], buf_size - 1 - off, "\n");
+
+       bytes_not_copied = copy_to_user(buffer, buf, off);
+       kfree(buf);
+
+       iounmap(lmt_map_base);
+       if (bytes_not_copied)
+               return -EFAULT;
+
+       *ppos = off;
+       return off;
+}
+
+RVU_DEBUG_FOPS(lmtst_map_table, lmtst_map_table_display, NULL);
+
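The dump walks the LMTST map table as fixed 16-byte entries: a PF's entry sits at pf * total_vfs * 16 and its VFs occupy the slots that follow, with word 0 holding the lmtline base and word 1 the map entry. A sketch of just the index arithmetic (ENTRY_SIZE and the totals are illustrative values):

#include <stdio.h>

#define ENTRY_SIZE 16

static unsigned int pf_index(unsigned int pf, unsigned int total_vfs)
{
        return pf * total_vfs * ENTRY_SIZE;
}

static unsigned int vf_index(unsigned int pf, unsigned int vf,
                             unsigned int total_vfs)
{
        return pf_index(pf, total_vfs) + (vf + 1) * ENTRY_SIZE;
}

int main(void)
{
        unsigned int total_vfs = 128;

        printf("PF1 entry at     0x%x\n", pf_index(1, total_vfs));    /* 0x800 */
        printf("PF1:VF0 entry at 0x%x\n", vf_index(1, 0, total_vfs)); /* 0x810 */
        return 0;
}
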
+static void get_lf_str_list(struct rvu_block block, int pcifunc,
+                           char *lfs)
+{
+       int lf = 0, seq = 0, len = 0, prev_lf = block.lf.max;
+
+       for_each_set_bit(lf, block.lf.bmap, block.lf.max) {
+               if (lf >= block.lf.max)
+                       break;
+
+               if (block.fn_map[lf] != pcifunc)
+                       continue;
+
+               if (lf == prev_lf + 1) {
+                       prev_lf = lf;
+                       seq = 1;
+                       continue;
+               }
+
+               if (seq)
+                       len += sprintf(lfs + len, "-%d,%d", prev_lf, lf);
+               else
+                       len += (len ? sprintf(lfs + len, ",%d", lf) :
+                                     sprintf(lfs + len, "%d", lf));
+
+               prev_lf = lf;
+               seq = 0;
+       }
+
+       if (seq)
+               len += sprintf(lfs + len, "-%d", prev_lf);
+
+       lfs[len] = '\0';
+}
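
get_lf_str_list() run-length-compresses the LFs attached to a pcifunc, so {0, 1, 2, 5, 6} prints as "0-2,5-6" instead of five comma-separated values. The same formatting logic over a plain array, as a runnable sketch:

#include <stdio.h>

static void lf_list(const int *lfs, int n, char *out)
{
        int len = 0, seq = 0, prev = -2;
        int i;

        for (i = 0; i < n; i++) {
                int lf = lfs[i];

                if (lf == prev + 1) {   /* extend the current run */
                        prev = lf;
                        seq = 1;
                        continue;
                }
                if (seq)                /* close the run, start a new item */
                        len += sprintf(out + len, "-%d,%d", prev, lf);
                else if (len)
                        len += sprintf(out + len, ",%d", lf);
                else
                        len += sprintf(out + len, "%d", lf);
                prev = lf;
                seq = 0;
        }
        if (seq)
                len += sprintf(out + len, "-%d", prev);
        out[len] = '\0';
}

int main(void)
{
        int lfs[] = { 0, 1, 2, 5, 6 };
        char buf[64];

        lf_list(lfs, 5, buf);
        printf("%s\n", buf);   /* prints "0-2,5-6" */
        return 0;
}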
+
+static int get_max_column_width(struct rvu *rvu)
+{
+       int index, pf, vf, lf_str_size = 12, buf_size = 256;
+       struct rvu_block block;
+       u16 pcifunc;
+       char *buf;
+
+       buf = kzalloc(buf_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
+               for (vf = 0; vf <= rvu->hw->total_vfs; vf++) {
+                       pcifunc = pf << 10 | vf;
+                       if (!pcifunc)
+                               continue;
+
+                       for (index = 0; index < BLK_COUNT; index++) {
+                               block = rvu->hw->block[index];
+                               if (!strlen(block.name))
+                                       continue;
+
+                               get_lf_str_list(block, pcifunc, buf);
+                               if (lf_str_size <= strlen(buf))
+                                       lf_str_size = strlen(buf) + 1;
+                       }
+               }
+       }
+
+       kfree(buf);
+       return lf_str_size;
+}
+
 /* Dumps current provisioning status of all RVU block LFs */
 static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp,
                                          char __user *buffer,
                                          size_t count, loff_t *ppos)
 {
-       int index, off = 0, flag = 0, go_back = 0, len = 0;
+       int index, off = 0, flag = 0, len = 0;
        struct rvu *rvu = filp->private_data;
-       int lf, pf, vf, pcifunc;
+       int bytes_not_copied = 0;
        struct rvu_block block;
-       int bytes_not_copied;
-       int lf_str_size = 12;
+       int pf, vf, pcifunc;
        int buf_size = 2048;
+       int lf_str_size;
        char *lfs;
        char *buf;
 
@@ -249,6 +406,9 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp,
        if (!buf)
                return -ENOSPC;
 
+       /* Get the maximum width of a column */
+       lf_str_size = get_max_column_width(rvu);
+
        lfs = kzalloc(lf_str_size, GFP_KERNEL);
        if (!lfs) {
                kfree(buf);
@@ -262,65 +422,69 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp,
                                         "%-*s", lf_str_size,
                                         rvu->hw->block[index].name);
                }
+
        off += scnprintf(&buf[off], buf_size - 1 - off, "\n");
+       bytes_not_copied = copy_to_user(buffer + *ppos, buf, off);
+       if (bytes_not_copied)
+               goto out;
+
+       *ppos += off;
        for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
                for (vf = 0; vf <= rvu->hw->total_vfs; vf++) {
+                       off = 0;
+                       flag = 0;
                        pcifunc = pf << 10 | vf;
                        if (!pcifunc)
                                continue;
 
                        if (vf) {
                                sprintf(lfs, "PF%d:VF%d", pf, vf - 1);
-                               go_back = scnprintf(&buf[off],
-                                                   buf_size - 1 - off,
-                                                   "%-*s", lf_str_size, lfs);
+                               off = scnprintf(&buf[off],
+                                               buf_size - 1 - off,
+                                               "%-*s", lf_str_size, lfs);
                        } else {
                                sprintf(lfs, "PF%d", pf);
-                               go_back = scnprintf(&buf[off],
-                                                   buf_size - 1 - off,
-                                                   "%-*s", lf_str_size, lfs);
+                               off = scnprintf(&buf[off],
+                                               buf_size - 1 - off,
+                                               "%-*s", lf_str_size, lfs);
                        }
 
-                       off += go_back;
-                       for (index = 0; index < BLKTYPE_MAX; index++) {
+                       for (index = 0; index < BLK_COUNT; index++) {
                                block = rvu->hw->block[index];
                                if (!strlen(block.name))
                                        continue;
                                len = 0;
                                lfs[len] = '\0';
-                               for (lf = 0; lf < block.lf.max; lf++) {
-                                       if (block.fn_map[lf] != pcifunc)
-                                               continue;
+                               get_lf_str_list(block, pcifunc, lfs);
+                               if (strlen(lfs))
                                        flag = 1;
-                                       len += sprintf(&lfs[len], "%d,", lf);
-                               }
 
-                               if (flag)
-                                       len--;
-                               lfs[len] = '\0';
                                off += scnprintf(&buf[off], buf_size - 1 - off,
                                                 "%-*s", lf_str_size, lfs);
-                               if (!strlen(lfs))
-                                       go_back += lf_str_size;
                        }
-                       if (!flag)
-                               off -= go_back;
-                       else
-                               flag = 0;
-                       off--;
-                       off +=  scnprintf(&buf[off], buf_size - 1 - off, "\n");
+                       if (flag) {
+                               off +=  scnprintf(&buf[off],
+                                                 buf_size - 1 - off, "\n");
+                               bytes_not_copied = copy_to_user(buffer + *ppos,
+                                                               buf, off);
+                               if (bytes_not_copied)
+                                       goto out;
+
+                               *ppos += off;
+                       }
                }
        }
 
-       bytes_not_copied = copy_to_user(buffer, buf, off);
+out:
        kfree(lfs);
        kfree(buf);
-
        if (bytes_not_copied)
                return -EFAULT;
 
-       *ppos = off;
-       return off;
+       return *ppos;
 }
 
 RVU_DEBUG_FOPS(rsrc_status, rsrc_attach_status, NULL);
@@ -504,7 +668,7 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp,
        if (cmd_buf)
                ret = -EINVAL;
 
-       if (!strncmp(subtoken, "help", 4) || ret < 0) {
+       if (ret < 0 || !strncmp(subtoken, "help", 4)) {
                dev_info(rvu->dev, "Use echo <%s-lf > qsize\n", blk_string);
                goto qsize_write_done;
        }
@@ -1719,6 +1883,10 @@ static int rvu_dbg_nix_band_prof_ctx_display(struct seq_file *m, void *unused)
        u16 pcifunc;
        char *str;
 
+       /* Ingress policers do not exist on all platforms */
+       if (!nix_hw->ipolicer)
+               return 0;
+
        for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
                if (layer == BAND_PROF_INVAL_LAYER)
                        continue;
@@ -1768,6 +1936,10 @@ static int rvu_dbg_nix_band_prof_rsrc_display(struct seq_file *m, void *unused)
        int layer;
        char *str;
 
+       /* Ingress policers do not exist on all platforms */
+       if (!nix_hw->ipolicer)
+               return 0;
+
        seq_puts(m, "\nBandwidth profile resource free count\n");
        seq_puts(m, "=====================================\n");
        for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
@@ -1878,7 +2050,7 @@ static int cgx_print_stats(struct seq_file *s, int lmac_id)
                return -ENODEV;
 
        mac_ops = get_mac_ops(cgxd);
-
+       /* The system may have no CGX devices at all */
        if (!mac_ops)
                return 0;
 
@@ -1956,13 +2128,13 @@ static int cgx_print_stats(struct seq_file *s, int lmac_id)
                if (err)
                        return err;
 
-       if (is_rvu_otx2(rvu))
-               seq_printf(s, "%s: %llu\n", cgx_tx_stats_fields[stat],
-                          tx_stat);
-       else
-               seq_printf(s, "%s: %llu\n", rpm_tx_stats_fields[stat],
-                          tx_stat);
-       stat++;
+               if (is_rvu_otx2(rvu))
+                       seq_printf(s, "%s: %llu\n", cgx_tx_stats_fields[stat],
+                                  tx_stat);
+               else
+                       seq_printf(s, "%s: %llu\n", rpm_tx_stats_fields[stat],
+                                  tx_stat);
+               stat++;
        }
 
        return err;
@@ -2400,6 +2572,8 @@ static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused)
                                seq_printf(s, "VF%d", vf);
                        }
                        seq_puts(s, "\n");
+                       seq_printf(s, "\tchannel: 0x%x\n", iter->chan);
+                       seq_printf(s, "\tchannel_mask: 0x%x\n", iter->chan_mask);
                }
 
                rvu_dbg_npc_mcam_show_action(s, iter);
@@ -2672,6 +2846,10 @@ void rvu_dbg_init(struct rvu *rvu)
        debugfs_create_file("rsrc_alloc", 0444, rvu->rvu_dbg.root, rvu,
                            &rvu_dbg_rsrc_status_fops);
 
+       if (!is_rvu_otx2(rvu))
+               debugfs_create_file("lmtst_map_table", 0444, rvu->rvu_dbg.root,
+                                   rvu, &rvu_dbg_lmtst_map_table_fops);
+
        if (!cgx_get_cgxcnt_max())
                goto create;
 
index 7761dcf..d8b1948 100644 (file)
@@ -2583,6 +2583,9 @@ static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc)
                return;
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
+       if (!nix_hw)
+               return;
+
        vlan = &nix_hw->txvlan;
 
        mutex_lock(&vlan->rsrc_lock);
index 51ddc7b..ff2b219 100644 (file)
@@ -1119,6 +1119,9 @@ find_rule:
        rule->default_rule = req->default_rule;
        rule->owner = owner;
        rule->enable = enable;
+       rule->chan_mask = write_req.entry_data.kw_mask[0] & NPC_KEX_CHAN_MASK;
+       rule->chan = write_req.entry_data.kw[0] & NPC_KEX_CHAN_MASK;
+       rule->chan &= rule->chan_mask;
        if (is_npc_intf_tx(req->intf))
                rule->intf = pfvf->nix_tx_intf;
        else
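
The two new fields record where the packet entered the switch: the ingress channel lives in the low 12 bits of MCAM key word 0, so both the mask and the masked channel are captured for the debugfs dump above. A sketch of the extraction (the key-word values are arbitrary examples):

#include <stdint.h>
#include <stdio.h>

#define NPC_KEX_CHAN_MASK 0xFFFULL

int main(void)
{
        uint64_t kw0 = 0xABCD123456789ULL;   /* example MCAM key word 0 */
        uint64_t kw_mask0 = 0xFFFULL;        /* example mask word 0 */
        uint16_t chan_mask = kw_mask0 & NPC_KEX_CHAN_MASK;
        uint16_t chan = (kw0 & NPC_KEX_CHAN_MASK) & chan_mask;

        printf("chan 0x%x mask 0x%x\n", chan, chan_mask);   /* 0x789 0xfff */
        return 0;
}
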
index f18fe66..2a4c14c 100644 (file)
@@ -53,6 +53,8 @@ struct prestera_port_stats {
        u64 good_octets_sent;
 };
 
+#define PRESTERA_AP_PORT_MAX   (10)
+
 struct prestera_port_caps {
        u64 supp_link_modes;
        u8 supp_fec;
@@ -69,6 +71,39 @@ struct prestera_lag {
 
 struct prestera_flow_block;
 
+struct prestera_port_mac_state {
+       u32 mode;
+       u32 speed;
+       bool oper;
+       u8 duplex;
+       u8 fc;
+       u8 fec;
+};
+
+struct prestera_port_phy_state {
+       u64 lmode_bmap;
+       struct {
+               bool pause;
+               bool asym_pause;
+       } remote_fc;
+       u8 mdix;
+};
+
+struct prestera_port_mac_config {
+       u32 mode;
+       u32 speed;
+       bool admin;
+       u8 inband;
+       u8 duplex;
+       u8 fec;
+};
+
+struct prestera_port_phy_config {
+       u32 mode;
+       bool admin;
+       u8 mdix;
+};
+
 struct prestera_port {
        struct net_device *dev;
        struct prestera_switch *sw;
@@ -91,6 +126,10 @@ struct prestera_port {
                struct prestera_port_stats stats;
                struct delayed_work caching_dw;
        } cached_hw_stats;
+       struct prestera_port_mac_config cfg_mac;
+       struct prestera_port_phy_config cfg_phy;
+       struct prestera_port_mac_state state_mac;
+       struct prestera_port_phy_state state_phy;
 };
 
 struct prestera_device {
@@ -107,7 +146,7 @@ struct prestera_device {
        int (*recv_msg)(struct prestera_device *dev, void *msg, size_t size);
 
        /* called by higher layer to send request to the firmware */
-       int (*send_req)(struct prestera_device *dev, void *in_msg,
+       int (*send_req)(struct prestera_device *dev, int qid, void *in_msg,
                        size_t in_size, void *out_msg, size_t out_size,
                        unsigned int wait);
 };
@@ -129,13 +168,28 @@ enum prestera_rxtx_event_id {
 
 enum prestera_port_event_id {
        PRESTERA_PORT_EVENT_UNSPEC,
-       PRESTERA_PORT_EVENT_STATE_CHANGED,
+       PRESTERA_PORT_EVENT_MAC_STATE_CHANGED,
 };
 
 struct prestera_port_event {
        u32 port_id;
        union {
-               u32 oper_state;
+               struct {
+                       u32 mode;
+                       u32 speed;
+                       u8 oper;
+                       u8 duplex;
+                       u8 fc;
+                       u8 fec;
+               } mac;
+               struct {
+                       u64 lmode_bmap;
+                       struct {
+                               bool pause;
+                               bool asym_pause;
+                       } remote_fc;
+                       u8 mdix;
+               } phy;
        } data;
 };
 
@@ -223,11 +277,16 @@ void prestera_device_unregister(struct prestera_device *dev);
 struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw,
                                                 u32 dev_id, u32 hw_id);
 
-int prestera_port_autoneg_set(struct prestera_port *port, bool enable,
-                             u64 adver_link_modes, u8 adver_fec);
+int prestera_port_autoneg_set(struct prestera_port *port, u64 link_modes);
 
 struct prestera_port *prestera_find_port(struct prestera_switch *sw, u32 id);
 
+int prestera_port_cfg_mac_read(struct prestera_port *port,
+                              struct prestera_port_mac_config *cfg);
+
+int prestera_port_cfg_mac_write(struct prestera_port *port,
+                               struct prestera_port_mac_config *cfg);
+
 struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev);
 
 int prestera_port_pvid_set(struct prestera_port *port, u16 vid);
index 93a5e2b..6011454 100644 (file)
@@ -323,7 +323,6 @@ static int prestera_port_type_set(const struct ethtool_link_ksettings *ecmd,
 {
        u32 new_mode = PRESTERA_LINK_MODE_MAX;
        u32 type, mode;
-       int err;
 
        for (type = 0; type < PRESTERA_PORT_TYPE_MAX; type++) {
                if (port_types[type].eth_type == ecmd->base.port &&
@@ -348,13 +347,8 @@ static int prestera_port_type_set(const struct ethtool_link_ksettings *ecmd,
                }
        }
 
-       if (new_mode < PRESTERA_LINK_MODE_MAX)
-               err = prestera_hw_port_link_mode_set(port, new_mode);
-       else
-               err = -EINVAL;
-
-       if (err)
-               return err;
+       if (new_mode >= PRESTERA_LINK_MODE_MAX)
+               return -EINVAL;
 
        port->caps.type = type;
        port->autoneg = false;
@@ -434,27 +428,33 @@ static void prestera_port_supp_types_get(struct ethtool_link_ksettings *ecmd,
 static void prestera_port_remote_cap_get(struct ethtool_link_ksettings *ecmd,
                                         struct prestera_port *port)
 {
+       struct prestera_port_phy_state *state = &port->state_phy;
        bool asym_pause;
        bool pause;
        u64 bitmap;
        int err;
 
-       err = prestera_hw_port_remote_cap_get(port, &bitmap);
-       if (!err) {
-               prestera_modes_to_eth(ecmd->link_modes.lp_advertising,
-                                     bitmap, 0, PRESTERA_PORT_TYPE_NONE);
+       err = prestera_hw_port_phy_mode_get(port, NULL, &state->lmode_bmap,
+                                           &state->remote_fc.pause,
+                                           &state->remote_fc.asym_pause);
+       if (err)
+               netdev_warn(port->dev, "Remote link caps get failed for transceiver %d\n",
+                           port->caps.transceiver);
 
-               if (!bitmap_empty(ecmd->link_modes.lp_advertising,
-                                 __ETHTOOL_LINK_MODE_MASK_NBITS)) {
-                       ethtool_link_ksettings_add_link_mode(ecmd,
-                                                            lp_advertising,
-                                                            Autoneg);
-               }
+       bitmap = state->lmode_bmap;
+
+       prestera_modes_to_eth(ecmd->link_modes.lp_advertising,
+                             bitmap, 0, PRESTERA_PORT_TYPE_NONE);
+
+       if (!bitmap_empty(ecmd->link_modes.lp_advertising,
+                         __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+               ethtool_link_ksettings_add_link_mode(ecmd,
+                                                    lp_advertising,
+                                                    Autoneg);
        }
 
-       err = prestera_hw_port_remote_fc_get(port, &pause, &asym_pause);
-       if (err)
-               return;
+       pause = state->remote_fc.pause;
+       asym_pause = state->remote_fc.asym_pause;
 
        if (pause)
                ethtool_link_ksettings_add_link_mode(ecmd,
@@ -466,30 +466,46 @@ static void prestera_port_remote_cap_get(struct ethtool_link_ksettings *ecmd,
                                                     Asym_Pause);
 }
 
-static void prestera_port_speed_get(struct ethtool_link_ksettings *ecmd,
-                                   struct prestera_port *port)
+static void prestera_port_link_mode_get(struct ethtool_link_ksettings *ecmd,
+                                       struct prestera_port *port)
 {
+       struct prestera_port_mac_state *state = &port->state_mac;
        u32 speed;
+       u8 duplex;
        int err;
 
-       err = prestera_hw_port_speed_get(port, &speed);
-       ecmd->base.speed = err ? SPEED_UNKNOWN : speed;
+       if (!port->state_mac.oper)
+               return;
+
+       if (state->speed == SPEED_UNKNOWN || state->duplex == DUPLEX_UNKNOWN) {
+               err = prestera_hw_port_mac_mode_get(port, NULL, &speed,
+                                                   &duplex, NULL);
+               if (err) {
+                       state->speed = SPEED_UNKNOWN;
+                       state->duplex = DUPLEX_UNKNOWN;
+               } else {
+                       state->speed = speed;
+                       state->duplex = duplex == PRESTERA_PORT_DUPLEX_FULL ?
+                                         DUPLEX_FULL : DUPLEX_HALF;
+               }
+       }
+
+       ecmd->base.speed = port->state_mac.speed;
+       ecmd->base.duplex = port->state_mac.duplex;
 }
 
-static void prestera_port_duplex_get(struct ethtool_link_ksettings *ecmd,
-                                    struct prestera_port *port)
+static void prestera_port_mdix_get(struct ethtool_link_ksettings *ecmd,
+                                  struct prestera_port *port)
 {
-       u8 duplex;
-       int err;
+       struct prestera_port_phy_state *state = &port->state_phy;
 
-       err = prestera_hw_port_duplex_get(port, &duplex);
-       if (err) {
-               ecmd->base.duplex = DUPLEX_UNKNOWN;
-               return;
+       if (prestera_hw_port_phy_mode_get(port, &state->mdix, NULL, NULL, NULL)) {
+               netdev_warn(port->dev, "MDIX params get failed\n");
+               state->mdix = ETH_TP_MDI_INVALID;
        }
 
-       ecmd->base.duplex = duplex == PRESTERA_PORT_DUPLEX_FULL ?
-                           DUPLEX_FULL : DUPLEX_HALF;
+       ecmd->base.eth_tp_mdix = port->state_phy.mdix;
+       ecmd->base.eth_tp_mdix_ctrl = port->cfg_phy.mdix;
 }
 
 static int
@@ -501,6 +517,8 @@ prestera_ethtool_get_link_ksettings(struct net_device *dev,
        ethtool_link_ksettings_zero_link_mode(ecmd, supported);
        ethtool_link_ksettings_zero_link_mode(ecmd, advertising);
        ethtool_link_ksettings_zero_link_mode(ecmd, lp_advertising);
+       ecmd->base.speed = SPEED_UNKNOWN;
+       ecmd->base.duplex = DUPLEX_UNKNOWN;
 
        ecmd->base.autoneg = port->autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
 
@@ -521,13 +539,8 @@ prestera_ethtool_get_link_ksettings(struct net_device *dev,
 
        prestera_port_supp_types_get(ecmd, port);
 
-       if (netif_carrier_ok(dev)) {
-               prestera_port_speed_get(ecmd, port);
-               prestera_port_duplex_get(ecmd, port);
-       } else {
-               ecmd->base.speed = SPEED_UNKNOWN;
-               ecmd->base.duplex = DUPLEX_UNKNOWN;
-       }
+       if (netif_carrier_ok(dev))
+               prestera_port_link_mode_get(ecmd, port);
 
        ecmd->base.port = prestera_port_type_get(port);
 
@@ -545,8 +558,7 @@ prestera_ethtool_get_link_ksettings(struct net_device *dev,
 
        if (port->caps.type == PRESTERA_PORT_TYPE_TP &&
            port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER)
-               prestera_hw_port_mdix_get(port, &ecmd->base.eth_tp_mdix,
-                                         &ecmd->base.eth_tp_mdix_ctrl);
+               prestera_port_mdix_get(ecmd, port);
 
        return 0;
 }
@@ -555,12 +567,17 @@ static int prestera_port_mdix_set(const struct ethtool_link_ksettings *ecmd,
                                  struct prestera_port *port)
 {
        if (ecmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_INVALID &&
-           port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER &&
-           port->caps.type == PRESTERA_PORT_TYPE_TP)
-               return prestera_hw_port_mdix_set(port,
-                                                ecmd->base.eth_tp_mdix_ctrl);
-
+           port->caps.transceiver == PRESTERA_PORT_TCVR_COPPER &&
+           port->caps.type == PRESTERA_PORT_TYPE_TP) {
+               port->cfg_phy.mdix = ecmd->base.eth_tp_mdix_ctrl;
+               return prestera_hw_port_phy_mode_set(port, port->cfg_phy.admin,
+                                                    port->autoneg,
+                                                    port->cfg_phy.mode,
+                                                    port->adver_link_modes,
+                                                    port->cfg_phy.mdix);
+       }
        return 0;
+
 }
 
 static int prestera_port_link_mode_set(struct prestera_port *port,
@@ -568,12 +585,15 @@ static int prestera_port_link_mode_set(struct prestera_port *port,
 {
        u32 new_mode = PRESTERA_LINK_MODE_MAX;
        u32 mode;
+       int err;
 
        for (mode = 0; mode < PRESTERA_LINK_MODE_MAX; mode++) {
-               if (speed != port_link_modes[mode].speed)
+               if (speed != SPEED_UNKNOWN &&
+                   speed != port_link_modes[mode].speed)
                        continue;
 
-               if (duplex != port_link_modes[mode].duplex)
+               if (duplex != DUPLEX_UNKNOWN &&
+                   duplex != port_link_modes[mode].duplex)
                        continue;
 
                if (!(port_link_modes[mode].pr_mask &
@@ -590,36 +610,31 @@ static int prestera_port_link_mode_set(struct prestera_port *port,
        if (new_mode == PRESTERA_LINK_MODE_MAX)
                return -EOPNOTSUPP;
 
-       return prestera_hw_port_link_mode_set(port, new_mode);
+       err = prestera_hw_port_phy_mode_set(port, port->cfg_phy.admin,
+                                           false, new_mode, 0,
+                                           port->cfg_phy.mdix);
+       if (err)
+               return err;
+
+       port->adver_fec = BIT(PRESTERA_PORT_FEC_OFF);
+       port->adver_link_modes = 0;
+       port->cfg_phy.mode = new_mode;
+       port->autoneg = false;
+
+       return 0;
 }
 
 static int
 prestera_port_speed_duplex_set(const struct ethtool_link_ksettings *ecmd,
                               struct prestera_port *port)
 {
-       u32 curr_mode;
-       u8 duplex;
-       u32 speed;
-       int err;
-
-       err = prestera_hw_port_link_mode_get(port, &curr_mode);
-       if (err)
-               return err;
-       if (curr_mode >= PRESTERA_LINK_MODE_MAX)
-               return -EINVAL;
+       u8 duplex = DUPLEX_UNKNOWN;
 
        if (ecmd->base.duplex != DUPLEX_UNKNOWN)
                duplex = ecmd->base.duplex == DUPLEX_FULL ?
                         PRESTERA_PORT_DUPLEX_FULL : PRESTERA_PORT_DUPLEX_HALF;
-       else
-               duplex = port_link_modes[curr_mode].duplex;
 
-       if (ecmd->base.speed != SPEED_UNKNOWN)
-               speed = ecmd->base.speed;
-       else
-               speed = port_link_modes[curr_mode].speed;
-
-       return prestera_port_link_mode_set(port, speed, duplex,
+       return prestera_port_link_mode_set(port, ecmd->base.speed, duplex,
                                           port->caps.type);
 }
 
@@ -645,19 +660,12 @@ prestera_ethtool_set_link_ksettings(struct net_device *dev,
        prestera_modes_from_eth(ecmd->link_modes.advertising, &adver_modes,
                                &adver_fec, port->caps.type);
 
-       err = prestera_port_autoneg_set(port,
-                                       ecmd->base.autoneg == AUTONEG_ENABLE,
-                                       adver_modes, adver_fec);
-       if (err)
-               return err;
-
-       if (ecmd->base.autoneg == AUTONEG_DISABLE) {
+       if (ecmd->base.autoneg == AUTONEG_ENABLE)
+               err = prestera_port_autoneg_set(port, adver_modes);
+       else
                err = prestera_port_speed_duplex_set(ecmd, port);
-               if (err)
-                       return err;
-       }
 
-       return 0;
+       return err;
 }
 
 static int prestera_ethtool_get_fecparam(struct net_device *dev,
@@ -668,7 +676,7 @@ static int prestera_ethtool_get_fecparam(struct net_device *dev,
        u32 mode;
        int err;
 
-       err = prestera_hw_port_fec_get(port, &active);
+       err = prestera_hw_port_mac_mode_get(port, NULL, NULL, NULL, &active);
        if (err)
                return err;
 
@@ -693,18 +701,19 @@ static int prestera_ethtool_set_fecparam(struct net_device *dev,
                                         struct ethtool_fecparam *fecparam)
 {
        struct prestera_port *port = netdev_priv(dev);
-       u8 fec, active;
+       struct prestera_port_mac_config cfg_mac;
        u32 mode;
-       int err;
+       u8 fec;
 
        if (port->autoneg) {
                netdev_err(dev, "FEC set is not allowed while autoneg is on\n");
                return -EINVAL;
        }
 
-       err = prestera_hw_port_fec_get(port, &active);
-       if (err)
-               return err;
+       if (port->caps.transceiver == PRESTERA_PORT_TCVR_SFP) {
+               netdev_err(dev, "FEC set is not allowed on SFP ports\n");
+               return -EINVAL;
+       }
 
        fec = PRESTERA_PORT_FEC_MAX;
        for (mode = 0; mode < PRESTERA_PORT_FEC_MAX; mode++) {
@@ -715,13 +724,19 @@ static int prestera_ethtool_set_fecparam(struct net_device *dev,
                }
        }
 
-       if (fec == active)
+       prestera_port_cfg_mac_read(port, &cfg_mac);
+
+       if (fec == cfg_mac.fec)
                return 0;
 
-       if (fec == PRESTERA_PORT_FEC_MAX)
-               return -EOPNOTSUPP;
+       if (fec == PRESTERA_PORT_FEC_MAX) {
+               netdev_err(dev, "Unsupported FEC requested\n");
+               return -EINVAL;
+       }
+
+       cfg_mac.fec = fec;
 
-       return prestera_hw_port_fec_set(port, fec);
+       return prestera_port_cfg_mac_write(port, &cfg_mac);
 }
 
 static int prestera_ethtool_get_sset_count(struct net_device *dev, int sset)
@@ -766,6 +781,28 @@ static int prestera_ethtool_nway_reset(struct net_device *dev)
        return -EINVAL;
 }
 
+void prestera_ethtool_port_state_changed(struct prestera_port *port,
+                                        struct prestera_port_event *evt)
+{
+       struct prestera_port_mac_state *smac = &port->state_mac;
+
+       smac->oper = evt->data.mac.oper;
+
+       if (smac->oper) {
+               smac->mode = evt->data.mac.mode;
+               smac->speed = evt->data.mac.speed;
+               smac->duplex = evt->data.mac.duplex;
+               smac->fc = evt->data.mac.fc;
+               smac->fec = evt->data.mac.fec;
+       } else {
+               smac->mode = PRESTERA_MAC_MODE_MAX;
+               smac->speed = SPEED_UNKNOWN;
+               smac->duplex = DUPLEX_UNKNOWN;
+               smac->fc = 0;
+               smac->fec = 0;
+       }
+}
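
Together with the lazy hardware query in prestera_port_link_mode_get() above, this forms an event-driven cache: link events populate the MAC state, a link-down event resets it to the UNKNOWN sentinels, and readers touch the hardware only when the cache was invalidated. A minimal sketch of that pattern (mac_state and the constants are stand-ins, not driver types):

#include <stdio.h>

#define SPEED_UNKNOWN (-1)

struct mac_state { int oper; int speed; };

/* event handler: trust event data while the link is up, invalidate on down */
static void on_link_event(struct mac_state *s, int oper, int speed)
{
        s->oper = oper;
        s->speed = oper ? speed : SPEED_UNKNOWN;
}

/* reader: query "hardware" only if the cache was invalidated */
static int get_speed(struct mac_state *s)
{
        if (s->oper && s->speed == SPEED_UNKNOWN)
                s->speed = 10000;   /* stand-in for a hw register read */
        return s->speed;
}

int main(void)
{
        struct mac_state s = { 0, SPEED_UNKNOWN };

        on_link_event(&s, 1, 25000);
        printf("speed %d\n", get_speed(&s));   /* 25000, no hw query */
        on_link_event(&s, 0, 0);
        printf("speed %d\n", get_speed(&s));   /* -1, link down */
        return 0;
}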
+
 const struct ethtool_ops prestera_ethtool_ops = {
        .get_drvinfo = prestera_ethtool_get_drvinfo,
        .get_link_ksettings = prestera_ethtool_get_link_ksettings,
index 523ef1f..9eb18e9 100644 (file)
@@ -6,6 +6,12 @@
 
 #include <linux/ethtool.h>
 
+struct prestera_port_event;
+struct prestera_port;
+
 extern const struct ethtool_ops prestera_ethtool_ops;
 
+void prestera_ethtool_port_state_changed(struct prestera_port *port,
+                                        struct prestera_port_event *evt);
+
 #endif /* _PRESTERA_ETHTOOL_H_ */
index c129785..41ba17c 100644 (file)
@@ -47,7 +47,6 @@ enum prestera_cmd_type_t {
        PRESTERA_CMD_TYPE_ACL_PORT_UNBIND = 0x531,
 
        PRESTERA_CMD_TYPE_RXTX_INIT = 0x800,
-       PRESTERA_CMD_TYPE_RXTX_PORT_INIT = 0x801,
 
        PRESTERA_CMD_TYPE_LAG_MEMBER_ADD = 0x900,
        PRESTERA_CMD_TYPE_LAG_MEMBER_DELETE = 0x901,
@@ -76,16 +75,12 @@ enum {
        PRESTERA_CMD_PORT_ATTR_LEARNING = 7,
        PRESTERA_CMD_PORT_ATTR_FLOOD = 8,
        PRESTERA_CMD_PORT_ATTR_CAPABILITY = 9,
-       PRESTERA_CMD_PORT_ATTR_REMOTE_CAPABILITY = 10,
-       PRESTERA_CMD_PORT_ATTR_REMOTE_FC = 11,
-       PRESTERA_CMD_PORT_ATTR_LINK_MODE = 12,
+       PRESTERA_CMD_PORT_ATTR_PHY_MODE = 12,
        PRESTERA_CMD_PORT_ATTR_TYPE = 13,
-       PRESTERA_CMD_PORT_ATTR_FEC = 14,
-       PRESTERA_CMD_PORT_ATTR_AUTONEG = 15,
-       PRESTERA_CMD_PORT_ATTR_DUPLEX = 16,
        PRESTERA_CMD_PORT_ATTR_STATS = 17,
-       PRESTERA_CMD_PORT_ATTR_MDIX = 18,
-       PRESTERA_CMD_PORT_ATTR_AUTONEG_RESTART = 19,
+       PRESTERA_CMD_PORT_ATTR_MAC_AUTONEG_RESTART = 18,
+       PRESTERA_CMD_PORT_ATTR_PHY_AUTONEG_RESTART = 19,
+       PRESTERA_CMD_PORT_ATTR_MAC_MODE = 22,
 };
 
 enum {
@@ -169,12 +164,12 @@ struct prestera_fw_event_handler {
 };
 
 struct prestera_msg_cmd {
-       u32 type;
+       __le32 type;
 };
 
 struct prestera_msg_ret {
        struct prestera_msg_cmd cmd;
-       u32 status;
+       __le32 status;
 };
 
 struct prestera_msg_common_req {
@@ -187,102 +182,144 @@ struct prestera_msg_common_resp {
 
 union prestera_msg_switch_param {
        u8 mac[ETH_ALEN];
-       u32 ageing_timeout_ms;
-};
+       __le32 ageing_timeout_ms;
+} __packed;
 
 struct prestera_msg_switch_attr_req {
        struct prestera_msg_cmd cmd;
-       u32 attr;
+       __le32 attr;
        union prestera_msg_switch_param param;
 };
 
 struct prestera_msg_switch_init_resp {
        struct prestera_msg_ret ret;
-       u32 port_count;
-       u32 mtu_max;
+       __le32 port_count;
+       __le32 mtu_max;
        u8  switch_id;
        u8  lag_max;
        u8  lag_member_max;
-};
+       __le32 size_tbl_router_nexthop;
+} __packed __aligned(4);
 
-struct prestera_msg_port_autoneg_param {
-       u64 link_mode;
-       u8  enable;
-       u8  fec;
-};
+struct prestera_msg_event_port_param {
+       union {
+               struct {
+                       u8 oper;
+                       __le32 mode;
+                       __le32 speed;
+                       u8 duplex;
+                       u8 fc;
+                       u8 fec;
+               } __packed mac;
+               struct {
+                       u8 mdix;
+                       __le64 lmode_bmap;
+                       u8 fc;
+               } __packed phy;
+       } __packed;
+} __packed __aligned(4);
 
 struct prestera_msg_port_cap_param {
-       u64 link_mode;
+       __le64 link_mode;
        u8  type;
        u8  fec;
+       u8  fc;
        u8  transceiver;
 };
 
-struct prestera_msg_port_mdix_param {
-       u8 status;
-       u8 admin_mode;
-};
-
 struct prestera_msg_port_flood_param {
        u8 type;
        u8 enable;
 };
 
 union prestera_msg_port_param {
-       u8  admin_state;
-       u8  oper_state;
-       u32 mtu;
-       u8  mac[ETH_ALEN];
-       u8  accept_frm_type;
-       u32 speed;
+       u8 admin_state;
+       u8 oper_state;
+       __le32 mtu;
+       u8 mac[ETH_ALEN];
+       u8 accept_frm_type;
+       __le32 speed;
        u8 learning;
        u8 flood;
-       u32 link_mode;
-       u8  type;
-       u8  duplex;
-       u8  fec;
-       u8  fc;
-       struct prestera_msg_port_mdix_param mdix;
-       struct prestera_msg_port_autoneg_param autoneg;
+       __le32 link_mode;
+       u8 type;
+       u8 duplex;
+       u8 fec;
+       u8 fc;
+
+       union {
+               struct {
+                       u8 admin:1;
+                       u8 fc;
+                       u8 ap_enable;
+                       union {
+                               struct {
+                                       __le32 mode;
+                                       u8  inband:1;
+                                       __le32 speed;
+                                       u8  duplex;
+                                       u8  fec;
+                                       u8  fec_supp;
+                               } __packed reg_mode;
+                               struct {
+                                       __le32 mode;
+                                       __le32 speed;
+                                       u8  fec;
+                                       u8  fec_supp;
+                               } __packed ap_modes[PRESTERA_AP_PORT_MAX];
+                       } __packed;
+               } __packed mac;
+               struct {
+                       u8 admin:1;
+                       u8 adv_enable;
+                       __le64 modes;
+                       __le32 mode;
+                       u8 mdix;
+               } __packed phy;
+       } __packed link;
+
        struct prestera_msg_port_cap_param cap;
        struct prestera_msg_port_flood_param flood_ext;
-};
+       struct prestera_msg_event_port_param link_evt;
+} __packed;
 
 struct prestera_msg_port_attr_req {
        struct prestera_msg_cmd cmd;
-       u32 attr;
-       u32 port;
-       u32 dev;
+       __le32 attr;
+       __le32 port;
+       __le32 dev;
        union prestera_msg_port_param param;
-};
+} __packed __aligned(4);
+
 
 struct prestera_msg_port_attr_resp {
        struct prestera_msg_ret ret;
        union prestera_msg_port_param param;
-};
+} __packed __aligned(4);
+
 
 struct prestera_msg_port_stats_resp {
        struct prestera_msg_ret ret;
-       u64 stats[PRESTERA_PORT_CNT_MAX];
+       __le64 stats[PRESTERA_PORT_CNT_MAX];
 };
 
 struct prestera_msg_port_info_req {
        struct prestera_msg_cmd cmd;
-       u32 port;
+       __le32 port;
 };
 
 struct prestera_msg_port_info_resp {
        struct prestera_msg_ret ret;
-       u32 hw_id;
-       u32 dev_id;
-       u16 fp_id;
+       __le32 hw_id;
+       __le32 dev_id;
+       __le16 fp_id;
 };
 
 struct prestera_msg_vlan_req {
        struct prestera_msg_cmd cmd;
-       u32 port;
-       u32 dev;
-       u16 vid;
+       __le32 port;
+       __le32 dev;
+       __le16 vid;
        u8  is_member;
        u8  is_tagged;
 };
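
From here on every message structure uses fixed-endian __le16/__le32/__le64 fields, with __packed and __aligned(4) where implicit padding could otherwise differ between compilers, so the firmware ABI no longer depends on the host CPU. A minimal sketch of the pattern for a hypothetical new request (all names are illustrative, not part of the driver):

        struct prestera_msg_example_req {
                struct prestera_msg_cmd cmd;
                __le32 port;
                __le16 value;
                u8 flag;
        } __packed __aligned(4);

        static void example_req_fill(struct prestera_msg_example_req *req,
                                     u32 port_id, u16 value)
        {
                req->port  = __cpu_to_le32(port_id);  /* host -> wire order */
                req->value = __cpu_to_le16(value);
                req->flag  = 1;                       /* single bytes need no swap */
        }
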
@@ -292,113 +329,114 @@ struct prestera_msg_fdb_req {
        u8 dest_type;
        union {
                struct {
-                       u32 port;
-                       u32 dev;
+                       __le32 port;
+                       __le32 dev;
                };
-               u16 lag_id;
+               __le16 lag_id;
        } dest;
        u8  mac[ETH_ALEN];
-       u16 vid;
+       __le16 vid;
        u8  dynamic;
-       u32 flush_mode;
-};
+       __le32 flush_mode;
+} __packed __aligned(4);
 
 struct prestera_msg_bridge_req {
        struct prestera_msg_cmd cmd;
-       u32 port;
-       u32 dev;
-       u16 bridge;
+       __le32 port;
+       __le32 dev;
+       __le16 bridge;
 };
 
 struct prestera_msg_bridge_resp {
        struct prestera_msg_ret ret;
-       u16 bridge;
+       __le16 bridge;
 };
 
 struct prestera_msg_acl_action {
-       u32 id;
+       __le32 id;
+       __le32 reserved[5];
 };
 
 struct prestera_msg_acl_match {
-       u32 type;
+       __le32 type;
        union {
                struct {
                        u8 key;
                        u8 mask;
-               } u8;
+               } __packed u8;
                struct {
-                       u16 key;
-                       u16 mask;
+                       __le16 key;
+                       __le16 mask;
                } u16;
                struct {
-                       u32 key;
-                       u32 mask;
+                       __le32 key;
+                       __le32 mask;
                } u32;
                struct {
-                       u64 key;
-                       u64 mask;
+                       __le64 key;
+                       __le64 mask;
                } u64;
                struct {
                        u8 key[ETH_ALEN];
                        u8 mask[ETH_ALEN];
-               } mac;
-       } __packed keymask;
+               } __packed mac;
+       } keymask;
 };
 
 struct prestera_msg_acl_rule_req {
        struct prestera_msg_cmd cmd;
-       u32 id;
-       u32 priority;
-       u16 ruleset_id;
+       __le32 id;
+       __le32 priority;
+       __le16 ruleset_id;
        u8 n_actions;
        u8 n_matches;
 };
 
 struct prestera_msg_acl_rule_resp {
        struct prestera_msg_ret ret;
-       u32 id;
+       __le32 id;
 };
 
 struct prestera_msg_acl_rule_stats_resp {
        struct prestera_msg_ret ret;
-       u64 packets;
-       u64 bytes;
+       __le64 packets;
+       __le64 bytes;
 };
 
 struct prestera_msg_acl_ruleset_bind_req {
        struct prestera_msg_cmd cmd;
-       u32 port;
-       u32 dev;
-       u16 ruleset_id;
+       __le32 port;
+       __le32 dev;
+       __le16 ruleset_id;
 };
 
 struct prestera_msg_acl_ruleset_req {
        struct prestera_msg_cmd cmd;
-       u16 id;
+       __le16 id;
 };
 
 struct prestera_msg_acl_ruleset_resp {
        struct prestera_msg_ret ret;
-       u16 id;
+       __le16 id;
 };
 
 struct prestera_msg_span_req {
        struct prestera_msg_cmd cmd;
-       u32 port;
-       u32 dev;
+       __le32 port;
+       __le32 dev;
        u8 id;
-} __packed __aligned(4);
+};
 
 struct prestera_msg_span_resp {
        struct prestera_msg_ret ret;
        u8 id;
-} __packed __aligned(4);
+};
 
 struct prestera_msg_stp_req {
        struct prestera_msg_cmd cmd;
-       u32 port;
-       u32 dev;
-       u16 vid;
+       __le32 port;
+       __le32 dev;
+       __le16 vid;
        u8  state;
 };
 
@@ -409,20 +447,14 @@ struct prestera_msg_rxtx_req {
 
 struct prestera_msg_rxtx_resp {
        struct prestera_msg_ret ret;
-       u32 map_addr;
-};
-
-struct prestera_msg_rxtx_port_req {
-       struct prestera_msg_cmd cmd;
-       u32 port;
-       u32 dev;
+       __le32 map_addr;
 };
 
 struct prestera_msg_lag_req {
        struct prestera_msg_cmd cmd;
-       u32 port;
-       u32 dev;
-       u16 lag_id;
+       __le32 port;
+       __le32 dev;
+       __le16 lag_id;
 };
 
 struct prestera_msg_cpu_code_counter_req {
@@ -433,22 +465,18 @@ struct prestera_msg_cpu_code_counter_req {
 
 struct mvsw_msg_cpu_code_counter_ret {
        struct prestera_msg_ret ret;
-       u64 packet_count;
+       __le64 packet_count;
 };
 
 struct prestera_msg_event {
-       u16 type;
-       u16 id;
-};
-
-union prestera_msg_event_port_param {
-       u32 oper_state;
+       __le16 type;
+       __le16 id;
 };
 
 struct prestera_msg_event_port {
        struct prestera_msg_event id;
-       u32 port_id;
-       union prestera_msg_event_port_param param;
+       __le32 port_id;
+       struct prestera_msg_event_port_param param;
 };
 
 union prestera_msg_event_fdb_param {
@@ -459,12 +487,52 @@ struct prestera_msg_event_fdb {
        struct prestera_msg_event id;
        u8 dest_type;
        union {
-               u32 port_id;
-               u16 lag_id;
+               __le32 port_id;
+               __le16 lag_id;
        } dest;
-       u32 vid;
+       __le32 vid;
        union prestera_msg_event_fdb_param param;
-};
+} __packed __aligned(4);
+
+static inline void prestera_hw_build_tests(void)
+{
+       /* check requests */
+       BUILD_BUG_ON(sizeof(struct prestera_msg_common_req) != 4);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_switch_attr_req) != 16);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_port_attr_req) != 120);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_port_info_req) != 8);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_vlan_req) != 16);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_fdb_req) != 28);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_bridge_req) != 16);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_acl_rule_req) != 16);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_acl_ruleset_bind_req) != 16);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_acl_ruleset_req) != 8);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_span_req) != 16);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_stp_req) != 16);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_rxtx_req) != 8);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_lag_req) != 16);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_cpu_code_counter_req) != 8);
+
+       /* check responses */
+       BUILD_BUG_ON(sizeof(struct prestera_msg_common_resp) != 8);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_switch_init_resp) != 24);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_port_attr_resp) != 112);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_port_stats_resp) != 248);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_port_info_resp) != 20);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_bridge_resp) != 12);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_acl_rule_resp) != 12);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_acl_rule_stats_resp) != 24);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_acl_ruleset_resp) != 12);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_span_resp) != 12);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_rxtx_resp) != 12);
+
+       /* check events */
+       BUILD_BUG_ON(sizeof(struct prestera_msg_event_port) != 20);
+       BUILD_BUG_ON(sizeof(struct prestera_msg_event_fdb) != 20);
+}
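
prestera_hw_build_tests() pins the size of every wire structure at compile time: BUILD_BUG_ON() breaks the build when its condition is true, so an accidental field addition or padding change is caught long before it can corrupt a firmware message. As a sketch, the same guarantee in plain C11 (roughly what BUILD_BUG_ON() reduces to on modern compilers) would read:

        _Static_assert(sizeof(struct prestera_msg_vlan_req) == 16,
                       "prestera VLAN request must stay 16 bytes on the wire");
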
+
+static u8 prestera_hw_mdix_to_eth(u8 mode);
+static void prestera_hw_remote_fc_to_eth(u8 fc, bool *pause, bool *asym_pause);
 
 static int __prestera_cmd_ret(struct prestera_switch *sw,
                              enum prestera_cmd_type_t type,
@@ -475,15 +543,15 @@ static int __prestera_cmd_ret(struct prestera_switch *sw,
        struct prestera_device *dev = sw->dev;
        int err;
 
-       cmd->type = type;
+       cmd->type = __cpu_to_le32(type);
 
-       err = dev->send_req(dev, cmd, clen, ret, rlen, waitms);
+       err = dev->send_req(dev, 0, cmd, clen, ret, rlen, waitms);
        if (err)
                return err;
 
-       if (ret->cmd.type != PRESTERA_CMD_TYPE_ACK)
+       if (__le32_to_cpu(ret->cmd.type) != PRESTERA_CMD_TYPE_ACK)
                return -EBADE;
-       if (ret->status != PRESTERA_CMD_ACK_OK)
+       if (__le32_to_cpu(ret->status) != PRESTERA_CMD_ACK_OK)
                return -EINVAL;
 
        return 0;
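
__prestera_cmd_ret() is the single choke point where host-order values cross into wire order: the command type is stored with __cpu_to_le32() before send_req(), and the acknowledgement fields are converted back before being compared. A sketch of the same boundary convention applied to the acknowledgement check:

        static bool example_is_ack(const struct prestera_msg_ret *ret)
        {
                /* Convert once at the boundary, compare in host order. */
                return __le32_to_cpu(ret->cmd.type) == PRESTERA_CMD_TYPE_ACK &&
                       __le32_to_cpu(ret->status) == PRESTERA_CMD_ACK_OK;
        }

Keeping the conversions at this one boundary also lets sparse flag any direct mixing of u32 and __le32 elsewhere in the driver.
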
@@ -517,13 +585,24 @@ static int prestera_cmd(struct prestera_switch *sw,
 
 static int prestera_fw_parse_port_evt(void *msg, struct prestera_event *evt)
 {
-       struct prestera_msg_event_port *hw_evt = msg;
+       struct prestera_msg_event_port *hw_evt;
 
-       if (evt->id != PRESTERA_PORT_EVENT_STATE_CHANGED)
-               return -EINVAL;
+       hw_evt = (struct prestera_msg_event_port *)msg;
 
-       evt->port_evt.data.oper_state = hw_evt->param.oper_state;
-       evt->port_evt.port_id = hw_evt->port_id;
+       evt->port_evt.port_id = __le32_to_cpu(hw_evt->port_id);
+
+       if (evt->id == PRESTERA_PORT_EVENT_MAC_STATE_CHANGED) {
+               evt->port_evt.data.mac.oper = hw_evt->param.mac.oper;
+               evt->port_evt.data.mac.mode =
+                       __le32_to_cpu(hw_evt->param.mac.mode);
+               evt->port_evt.data.mac.speed =
+                       __le32_to_cpu(hw_evt->param.mac.speed);
+               evt->port_evt.data.mac.duplex = hw_evt->param.mac.duplex;
+               evt->port_evt.data.mac.fc = hw_evt->param.mac.fc;
+               evt->port_evt.data.mac.fec = hw_evt->param.mac.fec;
+       } else {
+               return -EINVAL;
+       }
 
        return 0;
 }
@@ -535,17 +614,17 @@ static int prestera_fw_parse_fdb_evt(void *msg, struct prestera_event *evt)
        switch (hw_evt->dest_type) {
        case PRESTERA_HW_FDB_ENTRY_TYPE_REG_PORT:
                evt->fdb_evt.type = PRESTERA_FDB_ENTRY_TYPE_REG_PORT;
-               evt->fdb_evt.dest.port_id = hw_evt->dest.port_id;
+               evt->fdb_evt.dest.port_id = __le32_to_cpu(hw_evt->dest.port_id);
                break;
        case PRESTERA_HW_FDB_ENTRY_TYPE_LAG:
                evt->fdb_evt.type = PRESTERA_FDB_ENTRY_TYPE_LAG;
-               evt->fdb_evt.dest.lag_id = hw_evt->dest.lag_id;
+               evt->fdb_evt.dest.lag_id = __le16_to_cpu(hw_evt->dest.lag_id);
                break;
        default:
                return -EINVAL;
        }
 
-       evt->fdb_evt.vid = hw_evt->vid;
+       evt->fdb_evt.vid = __le32_to_cpu(hw_evt->vid);
 
        ether_addr_copy(evt->fdb_evt.data.mac, hw_evt->param.mac);
 
@@ -597,20 +676,22 @@ static int prestera_evt_recv(struct prestera_device *dev, void *buf, size_t size
        struct prestera_msg_event *msg = buf;
        struct prestera_fw_event_handler eh;
        struct prestera_event evt;
+       u16 msg_type;
        int err;
 
-       if (msg->type >= PRESTERA_EVENT_TYPE_MAX)
+       msg_type = __le16_to_cpu(msg->type);
+       if (msg_type >= PRESTERA_EVENT_TYPE_MAX)
                return -EINVAL;
-       if (!fw_event_parsers[msg->type].func)
+       if (!fw_event_parsers[msg_type].func)
                return -ENOENT;
 
-       err = prestera_find_event_handler(sw, msg->type, &eh);
+       err = prestera_find_event_handler(sw, msg_type, &eh);
        if (err)
                return err;
 
-       evt.id = msg->id;
+       evt.id = __le16_to_cpu(msg->id);
 
-       err = fw_event_parsers[msg->type].func(buf, &evt);
+       err = fw_event_parsers[msg_type].func(buf, &evt);
        if (err)
                return err;
 
@@ -635,11 +716,39 @@ static void prestera_pkt_recv(struct prestera_device *dev)
        eh.func(sw, &ev, eh.arg);
 }
 
+static u8 prestera_hw_mdix_to_eth(u8 mode)
+{
+       switch (mode) {
+       case PRESTERA_PORT_TP_MDI:
+               return ETH_TP_MDI;
+       case PRESTERA_PORT_TP_MDIX:
+               return ETH_TP_MDI_X;
+       case PRESTERA_PORT_TP_AUTO:
+               return ETH_TP_MDI_AUTO;
+       default:
+               return ETH_TP_MDI_INVALID;
+       }
+}
+
+static u8 prestera_hw_mdix_from_eth(u8 mode)
+{
+       switch (mode) {
+       case ETH_TP_MDI:
+               return PRESTERA_PORT_TP_MDI;
+       case ETH_TP_MDI_X:
+               return PRESTERA_PORT_TP_MDIX;
+       case ETH_TP_MDI_AUTO:
+               return PRESTERA_PORT_TP_AUTO;
+       default:
+               return PRESTERA_PORT_TP_NA;
+       }
+}
+
 int prestera_hw_port_info_get(const struct prestera_port *port,
                              u32 *dev_id, u32 *hw_id, u16 *fp_id)
 {
        struct prestera_msg_port_info_req req = {
-               .port = port->id,
+               .port = __cpu_to_le32(port->id),
        };
        struct prestera_msg_port_info_resp resp;
        int err;
@@ -649,9 +758,9 @@ int prestera_hw_port_info_get(const struct prestera_port *port,
        if (err)
                return err;
 
-       *dev_id = resp.dev_id;
-       *hw_id = resp.hw_id;
-       *fp_id = resp.fp_id;
+       *dev_id = __le32_to_cpu(resp.dev_id);
+       *hw_id = __le32_to_cpu(resp.hw_id);
+       *fp_id = __le16_to_cpu(resp.fp_id);
 
        return 0;
 }
@@ -659,7 +768,7 @@ int prestera_hw_port_info_get(const struct prestera_port *port,
 int prestera_hw_switch_mac_set(struct prestera_switch *sw, const char *mac)
 {
        struct prestera_msg_switch_attr_req req = {
-               .attr = PRESTERA_CMD_SWITCH_ATTR_MAC,
+               .attr = __cpu_to_le32(PRESTERA_CMD_SWITCH_ATTR_MAC),
        };
 
        ether_addr_copy(req.param.mac, mac);
@@ -676,6 +785,8 @@ int prestera_hw_switch_init(struct prestera_switch *sw)
 
        INIT_LIST_HEAD(&sw->event_handlers);
 
+       prestera_hw_build_tests();
+
        err = prestera_cmd_ret_wait(sw, PRESTERA_CMD_TYPE_SWITCH_INIT,
                                    &req.cmd, sizeof(req),
                                    &resp.ret, sizeof(resp),
@@ -685,9 +796,9 @@ int prestera_hw_switch_init(struct prestera_switch *sw)
 
        sw->dev->recv_msg = prestera_evt_recv;
        sw->dev->recv_pkt = prestera_pkt_recv;
-       sw->port_count = resp.port_count;
+       sw->port_count = __le32_to_cpu(resp.port_count);
        sw->mtu_min = PRESTERA_MIN_MTU;
-       sw->mtu_max = resp.mtu_max;
+       sw->mtu_max = __le32_to_cpu(resp.mtu_max);
        sw->id = resp.switch_id;
        sw->lag_member_max = resp.lag_member_max;
        sw->lag_max = resp.lag_max;
@@ -703,9 +814,9 @@ void prestera_hw_switch_fini(struct prestera_switch *sw)
 int prestera_hw_switch_ageing_set(struct prestera_switch *sw, u32 ageing_ms)
 {
        struct prestera_msg_switch_attr_req req = {
-               .attr = PRESTERA_CMD_SWITCH_ATTR_AGEING,
+               .attr = __cpu_to_le32(PRESTERA_CMD_SWITCH_ATTR_AGEING),
                .param = {
-                       .ageing_timeout_ms = ageing_ms,
+                       .ageing_timeout_ms = __cpu_to_le32(ageing_ms),
                },
        };
 
@@ -713,15 +824,56 @@ int prestera_hw_switch_ageing_set(struct prestera_switch *sw, u32 ageing_ms)
                            &req.cmd, sizeof(req));
 }
 
-int prestera_hw_port_state_set(const struct prestera_port *port,
-                              bool admin_state)
+int prestera_hw_port_mac_mode_get(const struct prestera_port *port,
+                                 u32 *mode, u32 *speed, u8 *duplex, u8 *fec)
+{
+       struct prestera_msg_port_attr_resp resp;
+       struct prestera_msg_port_attr_req req = {
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_MAC_MODE),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id)
+       };
+       int err;
+
+       err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
+                              &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
+       if (err)
+               return err;
+
+       if (mode)
+               *mode = __le32_to_cpu(resp.param.link_evt.mac.mode);
+
+       if (speed)
+               *speed = __le32_to_cpu(resp.param.link_evt.mac.speed);
+
+       if (duplex)
+               *duplex = resp.param.link_evt.mac.duplex;
+
+       if (fec)
+               *fec = resp.param.link_evt.mac.fec;
+
+       return err;
+}
+
+int prestera_hw_port_mac_mode_set(const struct prestera_port *port,
+                                 bool admin, u32 mode, u8 inband,
+                                 u32 speed, u8 duplex, u8 fec)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_ADMIN_STATE,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_MAC_MODE),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
                .param = {
-                       .admin_state = admin_state,
+                       .link = {
+                               .mac = {
+                                       .admin = admin,
+                                       .reg_mode.mode = __cpu_to_le32(mode),
+                                       .reg_mode.inband = inband,
+                                       .reg_mode.speed = __cpu_to_le32(speed),
+                                       .reg_mode.duplex = duplex,
+                                       .reg_mode.fec = fec
+                               }
+                       }
                }
        };
 
@@ -729,14 +881,70 @@ int prestera_hw_port_state_set(const struct prestera_port *port,
                            &req.cmd, sizeof(req));
 }
 
+int prestera_hw_port_phy_mode_get(const struct prestera_port *port,
+                                 u8 *mdix, u64 *lmode_bmap,
+                                 bool *fc_pause, bool *fc_asym)
+{
+       struct prestera_msg_port_attr_resp resp;
+       struct prestera_msg_port_attr_req req = {
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_PHY_MODE),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id)
+       };
+       int err;
+
+       err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
+                              &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
+       if (err)
+               return err;
+
+       if (mdix)
+               *mdix = prestera_hw_mdix_to_eth(resp.param.link_evt.phy.mdix);
+
+       if (lmode_bmap)
+               *lmode_bmap = __le64_to_cpu(resp.param.link_evt.phy.lmode_bmap);
+
+       if (fc_pause && fc_asym)
+               prestera_hw_remote_fc_to_eth(resp.param.link_evt.phy.fc,
+                                            fc_pause, fc_asym);
+
+       return err;
+}
+
+int prestera_hw_port_phy_mode_set(const struct prestera_port *port,
+                                 bool admin, bool adv, u32 mode, u64 modes,
+                                 u8 mdix)
+{
+       struct prestera_msg_port_attr_req req = {
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_PHY_MODE),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .param = {
+                       .link = {
+                               .phy = {
+                                       .admin = admin,
+                                       .adv_enable = adv ? 1 : 0,
+                                       .mode = __cpu_to_le32(mode),
+                                       .modes = __cpu_to_le64(modes),
+                               }
+                       }
+               }
+       };
+
+       req.param.link.phy.mdix = prestera_hw_mdix_from_eth(mdix);
+
+       return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
+                           &req.cmd, sizeof(req));
+}
+
 int prestera_hw_port_mtu_set(const struct prestera_port *port, u32 mtu)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_MTU,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_MTU),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
                .param = {
-                       .mtu = mtu,
+                       .mtu = __cpu_to_le32(mtu),
                }
        };
 
@@ -747,9 +955,9 @@ int prestera_hw_port_mtu_set(const struct prestera_port *port, u32 mtu)
 int prestera_hw_port_mac_set(const struct prestera_port *port, const char *mac)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_MAC,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_MAC),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
 
        ether_addr_copy(req.param.mac, mac);
@@ -762,9 +970,9 @@ int prestera_hw_port_accept_frm_type(struct prestera_port *port,
                                     enum prestera_accept_frm_type type)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_ACCEPT_FRAME_TYPE,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_ACCEPT_FRAME_TYPE),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
                .param = {
                        .accept_frm_type = type,
                }
@@ -778,9 +986,9 @@ int prestera_hw_port_cap_get(const struct prestera_port *port,
                             struct prestera_port_caps *caps)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_CAPABILITY,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_CAPABILITY),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
        struct prestera_msg_port_attr_resp resp;
        int err;
@@ -790,7 +998,7 @@ int prestera_hw_port_cap_get(const struct prestera_port *port,
        if (err)
                return err;
 
-       caps->supp_link_modes = resp.param.cap.link_mode;
+       caps->supp_link_modes = __le64_to_cpu(resp.param.cap.link_mode);
        caps->transceiver = resp.param.cap.transceiver;
        caps->supp_fec = resp.param.cap.fec;
        caps->type = resp.param.cap.type;
@@ -798,44 +1006,9 @@ int prestera_hw_port_cap_get(const struct prestera_port *port,
        return err;
 }
 
-int prestera_hw_port_remote_cap_get(const struct prestera_port *port,
-                                   u64 *link_mode_bitmap)
+static void prestera_hw_remote_fc_to_eth(u8 fc, bool *pause, bool *asym_pause)
 {
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_REMOTE_CAPABILITY,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-       };
-       struct prestera_msg_port_attr_resp resp;
-       int err;
-
-       err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
-                              &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
-       if (err)
-               return err;
-
-       *link_mode_bitmap = resp.param.cap.link_mode;
-
-       return 0;
-}
-
-int prestera_hw_port_remote_fc_get(const struct prestera_port *port,
-                                  bool *pause, bool *asym_pause)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_REMOTE_FC,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-       };
-       struct prestera_msg_port_attr_resp resp;
-       int err;
-
-       err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
-                              &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
-       if (err)
-               return err;
-
-       switch (resp.param.fc) {
+       switch (fc) {
        case PRESTERA_FC_SYMMETRIC:
                *pause = true;
                *asym_pause = false;
@@ -852,8 +1025,6 @@ int prestera_hw_port_remote_fc_get(const struct prestera_port *port,
                *pause = false;
                *asym_pause = false;
        }
-
-       return 0;
 }
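
The remote flow-control query is folded into a pure translation helper: the command round trip now happens in the PHY-mode getter, which passes the raw fc byte to prestera_hw_remote_fc_to_eth(). A usage sketch covering the symmetric case visible above (the wrapper function is hypothetical):

        static void example_fc_decode(void)
        {
                bool pause, asym_pause;

                prestera_hw_remote_fc_to_eth(PRESTERA_FC_SYMMETRIC,
                                             &pause, &asym_pause);
                /* pause == true, asym_pause == false */
        }
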
 
 int prestera_hw_acl_ruleset_create(struct prestera_switch *sw, u16 *ruleset_id)
@@ -867,7 +1038,7 @@ int prestera_hw_acl_ruleset_create(struct prestera_switch *sw, u16 *ruleset_id)
        if (err)
                return err;
 
-       *ruleset_id = resp.id;
+       *ruleset_id = __le16_to_cpu(resp.id);
 
        return 0;
 }
@@ -875,7 +1046,7 @@ int prestera_hw_acl_ruleset_create(struct prestera_switch *sw, u16 *ruleset_id)
 int prestera_hw_acl_ruleset_del(struct prestera_switch *sw, u16 ruleset_id)
 {
        struct prestera_msg_acl_ruleset_req req = {
-               .id = ruleset_id,
+               .id = __cpu_to_le16(ruleset_id),
        };
 
        return prestera_cmd(sw, PRESTERA_CMD_TYPE_ACL_RULESET_DELETE,
@@ -890,7 +1061,7 @@ static int prestera_hw_acl_actions_put(struct prestera_msg_acl_action *action,
        int i = 0;
 
        list_for_each_entry(a_entry, a_list, list) {
-               action[i].id = a_entry->id;
+               action[i].id = __cpu_to_le32(a_entry->id);
 
                switch (a_entry->id) {
                case PRESTERA_ACL_RULE_ACTION_ACCEPT:
@@ -916,7 +1087,7 @@ static int prestera_hw_acl_matches_put(struct prestera_msg_acl_match *match,
        int i = 0;
 
        list_for_each_entry(m_entry, m_list, list) {
-               match[i].type = m_entry->type;
+               match[i].type = __cpu_to_le32(m_entry->type);
 
                switch (m_entry->type) {
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_TYPE:
@@ -924,8 +1095,10 @@ static int prestera_hw_acl_matches_put(struct prestera_msg_acl_match *match,
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_DST:
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_VLAN_ID:
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_VLAN_TPID:
-                       match[i].keymask.u16.key = m_entry->keymask.u16.key;
-                       match[i].keymask.u16.mask = m_entry->keymask.u16.mask;
+                       match[i].keymask.u16.key =
+                               __cpu_to_le16(m_entry->keymask.u16.key);
+                       match[i].keymask.u16.mask =
+                               __cpu_to_le16(m_entry->keymask.u16.mask);
                        break;
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ICMP_TYPE:
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ICMP_CODE:
@@ -946,12 +1119,16 @@ static int prestera_hw_acl_matches_put(struct prestera_msg_acl_match *match,
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_DST:
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_RANGE_SRC:
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_RANGE_DST:
-                       match[i].keymask.u32.key = m_entry->keymask.u32.key;
-                       match[i].keymask.u32.mask = m_entry->keymask.u32.mask;
+                       match[i].keymask.u32.key =
+                               __cpu_to_le32(m_entry->keymask.u32.key);
+                       match[i].keymask.u32.mask =
+                               __cpu_to_le32(m_entry->keymask.u32.mask);
                        break;
                case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_PORT:
-                       match[i].keymask.u64.key = m_entry->keymask.u64.key;
-                       match[i].keymask.u64.mask = m_entry->keymask.u64.mask;
+                       match[i].keymask.u64.key =
+                               __cpu_to_le64(m_entry->keymask.u64.key);
+                       match[i].keymask.u64.mask =
+                               __cpu_to_le64(m_entry->keymask.u64.mask);
                        break;
                default:
                        return -EINVAL;
@@ -1001,8 +1178,8 @@ int prestera_hw_acl_rule_add(struct prestera_switch *sw,
        if (err)
                goto free_buff;
 
-       req->ruleset_id = prestera_acl_rule_ruleset_id_get(rule);
-       req->priority = prestera_acl_rule_priority_get(rule);
+       req->ruleset_id = __cpu_to_le16(prestera_acl_rule_ruleset_id_get(rule));
+       req->priority = __cpu_to_le32(prestera_acl_rule_priority_get(rule));
        req->n_actions = prestera_acl_rule_action_len(rule);
        req->n_matches = prestera_acl_rule_match_len(rule);
 
@@ -1011,7 +1188,7 @@ int prestera_hw_acl_rule_add(struct prestera_switch *sw,
        if (err)
                goto free_buff;
 
-       *rule_id = resp.id;
+       *rule_id = __le32_to_cpu(resp.id);
 free_buff:
        kfree(buff);
        return err;
@@ -1020,7 +1197,7 @@ free_buff:
 int prestera_hw_acl_rule_del(struct prestera_switch *sw, u32 rule_id)
 {
        struct prestera_msg_acl_rule_req req = {
-               .id = rule_id
+               .id = __cpu_to_le32(rule_id)
        };
 
        return prestera_cmd(sw, PRESTERA_CMD_TYPE_ACL_RULE_DELETE,
@@ -1032,7 +1209,7 @@ int prestera_hw_acl_rule_stats_get(struct prestera_switch *sw, u32 rule_id,
 {
        struct prestera_msg_acl_rule_stats_resp resp;
        struct prestera_msg_acl_rule_req req = {
-               .id = rule_id
+               .id = __cpu_to_le32(rule_id)
        };
        int err;
 
@@ -1041,8 +1218,8 @@ int prestera_hw_acl_rule_stats_get(struct prestera_switch *sw, u32 rule_id,
        if (err)
                return err;
 
-       *packets = resp.packets;
-       *bytes = resp.bytes;
+       *packets = __le64_to_cpu(resp.packets);
+       *bytes = __le64_to_cpu(resp.bytes);
 
        return 0;
 }
@@ -1050,9 +1227,9 @@ int prestera_hw_acl_rule_stats_get(struct prestera_switch *sw, u32 rule_id,
 int prestera_hw_acl_port_bind(const struct prestera_port *port, u16 ruleset_id)
 {
        struct prestera_msg_acl_ruleset_bind_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .ruleset_id = ruleset_id,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .ruleset_id = __cpu_to_le16(ruleset_id),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_ACL_PORT_BIND,
@@ -1063,9 +1240,9 @@ int prestera_hw_acl_port_unbind(const struct prestera_port *port,
                                u16 ruleset_id)
 {
        struct prestera_msg_acl_ruleset_bind_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .ruleset_id = ruleset_id,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .ruleset_id = __cpu_to_le16(ruleset_id),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_ACL_PORT_UNBIND,
@@ -1076,8 +1253,8 @@ int prestera_hw_span_get(const struct prestera_port *port, u8 *span_id)
 {
        struct prestera_msg_span_resp resp;
        struct prestera_msg_span_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
        int err;
 
@@ -1094,8 +1271,8 @@ int prestera_hw_span_get(const struct prestera_port *port, u8 *span_id)
 int prestera_hw_span_bind(const struct prestera_port *port, u8 span_id)
 {
        struct prestera_msg_span_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
                .id = span_id,
        };
 
@@ -1106,8 +1283,8 @@ int prestera_hw_span_bind(const struct prestera_port *port, u8 span_id)
 int prestera_hw_span_unbind(const struct prestera_port *port)
 {
        struct prestera_msg_span_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_SPAN_UNBIND,
@@ -1127,9 +1304,9 @@ int prestera_hw_span_release(struct prestera_switch *sw, u8 span_id)
 int prestera_hw_port_type_get(const struct prestera_port *port, u8 *type)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_TYPE,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_TYPE),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
        struct prestera_msg_port_attr_resp resp;
        int err;
@@ -1144,146 +1321,12 @@ int prestera_hw_port_type_get(const struct prestera_port *port, u8 *type)
        return 0;
 }
 
-int prestera_hw_port_fec_get(const struct prestera_port *port, u8 *fec)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_FEC,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-       };
-       struct prestera_msg_port_attr_resp resp;
-       int err;
-
-       err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
-                              &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
-       if (err)
-               return err;
-
-       *fec = resp.param.fec;
-
-       return 0;
-}
-
-int prestera_hw_port_fec_set(const struct prestera_port *port, u8 fec)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_FEC,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .param = {
-                       .fec = fec,
-               }
-       };
-
-       return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
-                           &req.cmd, sizeof(req));
-}
-
-static u8 prestera_hw_mdix_to_eth(u8 mode)
-{
-       switch (mode) {
-       case PRESTERA_PORT_TP_MDI:
-               return ETH_TP_MDI;
-       case PRESTERA_PORT_TP_MDIX:
-               return ETH_TP_MDI_X;
-       case PRESTERA_PORT_TP_AUTO:
-               return ETH_TP_MDI_AUTO;
-       default:
-               return ETH_TP_MDI_INVALID;
-       }
-}
-
-static u8 prestera_hw_mdix_from_eth(u8 mode)
-{
-       switch (mode) {
-       case ETH_TP_MDI:
-               return PRESTERA_PORT_TP_MDI;
-       case ETH_TP_MDI_X:
-               return PRESTERA_PORT_TP_MDIX;
-       case ETH_TP_MDI_AUTO:
-               return PRESTERA_PORT_TP_AUTO;
-       default:
-               return PRESTERA_PORT_TP_NA;
-       }
-}
-
-int prestera_hw_port_mdix_get(const struct prestera_port *port, u8 *status,
-                             u8 *admin_mode)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_MDIX,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-       };
-       struct prestera_msg_port_attr_resp resp;
-       int err;
-
-       err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
-                              &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
-       if (err)
-               return err;
-
-       *status = prestera_hw_mdix_to_eth(resp.param.mdix.status);
-       *admin_mode = prestera_hw_mdix_to_eth(resp.param.mdix.admin_mode);
-
-       return 0;
-}
-
-int prestera_hw_port_mdix_set(const struct prestera_port *port, u8 mode)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_MDIX,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-       };
-
-       req.param.mdix.admin_mode = prestera_hw_mdix_from_eth(mode);
-
-       return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
-                           &req.cmd, sizeof(req));
-}
-
-int prestera_hw_port_link_mode_set(const struct prestera_port *port, u32 mode)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_LINK_MODE,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .param = {
-                       .link_mode = mode,
-               }
-       };
-
-       return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
-                           &req.cmd, sizeof(req));
-}
-
-int prestera_hw_port_link_mode_get(const struct prestera_port *port, u32 *mode)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_LINK_MODE,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-       };
-       struct prestera_msg_port_attr_resp resp;
-       int err;
-
-       err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
-                              &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
-       if (err)
-               return err;
-
-       *mode = resp.param.link_mode;
-
-       return 0;
-}
-
 int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_SPEED,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_SPEED),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
        struct prestera_msg_port_attr_resp resp;
        int err;
@@ -1293,73 +1336,33 @@ int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed)
        if (err)
                return err;
 
-       *speed = resp.param.speed;
+       *speed = __le32_to_cpu(resp.param.speed);
 
        return 0;
 }
 
-int prestera_hw_port_autoneg_set(const struct prestera_port *port,
-                                bool autoneg, u64 link_modes, u8 fec)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_AUTONEG,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .param = {
-                       .autoneg = {
-                               .link_mode = link_modes,
-                               .enable = autoneg,
-                               .fec = fec,
-                       }
-               }
-       };
-
-       return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
-                           &req.cmd, sizeof(req));
-}
-
 int prestera_hw_port_autoneg_restart(struct prestera_port *port)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_AUTONEG_RESTART,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_PHY_AUTONEG_RESTART),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
                            &req.cmd, sizeof(req));
 }
 
-int prestera_hw_port_duplex_get(const struct prestera_port *port, u8 *duplex)
-{
-       struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_DUPLEX,
-               .port = port->hw_id,
-               .dev = port->dev_id,
-       };
-       struct prestera_msg_port_attr_resp resp;
-       int err;
-
-       err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
-                              &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
-       if (err)
-               return err;
-
-       *duplex = resp.param.duplex;
-
-       return 0;
-}
-
 int prestera_hw_port_stats_get(const struct prestera_port *port,
                               struct prestera_port_stats *st)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_STATS,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_STATS),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
        struct prestera_msg_port_stats_resp resp;
-       u64 *hw = resp.stats;
+       __le64 *hw = resp.stats;
        int err;
 
        err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_GET,
@@ -1367,36 +1370,56 @@ int prestera_hw_port_stats_get(const struct prestera_port *port,
        if (err)
                return err;
 
-       st->good_octets_received = hw[PRESTERA_PORT_GOOD_OCTETS_RCV_CNT];
-       st->bad_octets_received = hw[PRESTERA_PORT_BAD_OCTETS_RCV_CNT];
-       st->mac_trans_error = hw[PRESTERA_PORT_MAC_TRANSMIT_ERR_CNT];
-       st->broadcast_frames_received = hw[PRESTERA_PORT_BRDC_PKTS_RCV_CNT];
-       st->multicast_frames_received = hw[PRESTERA_PORT_MC_PKTS_RCV_CNT];
-       st->frames_64_octets = hw[PRESTERA_PORT_PKTS_64L_CNT];
-       st->frames_65_to_127_octets = hw[PRESTERA_PORT_PKTS_65TO127L_CNT];
-       st->frames_128_to_255_octets = hw[PRESTERA_PORT_PKTS_128TO255L_CNT];
-       st->frames_256_to_511_octets = hw[PRESTERA_PORT_PKTS_256TO511L_CNT];
-       st->frames_512_to_1023_octets = hw[PRESTERA_PORT_PKTS_512TO1023L_CNT];
-       st->frames_1024_to_max_octets = hw[PRESTERA_PORT_PKTS_1024TOMAXL_CNT];
-       st->excessive_collision = hw[PRESTERA_PORT_EXCESSIVE_COLLISIONS_CNT];
-       st->multicast_frames_sent = hw[PRESTERA_PORT_MC_PKTS_SENT_CNT];
-       st->broadcast_frames_sent = hw[PRESTERA_PORT_BRDC_PKTS_SENT_CNT];
-       st->fc_sent = hw[PRESTERA_PORT_FC_SENT_CNT];
-       st->fc_received = hw[PRESTERA_PORT_GOOD_FC_RCV_CNT];
-       st->buffer_overrun = hw[PRESTERA_PORT_DROP_EVENTS_CNT];
-       st->undersize = hw[PRESTERA_PORT_UNDERSIZE_PKTS_CNT];
-       st->fragments = hw[PRESTERA_PORT_FRAGMENTS_PKTS_CNT];
-       st->oversize = hw[PRESTERA_PORT_OVERSIZE_PKTS_CNT];
-       st->jabber = hw[PRESTERA_PORT_JABBER_PKTS_CNT];
-       st->rx_error_frame_received = hw[PRESTERA_PORT_MAC_RCV_ERROR_CNT];
-       st->bad_crc = hw[PRESTERA_PORT_BAD_CRC_CNT];
-       st->collisions = hw[PRESTERA_PORT_COLLISIONS_CNT];
-       st->late_collision = hw[PRESTERA_PORT_LATE_COLLISIONS_CNT];
-       st->unicast_frames_received = hw[PRESTERA_PORT_GOOD_UC_PKTS_RCV_CNT];
-       st->unicast_frames_sent = hw[PRESTERA_PORT_GOOD_UC_PKTS_SENT_CNT];
-       st->sent_multiple = hw[PRESTERA_PORT_MULTIPLE_PKTS_SENT_CNT];
-       st->sent_deferred = hw[PRESTERA_PORT_DEFERRED_PKTS_SENT_CNT];
-       st->good_octets_sent = hw[PRESTERA_PORT_GOOD_OCTETS_SENT_CNT];
+       st->good_octets_received =
+               __le64_to_cpu(hw[PRESTERA_PORT_GOOD_OCTETS_RCV_CNT]);
+       st->bad_octets_received =
+               __le64_to_cpu(hw[PRESTERA_PORT_BAD_OCTETS_RCV_CNT]);
+       st->mac_trans_error =
+               __le64_to_cpu(hw[PRESTERA_PORT_MAC_TRANSMIT_ERR_CNT]);
+       st->broadcast_frames_received =
+               __le64_to_cpu(hw[PRESTERA_PORT_BRDC_PKTS_RCV_CNT]);
+       st->multicast_frames_received =
+               __le64_to_cpu(hw[PRESTERA_PORT_MC_PKTS_RCV_CNT]);
+       st->frames_64_octets = __le64_to_cpu(hw[PRESTERA_PORT_PKTS_64L_CNT]);
+       st->frames_65_to_127_octets =
+               __le64_to_cpu(hw[PRESTERA_PORT_PKTS_65TO127L_CNT]);
+       st->frames_128_to_255_octets =
+               __le64_to_cpu(hw[PRESTERA_PORT_PKTS_128TO255L_CNT]);
+       st->frames_256_to_511_octets =
+               __le64_to_cpu(hw[PRESTERA_PORT_PKTS_256TO511L_CNT]);
+       st->frames_512_to_1023_octets =
+               __le64_to_cpu(hw[PRESTERA_PORT_PKTS_512TO1023L_CNT]);
+       st->frames_1024_to_max_octets =
+               __le64_to_cpu(hw[PRESTERA_PORT_PKTS_1024TOMAXL_CNT]);
+       st->excessive_collision =
+               __le64_to_cpu(hw[PRESTERA_PORT_EXCESSIVE_COLLISIONS_CNT]);
+       st->multicast_frames_sent =
+               __le64_to_cpu(hw[PRESTERA_PORT_MC_PKTS_SENT_CNT]);
+       st->broadcast_frames_sent =
+               __le64_to_cpu(hw[PRESTERA_PORT_BRDC_PKTS_SENT_CNT]);
+       st->fc_sent = __le64_to_cpu(hw[PRESTERA_PORT_FC_SENT_CNT]);
+       st->fc_received = __le64_to_cpu(hw[PRESTERA_PORT_GOOD_FC_RCV_CNT]);
+       st->buffer_overrun = __le64_to_cpu(hw[PRESTERA_PORT_DROP_EVENTS_CNT]);
+       st->undersize = __le64_to_cpu(hw[PRESTERA_PORT_UNDERSIZE_PKTS_CNT]);
+       st->fragments = __le64_to_cpu(hw[PRESTERA_PORT_FRAGMENTS_PKTS_CNT]);
+       st->oversize = __le64_to_cpu(hw[PRESTERA_PORT_OVERSIZE_PKTS_CNT]);
+       st->jabber = __le64_to_cpu(hw[PRESTERA_PORT_JABBER_PKTS_CNT]);
+       st->rx_error_frame_received =
+               __le64_to_cpu(hw[PRESTERA_PORT_MAC_RCV_ERROR_CNT]);
+       st->bad_crc = __le64_to_cpu(hw[PRESTERA_PORT_BAD_CRC_CNT]);
+       st->collisions = __le64_to_cpu(hw[PRESTERA_PORT_COLLISIONS_CNT]);
+       st->late_collision =
+               __le64_to_cpu(hw[PRESTERA_PORT_LATE_COLLISIONS_CNT]);
+       st->unicast_frames_received =
+               __le64_to_cpu(hw[PRESTERA_PORT_GOOD_UC_PKTS_RCV_CNT]);
+       st->unicast_frames_sent =
+               __le64_to_cpu(hw[PRESTERA_PORT_GOOD_UC_PKTS_SENT_CNT]);
+       st->sent_multiple =
+               __le64_to_cpu(hw[PRESTERA_PORT_MULTIPLE_PKTS_SENT_CNT]);
+       st->sent_deferred =
+               __le64_to_cpu(hw[PRESTERA_PORT_DEFERRED_PKTS_SENT_CNT]);
+       st->good_octets_sent =
+               __le64_to_cpu(hw[PRESTERA_PORT_GOOD_OCTETS_SENT_CNT]);
 
        return 0;
 }
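
The per-counter conversions above are deliberately explicit because struct prestera_port_stats is not guaranteed to mirror the wire layout. If the destination ever were a plain array of u64 in wire order (an assumption, not the current layout), the same conversion would collapse to a loop:

        static void example_stats_convert(u64 *dst, const __le64 *hw)
        {
                unsigned int i;

                for (i = 0; i < PRESTERA_PORT_CNT_MAX; i++)
                        dst[i] = __le64_to_cpu(hw[i]);
        }
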
@@ -1404,9 +1427,9 @@ int prestera_hw_port_stats_get(const struct prestera_port *port,
 int prestera_hw_port_learning_set(struct prestera_port *port, bool enable)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_LEARNING,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_LEARNING),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
                .param = {
                        .learning = enable,
                }
@@ -1419,9 +1442,9 @@ int prestera_hw_port_learning_set(struct prestera_port *port, bool enable)
 static int prestera_hw_port_uc_flood_set(struct prestera_port *port, bool flood)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_FLOOD,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_FLOOD),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
                .param = {
                        .flood_ext = {
                                .type = PRESTERA_PORT_FLOOD_TYPE_UC,
@@ -1437,9 +1460,9 @@ static int prestera_hw_port_uc_flood_set(struct prestera_port *port, bool flood)
 static int prestera_hw_port_mc_flood_set(struct prestera_port *port, bool flood)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_FLOOD,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_FLOOD),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
                .param = {
                        .flood_ext = {
                                .type = PRESTERA_PORT_FLOOD_TYPE_MC,
@@ -1455,9 +1478,9 @@ static int prestera_hw_port_mc_flood_set(struct prestera_port *port, bool flood)
 static int prestera_hw_port_flood_set_v2(struct prestera_port *port, bool flood)
 {
        struct prestera_msg_port_attr_req req = {
-               .attr = PRESTERA_CMD_PORT_ATTR_FLOOD,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_FLOOD),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
                .param = {
                        .flood = flood,
                }
@@ -1505,7 +1528,7 @@ err_uc_flood:
 int prestera_hw_vlan_create(struct prestera_switch *sw, u16 vid)
 {
        struct prestera_msg_vlan_req req = {
-               .vid = vid,
+               .vid = __cpu_to_le16(vid),
        };
 
        return prestera_cmd(sw, PRESTERA_CMD_TYPE_VLAN_CREATE,
@@ -1515,7 +1538,7 @@ int prestera_hw_vlan_create(struct prestera_switch *sw, u16 vid)
 int prestera_hw_vlan_delete(struct prestera_switch *sw, u16 vid)
 {
        struct prestera_msg_vlan_req req = {
-               .vid = vid,
+               .vid = __cpu_to_le16(vid),
        };
 
        return prestera_cmd(sw, PRESTERA_CMD_TYPE_VLAN_DELETE,
@@ -1526,9 +1549,9 @@ int prestera_hw_vlan_port_set(struct prestera_port *port, u16 vid,
                              bool is_member, bool untagged)
 {
        struct prestera_msg_vlan_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .vid = vid,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .vid = __cpu_to_le16(vid),
                .is_member = is_member,
                .is_tagged = !untagged,
        };
@@ -1540,9 +1563,9 @@ int prestera_hw_vlan_port_set(struct prestera_port *port, u16 vid,
 int prestera_hw_vlan_port_vid_set(struct prestera_port *port, u16 vid)
 {
        struct prestera_msg_vlan_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .vid = vid,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .vid = __cpu_to_le16(vid),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_VLAN_PVID_SET,
@@ -1552,9 +1575,9 @@ int prestera_hw_vlan_port_vid_set(struct prestera_port *port, u16 vid)
 int prestera_hw_vlan_port_stp_set(struct prestera_port *port, u16 vid, u8 state)
 {
        struct prestera_msg_stp_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .vid = vid,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .vid = __cpu_to_le16(vid),
                .state = state,
        };
 
@@ -1567,10 +1590,10 @@ int prestera_hw_fdb_add(struct prestera_port *port, const unsigned char *mac,
 {
        struct prestera_msg_fdb_req req = {
                .dest = {
-                       .dev = port->dev_id,
-                       .port = port->hw_id,
+                       .dev = __cpu_to_le32(port->dev_id),
+                       .port = __cpu_to_le32(port->hw_id),
                },
-               .vid = vid,
+               .vid = __cpu_to_le16(vid),
                .dynamic = dynamic,
        };
 
@@ -1585,10 +1608,10 @@ int prestera_hw_fdb_del(struct prestera_port *port, const unsigned char *mac,
 {
        struct prestera_msg_fdb_req req = {
                .dest = {
-                       .dev = port->dev_id,
-                       .port = port->hw_id,
+                       .dev = __cpu_to_le32(port->dev_id),
+                       .port = __cpu_to_le32(port->hw_id),
                },
-               .vid = vid,
+               .vid = __cpu_to_le16(vid),
        };
 
        ether_addr_copy(req.mac, mac);
@@ -1603,9 +1626,9 @@ int prestera_hw_lag_fdb_add(struct prestera_switch *sw, u16 lag_id,
        struct prestera_msg_fdb_req req = {
                .dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
                .dest = {
-                       .lag_id = lag_id,
+                       .lag_id = __cpu_to_le16(lag_id),
                },
-               .vid = vid,
+               .vid = __cpu_to_le16(vid),
                .dynamic = dynamic,
        };
 
@@ -1621,9 +1644,9 @@ int prestera_hw_lag_fdb_del(struct prestera_switch *sw, u16 lag_id,
        struct prestera_msg_fdb_req req = {
                .dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
                .dest = {
-                       .lag_id = lag_id,
+                       .lag_id = __cpu_to_le16(lag_id),
                },
-               .vid = vid,
+               .vid = __cpu_to_le16(vid),
        };
 
        ether_addr_copy(req.mac, mac);
@@ -1636,10 +1659,10 @@ int prestera_hw_fdb_flush_port(struct prestera_port *port, u32 mode)
 {
        struct prestera_msg_fdb_req req = {
                .dest = {
-                       .dev = port->dev_id,
-                       .port = port->hw_id,
+                       .dev = __cpu_to_le32(port->dev_id),
+                       .port = __cpu_to_le32(port->hw_id),
                },
-               .flush_mode = mode,
+               .flush_mode = __cpu_to_le32(mode),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT,
@@ -1649,8 +1672,8 @@ int prestera_hw_fdb_flush_port(struct prestera_port *port, u32 mode)
 int prestera_hw_fdb_flush_vlan(struct prestera_switch *sw, u16 vid, u32 mode)
 {
        struct prestera_msg_fdb_req req = {
-               .vid = vid,
-               .flush_mode = mode,
+               .vid = __cpu_to_le16(vid),
+               .flush_mode = __cpu_to_le32(mode),
        };
 
        return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_FLUSH_VLAN,
@@ -1662,11 +1685,11 @@ int prestera_hw_fdb_flush_port_vlan(struct prestera_port *port, u16 vid,
 {
        struct prestera_msg_fdb_req req = {
                .dest = {
-                       .dev = port->dev_id,
-                       .port = port->hw_id,
+                       .dev = __cpu_to_le32(port->dev_id),
+                       .port = __cpu_to_le32(port->hw_id),
                },
-               .vid = vid,
-               .flush_mode = mode,
+               .vid = __cpu_to_le16(vid),
+               .flush_mode = __cpu_to_le32(mode),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT_VLAN,
@@ -1679,9 +1702,9 @@ int prestera_hw_fdb_flush_lag(struct prestera_switch *sw, u16 lag_id,
        struct prestera_msg_fdb_req req = {
                .dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
                .dest = {
-                       .lag_id = lag_id,
+                       .lag_id = __cpu_to_le16(lag_id),
                },
-               .flush_mode = mode,
+               .flush_mode = __cpu_to_le32(mode),
        };
 
        return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT,
@@ -1694,10 +1717,10 @@ int prestera_hw_fdb_flush_lag_vlan(struct prestera_switch *sw,
        struct prestera_msg_fdb_req req = {
                .dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
                .dest = {
-                       .lag_id = lag_id,
+                       .lag_id = __cpu_to_le16(lag_id),
                },
-               .vid = vid,
-               .flush_mode = mode,
+               .vid = __cpu_to_le16(vid),
+               .flush_mode = __cpu_to_le32(mode),
        };
 
        return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT_VLAN,
@@ -1716,7 +1739,7 @@ int prestera_hw_bridge_create(struct prestera_switch *sw, u16 *bridge_id)
        if (err)
                return err;
 
-       *bridge_id = resp.bridge;
+       *bridge_id = __le16_to_cpu(resp.bridge);
 
        return 0;
 }
@@ -1724,7 +1747,7 @@ int prestera_hw_bridge_create(struct prestera_switch *sw, u16 *bridge_id)
 int prestera_hw_bridge_delete(struct prestera_switch *sw, u16 bridge_id)
 {
        struct prestera_msg_bridge_req req = {
-               .bridge = bridge_id,
+               .bridge = __cpu_to_le16(bridge_id),
        };
 
        return prestera_cmd(sw, PRESTERA_CMD_TYPE_BRIDGE_DELETE,
@@ -1734,9 +1757,9 @@ int prestera_hw_bridge_delete(struct prestera_switch *sw, u16 bridge_id)
 int prestera_hw_bridge_port_add(struct prestera_port *port, u16 bridge_id)
 {
        struct prestera_msg_bridge_req req = {
-               .bridge = bridge_id,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .bridge = __cpu_to_le16(bridge_id),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_BRIDGE_PORT_ADD,
@@ -1746,9 +1769,9 @@ int prestera_hw_bridge_port_add(struct prestera_port *port, u16 bridge_id)
 int prestera_hw_bridge_port_delete(struct prestera_port *port, u16 bridge_id)
 {
        struct prestera_msg_bridge_req req = {
-               .bridge = bridge_id,
-               .port = port->hw_id,
-               .dev = port->dev_id,
+               .bridge = __cpu_to_le16(bridge_id),
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_BRIDGE_PORT_DELETE,
@@ -1769,28 +1792,17 @@ int prestera_hw_rxtx_init(struct prestera_switch *sw,
        if (err)
                return err;
 
-       params->map_addr = resp.map_addr;
+       params->map_addr = __le32_to_cpu(resp.map_addr);
 
        return 0;
 }
 
-int prestera_hw_rxtx_port_init(struct prestera_port *port)
-{
-       struct prestera_msg_rxtx_port_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-       };
-
-       return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_RXTX_PORT_INIT,
-                           &req.cmd, sizeof(req));
-}
-
 int prestera_hw_lag_member_add(struct prestera_port *port, u16 lag_id)
 {
        struct prestera_msg_lag_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .lag_id = lag_id,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .lag_id = __cpu_to_le16(lag_id),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_LAG_MEMBER_ADD,
@@ -1800,9 +1812,9 @@ int prestera_hw_lag_member_add(struct prestera_port *port, u16 lag_id)
 int prestera_hw_lag_member_del(struct prestera_port *port, u16 lag_id)
 {
        struct prestera_msg_lag_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .lag_id = lag_id,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .lag_id = __cpu_to_le16(lag_id),
        };
 
        return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_LAG_MEMBER_DELETE,
@@ -1813,9 +1825,9 @@ int prestera_hw_lag_member_enable(struct prestera_port *port, u16 lag_id,
                                  bool enable)
 {
        struct prestera_msg_lag_req req = {
-               .port = port->hw_id,
-               .dev = port->dev_id,
-               .lag_id = lag_id,
+               .port = __cpu_to_le32(port->hw_id),
+               .dev = __cpu_to_le32(port->dev_id),
+               .lag_id = __cpu_to_le16(lag_id),
        };
        u32 cmd;
 
@@ -1842,7 +1854,7 @@ prestera_hw_cpu_code_counters_get(struct prestera_switch *sw, u8 code,
        if (err)
                return err;
 
-       *packet_count = resp.packet_count;
+       *packet_count = __le64_to_cpu(resp.packet_count);
 
        return 0;
 }
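
The hunks above all apply one discipline: every multi-byte field of a request struct is stored in little-endian wire order with __cpu_to_le16()/__cpu_to_le32() before the message is handed to prestera_cmd(), and every multi-byte field read back from a response is converted with __le16_to_cpu()/__le32_to_cpu()/__le64_to_cpu(). A minimal sketch of the idea, using a hypothetical message struct rather than the driver's real layout:

struct example_msg_req {
	__le32 port;	/* 32-bit fields: __cpu_to_le32() on fill */
	__le16 vid;	/* 16-bit fields: __cpu_to_le16() on fill */
	u8 state;	/* single bytes need no conversion */
};

static void example_msg_fill(struct example_msg_req *req,
			     u32 port, u16 vid, u8 state)
{
	req->port = __cpu_to_le32(port);
	req->vid = __cpu_to_le16(vid);
	req->state = state;
}

One practical payoff: once the message structs carry __le16/__le32 types, sparse (make C=1) can flag any field that is filled without the matching conversion.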
index 546d5fd..57a3c2e 100644
@@ -20,6 +20,23 @@ enum prestera_fdb_flush_mode {
 };
 
 enum {
+       PRESTERA_MAC_MODE_INTERNAL,
+       PRESTERA_MAC_MODE_SGMII,
+       PRESTERA_MAC_MODE_1000BASE_X,
+       PRESTERA_MAC_MODE_KR,
+       PRESTERA_MAC_MODE_KR2,
+       PRESTERA_MAC_MODE_KR4,
+       PRESTERA_MAC_MODE_CR,
+       PRESTERA_MAC_MODE_CR2,
+       PRESTERA_MAC_MODE_CR4,
+       PRESTERA_MAC_MODE_SR_LR,
+       PRESTERA_MAC_MODE_SR_LR2,
+       PRESTERA_MAC_MODE_SR_LR4,
+
+       PRESTERA_MAC_MODE_MAX
+};
+
+enum {
        PRESTERA_LINK_MODE_10baseT_Half,
        PRESTERA_LINK_MODE_10baseT_Full,
        PRESTERA_LINK_MODE_100baseT_Half,
@@ -116,32 +133,29 @@ int prestera_hw_switch_mac_set(struct prestera_switch *sw, const char *mac);
 /* Port API */
 int prestera_hw_port_info_get(const struct prestera_port *port,
                              u32 *dev_id, u32 *hw_id, u16 *fp_id);
-int prestera_hw_port_state_set(const struct prestera_port *port,
-                              bool admin_state);
+
+int prestera_hw_port_mac_mode_get(const struct prestera_port *port,
+                                 u32 *mode, u32 *speed, u8 *duplex, u8 *fec);
+int prestera_hw_port_mac_mode_set(const struct prestera_port *port,
+                                 bool admin, u32 mode, u8 inband,
+                                 u32 speed, u8 duplex, u8 fec);
+int prestera_hw_port_phy_mode_get(const struct prestera_port *port,
+                                 u8 *mdix, u64 *lmode_bmap,
+                                 bool *fc_pause, bool *fc_asym);
+int prestera_hw_port_phy_mode_set(const struct prestera_port *port,
+                                 bool admin, bool adv, u32 mode, u64 modes,
+                                 u8 mdix);
+
 int prestera_hw_port_mtu_set(const struct prestera_port *port, u32 mtu);
 int prestera_hw_port_mtu_get(const struct prestera_port *port, u32 *mtu);
 int prestera_hw_port_mac_set(const struct prestera_port *port, const char *mac);
 int prestera_hw_port_mac_get(const struct prestera_port *port, char *mac);
 int prestera_hw_port_cap_get(const struct prestera_port *port,
                             struct prestera_port_caps *caps);
-int prestera_hw_port_remote_cap_get(const struct prestera_port *port,
-                                   u64 *link_mode_bitmap);
-int prestera_hw_port_remote_fc_get(const struct prestera_port *port,
-                                  bool *pause, bool *asym_pause);
 int prestera_hw_port_type_get(const struct prestera_port *port, u8 *type);
-int prestera_hw_port_fec_get(const struct prestera_port *port, u8 *fec);
-int prestera_hw_port_fec_set(const struct prestera_port *port, u8 fec);
-int prestera_hw_port_autoneg_set(const struct prestera_port *port,
-                                bool autoneg, u64 link_modes, u8 fec);
 int prestera_hw_port_autoneg_restart(struct prestera_port *port);
-int prestera_hw_port_duplex_get(const struct prestera_port *port, u8 *duplex);
 int prestera_hw_port_stats_get(const struct prestera_port *port,
                               struct prestera_port_stats *stats);
-int prestera_hw_port_link_mode_set(const struct prestera_port *port, u32 mode);
-int prestera_hw_port_link_mode_get(const struct prestera_port *port, u32 *mode);
-int prestera_hw_port_mdix_get(const struct prestera_port *port, u8 *status,
-                             u8 *admin_mode);
-int prestera_hw_port_mdix_set(const struct prestera_port *port, u8 mode);
 int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed);
 int prestera_hw_port_learning_set(struct prestera_port *port, bool enable);
 int prestera_hw_port_flood_set(struct prestera_port *port, unsigned long mask,
@@ -206,7 +220,6 @@ void prestera_hw_event_handler_unregister(struct prestera_switch *sw,
 /* RX/TX */
 int prestera_hw_rxtx_init(struct prestera_switch *sw,
                          struct prestera_rxtx_params *params);
-int prestera_hw_rxtx_port_init(struct prestera_port *port);
 
 /* LAG API */
 int prestera_hw_lag_member_add(struct prestera_port *port, u16 lag_id);
index d0d5a22..625b401 100644
@@ -80,27 +80,76 @@ struct prestera_port *prestera_find_port(struct prestera_switch *sw, u32 id)
        return port;
 }
 
-static int prestera_port_open(struct net_device *dev)
+int prestera_port_cfg_mac_read(struct prestera_port *port,
+                              struct prestera_port_mac_config *cfg)
+{
+       *cfg = port->cfg_mac;
+       return 0;
+}
+
+int prestera_port_cfg_mac_write(struct prestera_port *port,
+                               struct prestera_port_mac_config *cfg)
 {
-       struct prestera_port *port = netdev_priv(dev);
        int err;
 
-       err = prestera_hw_port_state_set(port, true);
+       err = prestera_hw_port_mac_mode_set(port, cfg->admin,
+                                           cfg->mode, cfg->inband, cfg->speed,
+                                           cfg->duplex, cfg->fec);
        if (err)
                return err;
 
+       port->cfg_mac = *cfg;
+       return 0;
+}
+
+static int prestera_port_open(struct net_device *dev)
+{
+       struct prestera_port *port = netdev_priv(dev);
+       struct prestera_port_mac_config cfg_mac;
+       int err = 0;
+
+       if (port->caps.transceiver == PRESTERA_PORT_TCVR_SFP) {
+               err = prestera_port_cfg_mac_read(port, &cfg_mac);
+               if (!err) {
+                       cfg_mac.admin = true;
+                       err = prestera_port_cfg_mac_write(port, &cfg_mac);
+               }
+       } else {
+               port->cfg_phy.admin = true;
+               err = prestera_hw_port_phy_mode_set(port, true, port->autoneg,
+                                                   port->cfg_phy.mode,
+                                                   port->adver_link_modes,
+                                                   port->cfg_phy.mdix);
+       }
+
        netif_start_queue(dev);
 
-       return 0;
+       return err;
 }
 
 static int prestera_port_close(struct net_device *dev)
 {
        struct prestera_port *port = netdev_priv(dev);
+       struct prestera_port_mac_config cfg_mac;
+       int err = 0;
 
        netif_stop_queue(dev);
 
-       return prestera_hw_port_state_set(port, false);
+       if (port->caps.transceiver == PRESTERA_PORT_TCVR_SFP) {
+               err = prestera_port_cfg_mac_read(port, &cfg_mac);
+               if (!err) {
+                       cfg_mac.admin = false;
+                       prestera_port_cfg_mac_write(port, &cfg_mac);
+               }
+       } else {
+               port->cfg_phy.admin = false;
+               err = prestera_hw_port_phy_mode_set(port, false, port->autoneg,
+                                                   port->cfg_phy.mode,
+                                                   port->adver_link_modes,
+                                                   port->cfg_phy.mdix);
+       }
+
+       return err;
 }
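
prestera_port_cfg_mac_read()/prestera_port_cfg_mac_write() give ndo_open/ndo_stop a read-modify-write view of the cached MAC config: the read copies the cache, the write pushes the new mode to the firmware and only then updates the cache. A hedged sketch of the pattern (the helper name is illustrative):

static int example_port_set_admin(struct prestera_port *port, bool up)
{
	struct prestera_port_mac_config cfg;
	int err;

	err = prestera_port_cfg_mac_read(port, &cfg);	/* copy cached cfg */
	if (err)
		return err;

	cfg.admin = up;		/* change only the admin state */
	return prestera_port_cfg_mac_write(port, &cfg);	/* push, then re-cache */
}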
 
 static netdev_tx_t prestera_port_xmit(struct sk_buff *skb,
@@ -228,46 +277,23 @@ static const struct net_device_ops prestera_netdev_ops = {
        .ndo_get_devlink_port = prestera_devlink_get_port,
 };
 
-int prestera_port_autoneg_set(struct prestera_port *port, bool enable,
-                             u64 adver_link_modes, u8 adver_fec)
+int prestera_port_autoneg_set(struct prestera_port *port, u64 link_modes)
 {
-       bool refresh = false;
-       u64 link_modes;
        int err;
-       u8 fec;
-
-       if (port->caps.type != PRESTERA_PORT_TYPE_TP)
-               return enable ? -EINVAL : 0;
-
-       if (!enable)
-               goto set_autoneg;
-
-       link_modes = port->caps.supp_link_modes & adver_link_modes;
-       fec = port->caps.supp_fec & adver_fec;
-
-       if (!link_modes && !fec)
-               return -EOPNOTSUPP;
-
-       if (link_modes && port->adver_link_modes != link_modes) {
-               port->adver_link_modes = link_modes;
-               refresh = true;
-       }
-
-       if (fec && port->adver_fec != fec) {
-               port->adver_fec = fec;
-               refresh = true;
-       }
 
-set_autoneg:
-       if (port->autoneg == enable && !refresh)
+       if (port->autoneg && port->adver_link_modes == link_modes)
                return 0;
 
-       err = prestera_hw_port_autoneg_set(port, enable, port->adver_link_modes,
-                                          port->adver_fec);
+       err = prestera_hw_port_phy_mode_set(port, port->cfg_phy.admin,
+                                           true, 0, link_modes,
+                                           port->cfg_phy.mdix);
        if (err)
                return err;
 
-       port->autoneg = enable;
+       port->adver_fec = BIT(PRESTERA_PORT_FEC_OFF);
+       port->adver_link_modes = link_modes;
+       port->cfg_phy.mode = 0;
+       port->autoneg = true;
 
        return 0;
 }
@@ -288,6 +314,7 @@ static void prestera_port_list_del(struct prestera_port *port)
 
 static int prestera_port_create(struct prestera_switch *sw, u32 id)
 {
+       struct prestera_port_mac_config cfg_mac;
        struct prestera_port *port;
        struct net_device *dev;
        int err;
@@ -359,16 +386,43 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id)
                goto err_port_init;
        }
 
-       port->adver_fec = BIT(PRESTERA_PORT_FEC_OFF);
-       prestera_port_autoneg_set(port, true, port->caps.supp_link_modes,
-                                 port->caps.supp_fec);
+       port->adver_link_modes = port->caps.supp_link_modes;
+       port->adver_fec = 0;
+       port->autoneg = true;
+
+       /* initialize the MAC config */
+       if (port->caps.transceiver != PRESTERA_PORT_TCVR_SFP) {
+               cfg_mac.admin = true;
+               cfg_mac.mode = PRESTERA_MAC_MODE_INTERNAL;
+       } else {
+               cfg_mac.admin = false;
+               cfg_mac.mode = PRESTERA_MAC_MODE_MAX;
+       }
+       cfg_mac.inband = false;
+       cfg_mac.speed = 0;
+       cfg_mac.duplex = DUPLEX_UNKNOWN;
+       cfg_mac.fec = PRESTERA_PORT_FEC_OFF;
 
-       err = prestera_hw_port_state_set(port, false);
+       err = prestera_port_cfg_mac_write(port, &cfg_mac);
        if (err) {
-               dev_err(prestera_dev(sw), "Failed to set port(%u) down\n", id);
+               dev_err(prestera_dev(sw), "Failed to set port(%u) mac mode\n", id);
                goto err_port_init;
        }
 
+       /* initialize the PHY config (if the port has an integral PHY) */
+       if (port->caps.transceiver != PRESTERA_PORT_TCVR_SFP) {
+               port->cfg_phy.mdix = ETH_TP_MDI_AUTO;
+               port->cfg_phy.admin = false;
+               err = prestera_hw_port_phy_mode_set(port,
+                                                   port->cfg_phy.admin,
+                                                   false, 0, 0,
+                                                   port->cfg_phy.mdix);
+               if (err) {
+                       dev_err(prestera_dev(sw), "Failed to set port(%u) phy mode\n", id);
+                       goto err_port_init;
+               }
+       }
+
        err = prestera_rxtx_port_init(port);
        if (err)
                goto err_port_init;
@@ -449,8 +503,10 @@ static void prestera_port_handle_event(struct prestera_switch *sw,
 
        caching_dw = &port->cached_hw_stats.caching_dw;
 
-       if (evt->id == PRESTERA_PORT_EVENT_STATE_CHANGED) {
-               if (evt->port_evt.data.oper_state) {
+       prestera_ethtool_port_state_changed(port, &evt->port_evt);
+
+       if (evt->id == PRESTERA_PORT_EVENT_MAC_STATE_CHANGED) {
+               if (port->state_mac.oper) {
                        netif_carrier_on(port->dev);
                        if (!delayed_work_pending(caching_dw))
                                queue_delayed_work(prestera_wq, caching_dw, 0);
index a250d39..5d4d410 100644
 
 #define PRESTERA_MSG_MAX_SIZE 1500
 
-#define PRESTERA_SUPP_FW_MAJ_VER       3
+#define PRESTERA_SUPP_FW_MAJ_VER       4
 #define PRESTERA_SUPP_FW_MIN_VER       0
 
-#define PRESTERA_PREV_FW_MAJ_VER       2
+#define PRESTERA_PREV_FW_MAJ_VER       4
 #define PRESTERA_PREV_FW_MIN_VER       0
 
 #define PRESTERA_FW_PATH_FMT   "mrvl/prestera/mvsw_prestera_fw-v%u.%u.img"
@@ -102,23 +102,30 @@ struct prestera_fw_evtq_regs {
        u32 len;
 };
 
+#define PRESTERA_CMD_QNUM_MAX  4
+
+struct prestera_fw_cmdq_regs {
+       u32 req_ctl;
+       u32 req_len;
+       u32 rcv_ctl;
+       u32 rcv_len;
+       u32 offs;
+       u32 len;
+};
+
 struct prestera_fw_regs {
        u32 fw_ready;
-       u32 pad;
        u32 cmd_offs;
        u32 cmd_len;
+       u32 cmd_qnum;
        u32 evt_offs;
        u32 evt_qnum;
 
-       u32 cmd_req_ctl;
-       u32 cmd_req_len;
-       u32 cmd_rcv_ctl;
-       u32 cmd_rcv_len;
-
        u32 fw_status;
        u32 rx_status;
 
-       struct prestera_fw_evtq_regs evtq_list[PRESTERA_EVT_QNUM_MAX];
+       struct prestera_fw_cmdq_regs cmdq_list[PRESTERA_CMD_QNUM_MAX];
+       struct prestera_fw_evtq_regs evtq_list[PRESTERA_EVT_QNUM_MAX];
 };
 
 #define PRESTERA_FW_REG_OFFSET(f)      offsetof(struct prestera_fw_regs, f)
@@ -130,14 +137,22 @@ struct prestera_fw_regs {
 
 #define PRESTERA_CMD_BUF_OFFS_REG      PRESTERA_FW_REG_OFFSET(cmd_offs)
 #define PRESTERA_CMD_BUF_LEN_REG       PRESTERA_FW_REG_OFFSET(cmd_len)
+#define PRESTERA_CMD_QNUM_REG          PRESTERA_FW_REG_OFFSET(cmd_qnum)
 #define PRESTERA_EVT_BUF_OFFS_REG      PRESTERA_FW_REG_OFFSET(evt_offs)
 #define PRESTERA_EVT_QNUM_REG          PRESTERA_FW_REG_OFFSET(evt_qnum)
 
-#define PRESTERA_CMD_REQ_CTL_REG       PRESTERA_FW_REG_OFFSET(cmd_req_ctl)
-#define PRESTERA_CMD_REQ_LEN_REG       PRESTERA_FW_REG_OFFSET(cmd_req_len)
+#define PRESTERA_CMDQ_REG_OFFSET(q, f)                 \
+       (PRESTERA_FW_REG_OFFSET(cmdq_list) +            \
+        (q) * sizeof(struct prestera_fw_cmdq_regs) +   \
+        offsetof(struct prestera_fw_cmdq_regs, f))
+
+#define PRESTERA_CMDQ_REQ_CTL_REG(q)   PRESTERA_CMDQ_REG_OFFSET(q, req_ctl)
+#define PRESTERA_CMDQ_REQ_LEN_REG(q)   PRESTERA_CMDQ_REG_OFFSET(q, req_len)
+#define PRESTERA_CMDQ_RCV_CTL_REG(q)   PRESTERA_CMDQ_REG_OFFSET(q, rcv_ctl)
+#define PRESTERA_CMDQ_RCV_LEN_REG(q)   PRESTERA_CMDQ_REG_OFFSET(q, rcv_len)
+#define PRESTERA_CMDQ_OFFS_REG(q)      PRESTERA_CMDQ_REG_OFFSET(q, offs)
+#define PRESTERA_CMDQ_LEN_REG(q)       PRESTERA_CMDQ_REG_OFFSET(q, len)
 
-#define PRESTERA_CMD_RCV_CTL_REG       PRESTERA_FW_REG_OFFSET(cmd_rcv_ctl)
-#define PRESTERA_CMD_RCV_LEN_REG       PRESTERA_FW_REG_OFFSET(cmd_rcv_len)
 #define PRESTERA_FW_STATUS_REG         PRESTERA_FW_REG_OFFSET(fw_status)
 #define PRESTERA_RX_STATUS_REG         PRESTERA_FW_REG_OFFSET(rx_status)
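
PRESTERA_CMDQ_REG_OFFSET() turns the flat register file into array indexing: skip to cmdq_list, step over q queue-sized blocks, then take the field offset. A worked example, given the six-u32 struct above (24 bytes per queue, no padding):

/* For queue 2, the req_len register:
 *
 *   PRESTERA_CMDQ_REQ_LEN_REG(2)
 *     = offsetof(struct prestera_fw_regs, cmdq_list)
 *       + 2 * 24                                      (skip queues 0 and 1)
 *       + offsetof(struct prestera_fw_cmdq_regs, req_len)   (= 4)
 */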
 
@@ -174,6 +189,13 @@ struct prestera_fw_evtq {
        size_t len;
 };
 
+struct prestera_fw_cmdq {
+       /* serialize access to dev->send_req */
+       struct mutex cmd_mtx;
+       u8 __iomem *addr;
+       size_t len;
+};
+
 struct prestera_fw {
        struct prestera_fw_rev rev_supp;
        const struct firmware *bin;
@@ -183,9 +205,10 @@ struct prestera_fw {
        u8 __iomem *ldr_ring_buf;
        u32 ldr_buf_len;
        u32 ldr_wr_idx;
-       struct mutex cmd_mtx; /* serialize access to dev->send_req */
        size_t cmd_mbox_len;
        u8 __iomem *cmd_mbox;
+       struct prestera_fw_cmdq cmd_queue[PRESTERA_CMD_QNUM_MAX];
+       u8 cmd_qnum;
        struct prestera_fw_evtq evt_queue[PRESTERA_EVT_QNUM_MAX];
        u8 evt_qnum;
        struct work_struct evt_work;
@@ -324,7 +347,27 @@ static int prestera_fw_wait_reg32(struct prestera_fw *fw, u32 reg, u32 cmp,
                                  1 * USEC_PER_MSEC, waitms * USEC_PER_MSEC);
 }
 
-static int prestera_fw_cmd_send(struct prestera_fw *fw,
+static void prestera_fw_cmdq_lock(struct prestera_fw *fw, u8 qid)
+{
+       mutex_lock(&fw->cmd_queue[qid].cmd_mtx);
+}
+
+static void prestera_fw_cmdq_unlock(struct prestera_fw *fw, u8 qid)
+{
+       mutex_unlock(&fw->cmd_queue[qid].cmd_mtx);
+}
+
+static u32 prestera_fw_cmdq_len(struct prestera_fw *fw, u8 qid)
+{
+       return fw->cmd_queue[qid].len;
+}
+
+static u8 __iomem *prestera_fw_cmdq_buf(struct prestera_fw *fw, u8 qid)
+{
+       return fw->cmd_queue[qid].addr;
+}
+
+static int prestera_fw_cmd_send(struct prestera_fw *fw, int qid,
                                void *in_msg, size_t in_size,
                                void *out_msg, size_t out_size,
                                unsigned int waitms)
@@ -335,30 +378,32 @@ static int prestera_fw_cmd_send(struct prestera_fw *fw,
        if (!waitms)
                waitms = PRESTERA_FW_CMD_DEFAULT_WAIT_MS;
 
-       if (ALIGN(in_size, 4) > fw->cmd_mbox_len)
+       if (ALIGN(in_size, 4) > prestera_fw_cmdq_len(fw, qid))
                return -EMSGSIZE;
 
        /* wait for finish previous reply from FW */
-       err = prestera_fw_wait_reg32(fw, PRESTERA_CMD_RCV_CTL_REG, 0, 30);
+       err = prestera_fw_wait_reg32(fw, PRESTERA_CMDQ_RCV_CTL_REG(qid), 0, 30);
        if (err) {
                dev_err(fw->dev.dev, "finish reply from FW is timed out\n");
                return err;
        }
 
-       prestera_fw_write(fw, PRESTERA_CMD_REQ_LEN_REG, in_size);
-       memcpy_toio(fw->cmd_mbox, in_msg, in_size);
+       prestera_fw_write(fw, PRESTERA_CMDQ_REQ_LEN_REG(qid), in_size);
+
+       memcpy_toio(prestera_fw_cmdq_buf(fw, qid), in_msg, in_size);
 
-       prestera_fw_write(fw, PRESTERA_CMD_REQ_CTL_REG, PRESTERA_CMD_F_REQ_SENT);
+       prestera_fw_write(fw, PRESTERA_CMDQ_REQ_CTL_REG(qid),
+                         PRESTERA_CMD_F_REQ_SENT);
 
        /* wait for reply from FW */
-       err = prestera_fw_wait_reg32(fw, PRESTERA_CMD_RCV_CTL_REG,
+       err = prestera_fw_wait_reg32(fw, PRESTERA_CMDQ_RCV_CTL_REG(qid),
                                     PRESTERA_CMD_F_REPL_SENT, waitms);
        if (err) {
                dev_err(fw->dev.dev, "reply from FW is timed out\n");
                goto cmd_exit;
        }
 
-       ret_size = prestera_fw_read(fw, PRESTERA_CMD_RCV_LEN_REG);
+       ret_size = prestera_fw_read(fw, PRESTERA_CMDQ_RCV_LEN_REG(qid));
        if (ret_size > out_size) {
                dev_err(fw->dev.dev, "ret_size (%u) > out_len(%zu)\n",
                        ret_size, out_size);
@@ -366,14 +411,15 @@ static int prestera_fw_cmd_send(struct prestera_fw *fw,
                goto cmd_exit;
        }
 
-       memcpy_fromio(out_msg, fw->cmd_mbox + in_size, ret_size);
+       memcpy_fromio(out_msg, prestera_fw_cmdq_buf(fw, qid) + in_size, ret_size);
 
 cmd_exit:
-       prestera_fw_write(fw, PRESTERA_CMD_REQ_CTL_REG, PRESTERA_CMD_F_REPL_RCVD);
+       prestera_fw_write(fw, PRESTERA_CMDQ_REQ_CTL_REG(qid),
+                         PRESTERA_CMD_F_REPL_RCVD);
        return err;
 }
 
-static int prestera_fw_send_req(struct prestera_device *dev,
+static int prestera_fw_send_req(struct prestera_device *dev, int qid,
                                void *in_msg, size_t in_size, void *out_msg,
                                size_t out_size, unsigned int waitms)
 {
@@ -382,9 +428,10 @@ static int prestera_fw_send_req(struct prestera_device *dev,
 
        fw = container_of(dev, struct prestera_fw, dev);
 
-       mutex_lock(&fw->cmd_mtx);
-       ret = prestera_fw_cmd_send(fw, in_msg, in_size, out_msg, out_size, waitms);
-       mutex_unlock(&fw->cmd_mtx);
+       prestera_fw_cmdq_lock(fw, qid);
+       ret = prestera_fw_cmd_send(fw, qid, in_msg, in_size, out_msg, out_size,
+                                  waitms);
+       prestera_fw_cmdq_unlock(fw, qid);
 
        return ret;
 }
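
The coarse fw->cmd_mtx is replaced by one mutex per command queue, so only callers targeting the same qid contend. A hedged sketch (queue ids and the wrapper are illustrative; reply buffers are elided for brevity):

static int example_send_on_two_queues(struct prestera_device *dev,
				      void *msg_a, size_t len_a,
				      void *msg_b, size_t len_b)
{
	int err;

	/* Each call locks only its own queue's cmd_mtx, so a slow
	 * command on qid 0 no longer delays one on qid 1 issued from
	 * another thread.
	 */
	err = prestera_fw_send_req(dev, 0, msg_a, len_a, NULL, 0, 0);
	if (err)
		return err;

	return prestera_fw_send_req(dev, 1, msg_b, len_b, NULL, 0, 0);
}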
@@ -414,7 +461,16 @@ static int prestera_fw_init(struct prestera_fw *fw)
 
        fw->cmd_mbox = base + prestera_fw_read(fw, PRESTERA_CMD_BUF_OFFS_REG);
        fw->cmd_mbox_len = prestera_fw_read(fw, PRESTERA_CMD_BUF_LEN_REG);
-       mutex_init(&fw->cmd_mtx);
+       fw->cmd_qnum = prestera_fw_read(fw, PRESTERA_CMD_QNUM_REG);
+
+       for (qid = 0; qid < fw->cmd_qnum; qid++) {
+               u32 offs = prestera_fw_read(fw, PRESTERA_CMDQ_OFFS_REG(qid));
+               struct prestera_fw_cmdq *cmdq = &fw->cmd_queue[qid];
+
+               cmdq->len = prestera_fw_read(fw, PRESTERA_CMDQ_LEN_REG(qid));
+               cmdq->addr = fw->cmd_mbox + offs;
+               mutex_init(&cmdq->cmd_mtx);
+       }
 
        fw->evt_buf = base + prestera_fw_read(fw, PRESTERA_EVT_BUF_OFFS_REG);
        fw->evt_qnum = prestera_fw_read(fw, PRESTERA_EVT_QNUM_REG);
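
prestera_fw_init() now reads the queue count and carves the shared command mailbox into per-queue windows, each with its own address, length and mutex. Roughly, the layout ends up as below; the offsets and lengths come from the firmware's per-queue registers, so the spacing is illustrative:

/*   cmd_mbox + offs[0] -- queue 0 window, len[0] bytes
 *   cmd_mbox + offs[1] -- queue 1 window, len[1] bytes
 *   ...
 *   cmd_mbox + offs[cmd_qnum - 1] -- last queue window
 */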
index 73d2eba..e452cde 100644
@@ -794,14 +794,7 @@ void prestera_rxtx_switch_fini(struct prestera_switch *sw)
 
 int prestera_rxtx_port_init(struct prestera_port *port)
 {
-       int err;
-
-       err = prestera_hw_rxtx_port_init(port);
-       if (err)
-               return err;
-
        port->dev->needed_headroom = PRESTERA_DSA_HLEN;
-
        return 0;
 }
 
index 5abb551..28b5b93 100644
@@ -4910,7 +4910,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        pci_set_master(pdev);
 
        if (sizeof(dma_addr_t) > sizeof(u32) &&
-           !(err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))) {
+           !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
                using_dac = 1;
                err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
                if (err < 0) {
index da1bec0..eae9aa9 100644
@@ -745,7 +745,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
        MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
        MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
                 ilog2(TRACER_BUFFER_PAGE_NUM));
-       MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
+       MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey);
 
        err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
                                   MLX5_REG_MTRC_CONF, 0, 1);
@@ -1028,7 +1028,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 
 err_notifier_unregister:
        mlx5_eq_notifier_unregister(dev, &tracer->nb);
-       mlx5_core_destroy_mkey(dev, &tracer->buff.mkey);
+       mlx5_core_destroy_mkey(dev, tracer->buff.mkey);
 err_dealloc_pd:
        mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
 err_cancel_work:
@@ -1051,7 +1051,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
        if (tracer->owner)
                mlx5_fw_tracer_ownership_release(tracer);
 
-       mlx5_core_destroy_mkey(tracer->dev, &tracer->buff.mkey);
+       mlx5_core_destroy_mkey(tracer->dev, tracer->buff.mkey);
        mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
 }
 
index 97252a8..4762b55 100644
@@ -89,7 +89,7 @@ struct mlx5_fw_tracer {
                void *log_buf;
                dma_addr_t dma;
                u32 size;
-               struct mlx5_core_mkey mkey;
+               u32 mkey;
                u32 consumer_index;
        } buff;
 
index ed4fb79..538adab 100644
@@ -30,7 +30,7 @@ static const char *const mlx5_rsc_sgmt_name[] = {
 
 struct mlx5_rsc_dump {
        u32 pdn;
-       struct mlx5_core_mkey mkey;
+       u32 mkey;
        u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
 };
 
@@ -89,7 +89,7 @@ static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump
                return -ENOMEM;
 
        in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num);
-       MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey.key);
+       MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey);
        MLX5_SET64(resource_dump, cmd->cmd, address, dma);
 
        err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd,
@@ -202,7 +202,7 @@ free_page:
 }
 
 static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-                                    struct mlx5_core_mkey *mkey)
+                                    u32 *mkey)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        void *mkc;
@@ -276,7 +276,7 @@ int mlx5_rsc_dump_init(struct mlx5_core_dev *dev)
        return err;
 
 destroy_mkey:
-       mlx5_core_destroy_mkey(dev, &rsc_dump->mkey);
+       mlx5_core_destroy_mkey(dev, rsc_dump->mkey);
 free_pd:
        mlx5_core_dealloc_pd(dev, rsc_dump->pdn);
        return err;
@@ -287,6 +287,6 @@ void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev)
        if (IS_ERR_OR_NULL(dev->rsc_dump))
                return;
 
-       mlx5_core_destroy_mkey(dev, &dev->rsc_dump->mkey);
+       mlx5_core_destroy_mkey(dev, dev->rsc_dump->mkey);
        mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn);
 }
index a3a4fec..f0ac6b0 100644
@@ -79,6 +79,11 @@ struct page_pool;
                                 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
 #define MLX5E_RX_MAX_HEAD (256)
+#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
+#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
+#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
+#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024)
+#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096)
 
 #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
        (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
@@ -152,6 +157,25 @@ struct page_pool;
 #define MLX5E_UMR_WQEBBS \
        (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
 
+#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
+       (sizeof(struct mlx5e_umr_wqe) +\
+       (sizeof(struct mlx5_klm) * (sgl_len)))
+
+#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
+       (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\
+       (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))
+
+#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\
+       (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))
+
+#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
+       ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
+
+#define MLX5E_MAX_KLM_PER_WQE(mdev) \
+       MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE)
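
The KLM helpers size UMR WQEs that carry inline KLM (scatter) entries. A worked example with assumed sizes (the real values come from the mlx5 headers, not from this hunk): a 128-byte base mlx5e_umr_wqe, 16-byte struct mlx5_klm entries, and 64-byte send WQE basic blocks (MLX5_SEND_WQE_BB):

/*   MLX5E_KLM_UMR_WQE_SZ(8) = 128 + 8 * 16          = 256 bytes
 *   MLX5E_KLM_UMR_WQEBBS(8) = DIV_ROUND_UP(256, 64) = 4 basic blocks
 */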
+
 #define MLX5E_MSG_LEVEL                        NETIF_MSG_LINK
 
 #define mlx5e_dbg(mlevel, priv, format, ...)                    \
@@ -217,7 +241,10 @@ struct mlx5e_umr_wqe {
        struct mlx5_wqe_ctrl_seg       ctrl;
        struct mlx5_wqe_umr_ctrl_seg   uctrl;
        struct mlx5_mkey_seg           mkc;
-       struct mlx5_mtt                inline_mtts[0];
+       union {
+               struct mlx5_mtt inline_mtts[0];
+               struct mlx5_klm inline_klms[0];
+       };
 };
 
 enum mlx5e_priv_flag {
@@ -242,6 +269,21 @@ enum mlx5e_priv_flag {
 
 #define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
 
+enum packet_merge {
+       MLX5E_PACKET_MERGE_NONE,
+       MLX5E_PACKET_MERGE_LRO,
+       MLX5E_PACKET_MERGE_SHAMPO,
+};
+
+struct mlx5e_packet_merge_param {
+       enum packet_merge type;
+       u32 timeout;
+       struct {
+               u8 match_criteria_type;
+               u8 alignment_granularity;
+       } shampo;
+};
+
 struct mlx5e_params {
        u8  log_sq_size;
        u8  rq_wq_type;
@@ -259,13 +301,12 @@ struct mlx5e_params {
        bool tunneled_offload_en;
        struct dim_cq_moder rx_cq_moderation;
        struct dim_cq_moder tx_cq_moderation;
-       bool lro_en;
+       struct mlx5e_packet_merge_param packet_merge;
        u8  tx_min_inline_mode;
        bool vlan_strip_disable;
        bool scatter_fcs_en;
        bool rx_dim_enabled;
        bool tx_dim_enabled;
-       u32 lro_timeout;
        u32 pflags;
        struct bpf_prog *xdp_prog;
        struct mlx5e_xsk *xsk;
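
With lro_en/lro_timeout folded into a single packet_merge descriptor, LRO and SHAMPO become mutually exclusive settings of one field rather than independent flags. A hedged sketch of how a caller might switch between them (the helper and values are illustrative):

static void example_select_merge_mode(struct mlx5e_params *params,
				      bool use_shampo, u32 timeout)
{
	if (use_shampo)
		params->packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
	else
		params->packet_merge.type = MLX5E_PACKET_MERGE_LRO;

	params->packet_merge.timeout = timeout;
}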
@@ -287,7 +328,8 @@ enum {
        MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
        MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
        MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */
-       MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX /* set when mini_cqe_resp_stride_index cap is used */
+       MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
+       MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
 };
 
 struct mlx5e_cq {
@@ -578,6 +620,7 @@ typedef struct sk_buff *
                         struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
 typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
 typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
+typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool);
 
 int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk);
 void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params);
@@ -599,6 +642,25 @@ struct mlx5e_rq_frags_info {
        u8 wqe_bulk;
 };
 
+struct mlx5e_shampo_hd {
+       u32 mkey;
+       struct mlx5e_dma_info *info;
+       struct page *last_page;
+       u16 hd_per_wq;
+       u16 hd_per_wqe;
+       unsigned long *bitmap;
+       u16 pi;
+       u16 ci;
+       __be32 key;
+       u64 last_addr;
+};
+
+struct mlx5e_hw_gro_data {
+       struct sk_buff *skb;
+       struct flow_keys fk;
+       int second_ip_id;
+};
+
 struct mlx5e_rq {
        /* data path */
        union {
@@ -620,6 +682,7 @@ struct mlx5e_rq {
                        u8                     umr_in_progress;
                        u8                     umr_last_bulk;
                        u8                     umr_completed;
+                       struct mlx5e_shampo_hd *shampo;
                } mpwqe;
        };
        struct {
@@ -639,6 +702,8 @@ struct mlx5e_rq {
        struct mlx5e_icosq    *icosq;
        struct mlx5e_priv     *priv;
 
+       struct mlx5e_hw_gro_data *hw_gro_data;
+
        mlx5e_fp_handle_rx_cqe handle_rx_cqe;
        mlx5e_fp_post_rx_wqes  post_wqes;
        mlx5e_fp_dealloc_wqe   dealloc_wqe;
@@ -666,7 +731,7 @@ struct mlx5e_rq {
        u8                     wq_type;
        u32                    rqn;
        struct mlx5_core_dev  *mdev;
-       struct mlx5_core_mkey  umr_mkey;
+       u32  umr_mkey;
        struct mlx5e_dma_info  wqe_overflow;
 
        /* XDP read-mostly */
@@ -886,6 +951,7 @@ struct mlx5e_priv {
 struct mlx5e_rx_handlers {
        mlx5e_fp_handle_rx_cqe handle_rx_cqe;
        mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
+       mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo;
 };
 
 extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic;
@@ -915,6 +981,7 @@ void mlx5e_build_ptys2ethtool_map(void);
 
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
 
+void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close);
 void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
 
index 3cbb596..f8c2902 100644
@@ -87,7 +87,8 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
        u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
                                 mlx5e_rx_get_linear_frag_sz(params, NULL));
 
-       return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
+       return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE &&
+               linear_frag_sz <= PAGE_SIZE;
 }
 
 bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
@@ -138,6 +139,27 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
        return params->log_rq_mtu_frames - log_pkts_per_wqe;
 }
 
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+                                     struct mlx5e_params *params)
+{
+       return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE));
+}
+
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+                                 struct mlx5e_params *params)
+{
+       return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE);
+}
+
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+                                    struct mlx5e_params *params)
+{
+       u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) *
+                        PAGE_SIZE;
+
+       return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu));
+}
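
The three helpers above fix the SHAMPO geometry as powers of two. Plugging in the constants from the header hunk earlier (MLX5E_RX_MAX_HEAD = 256, base head entry 64 B, 64 KiB reservation over a 4 KiB base), with an assumed 4 KiB PAGE_SIZE and 1500-byte sw_mtu:

/*   log_hd_entry_size = order_base_2(DIV_ROUND_UP(256, 64))         = 2
 *   log_rsrv_size     = order_base_2(65536 / 4096)                  = 4
 *   log_pkt_per_rsrv  = order_base_2(DIV_ROUND_UP(16 * 4096, 1500))
 *                     = order_base_2(44)                            = 6
 */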
+
 u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
                                   struct mlx5e_params *params,
                                   struct mlx5e_xsk_param *xsk)
@@ -164,19 +186,8 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
                mlx5e_rx_is_linear_skb(params, xsk) :
                mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
 
-       return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
-}
-
-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
-{
-       struct mlx5e_lro_param lro_param;
-
-       lro_param = (struct mlx5e_lro_param) {
-               .enabled = params->lro_en,
-               .timeout = params->lro_timeout,
-       };
-
-       return lro_param;
+       return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
+               mlx5e_get_linear_rq_headroom(params, xsk) : 0;
 }
 
 u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
@@ -453,6 +464,23 @@ static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
                MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
 }
 
+static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
+                                       struct mlx5e_params *params,
+                                       struct mlx5e_xsk_param *xsk)
+{
+       int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+       u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+       int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+       u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+       int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(params, xsk));
+       int wqe_size = BIT(log_stride_sz) * num_strides;
+
+       /* +1 is for the case where the packets per reservation don't consume
+        * the whole reservation, so we get a filler cqe for the rest of it.
+        */
+       return order_base_2((wqe_size / rsrv_size) * wq_size * (pkt_per_rsrv + 1));
+}
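
mlx5e_shampo_get_log_cq_size() budgets one extra CQE per reservation for that filler case. Continuing the illustrative numbers above, and further assuming wqe_size = 256 KiB, rsrv_size = 64 KiB, wq_size = 8 and pkt_per_rsrv = 64:

/*   (262144 / 65536) * 8 * (64 + 1) = 2080
 *   order_base_2(2080)              = 12   (a 4096-entry CQ)
 */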
+
 static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
                                    struct mlx5e_params *params,
                                    struct mlx5e_xsk_param *xsk,
@@ -464,9 +492,12 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
 
        switch (params->rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
-                       mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
                hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
+               if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+                       log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
+               else
+                       log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
+                               mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
                break;
        default: /* MLX5_WQ_TYPE_CYCLIC */
                log_cq_size = params->log_rq_mtu_frames;
@@ -485,10 +516,11 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
 
 static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
+       bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
        bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
                MLX5_CAP_GEN(mdev, relaxed_ordering_write);
 
-       return ro && params->lro_en ?
+       return ro && lro_en ?
                MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
 }
 
@@ -520,6 +552,22 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
                MLX5_SET(wq, wq, log_wqe_stride_size,
                         log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
                MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
+               if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+                       MLX5_SET(wq, wq, shampo_enable, true);
+                       MLX5_SET(wq, wq, log_reservation_size,
+                                mlx5e_shampo_get_log_rsrv_size(mdev, params));
+                       MLX5_SET(wq, wq,
+                                log_max_num_of_packets_per_reservation,
+                                mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+                       MLX5_SET(wq, wq, log_headers_entry_size,
+                                mlx5e_shampo_get_log_hd_entry_size(mdev, params));
+                       MLX5_SET(rqc, rqc, reservation_timeout,
+                                params->packet_merge.timeout);
+                       MLX5_SET(rqc, rqc, shampo_match_criteria_type,
+                                params->packet_merge.shampo.match_criteria_type);
+                       MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
+                                params->packet_merge.shampo.alignment_granularity);
+               }
                break;
        }
        default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -620,17 +668,80 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
        return MLX5_GET(wq, wq, log_wq_sz);
 }
 
-static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
+/* This function calculates the maximum number of header entries that are
+ * needed per WQE. The formula is based on the size of the reservations and
+ * on the restriction that the max number of packets per reservation equals
+ * the max number of headers per reservation.
+ */
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+                           struct mlx5e_params *params,
+                           struct mlx5e_rq_param *rq_param)
+{
+       int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+       u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL));
+       int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+       u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL);
+       int wqe_size = BIT(log_stride_sz) * num_strides;
+       u32 hd_per_wqe;
+
+       /* Assumption: hd_per_wqe % 8 == 0. */
+       hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv;
+       mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n",
+                     __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv);
+       return hd_per_wqe;
+}
+
+/* This function calculates the maximum number of header entries that are
+ * needed for the WQ. This value is used to allocate the header buffer in HW,
+ * and thus must be a power of 2.
+ */
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+                          struct mlx5e_params *params,
+                          struct mlx5e_rq_param *rq_param)
+{
+       void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+       int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+       u32 hd_per_wqe, hd_per_wq;
+
+       hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+       hd_per_wq = roundup_pow_of_two(hd_per_wqe * wq_size);
+       return hd_per_wq;
+}
+
+static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
+                                struct mlx5e_params *params,
+                                struct mlx5e_rq_param *rq_param)
+{
+       int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest;
+       void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+       int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+       u32 wqebbs;
+
+       max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
+       max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+       max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
+       rest = max_hd_per_wqe % max_klm_per_umr;
+       wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe;
+       if (rest)
+               wqebbs += MLX5E_KLM_UMR_WQEBBS(rest);
+       wqebbs *= wq_size;
+       return wqebbs;
+}
+
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
+                                     struct mlx5e_params *params,
                                      struct mlx5e_rq_param *rqp)
 {
-       switch (params->rq_wq_type) {
-       case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE,
-                            order_base_2(MLX5E_UMR_WQEBBS) +
-                            mlx5e_get_rq_log_wq_sz(rqp->rqc));
-       default: /* MLX5_WQ_TYPE_CYCLIC */
+       u32 wqebbs;
+
+       /* MLX5_WQ_TYPE_CYCLIC */
+       if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
                return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
-       }
+
+       wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc));
+       if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+               wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp);
+       return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs));
 }
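
mlx5e_build_icosq_log_wq_sz() now adds the SHAMPO UMR budget on top of the regular UMR WQEBBs. For intuition, with hypothetical numbers: if a WQE needs max_hd_per_wqe = 200 header entries and one UMR WQE carries at most max_klm_per_umr = 64 KLM entries, mlx5e_shampo_icosq_sz() charges three full UMRs plus one partial UMR for the remaining 8 entries, then scales by the RQ size:

/*   wqebbs  = 3 * MLX5E_KLM_UMR_WQEBBS(64) + MLX5E_KLM_UMR_WQEBBS(8)
 *   wqebbs *= wq_size
 */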
 
 static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
@@ -697,7 +808,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
        if (err)
                return err;
 
-       icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
+       icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(mdev, params, &cparam->rq);
        async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
 
        mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
index 879ad46..433e696 100644
@@ -11,11 +11,6 @@ struct mlx5e_xsk_param {
        u16 chunk_size;
 };
 
-struct mlx5e_lro_param {
-       bool enabled;
-       u32 timeout;
-};
-
 struct mlx5e_cq_param {
        u32                        cqc[MLX5_ST_SZ_DW(cqc)];
        struct mlx5_wq_param       wq;
@@ -116,6 +111,18 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
                                  struct mlx5e_xsk_param *xsk);
 u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
                               struct mlx5e_xsk_param *xsk);
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+                                     struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+                                 struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+                                    struct mlx5e_params *params);
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+                           struct mlx5e_params *params,
+                           struct mlx5e_rq_param *rq_param);
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+                          struct mlx5e_params *params,
+                          struct mlx5e_rq_param *rq_param);
 u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
                                   struct mlx5e_params *params,
                                   struct mlx5e_xsk_param *xsk);
@@ -125,7 +132,6 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
 u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
                          struct mlx5e_params *params,
                          struct mlx5e_xsk_param *xsk);
-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params);
 
 /* Build queue parameters */
 
index 3a86f66..18d542b 100644
@@ -682,7 +682,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
        c->tstamp   = &priv->tstamp;
        c->pdev     = mlx5_core_dma_dev(priv->mdev);
        c->netdev   = priv->netdev;
-       c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
+       c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
        c->num_tc   = mlx5e_get_dcb_num_tc(params);
        c->stats    = &priv->ptp_stats.ch;
        c->lag_port = lag_port;
index b8b481b..c1cdd8c 100644
@@ -127,7 +127,7 @@ mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
 
 static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
                                enum mlx5_traffic_types tt,
-                               const struct mlx5e_lro_param *init_lro_param,
+                               const struct mlx5e_packet_merge_param *init_pkt_merge_param,
                                bool inner)
 {
        struct mlx5e_rss_params_traffic_type rss_tt;
@@ -161,7 +161,7 @@ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
        rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
        mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
                                    rqtn, rss->inner_ft_support);
-       mlx5e_tir_builder_build_lro(builder, init_lro_param);
+       mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
        rss_tt = mlx5e_rss_get_tt_config(rss, tt);
        mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
 
@@ -198,14 +198,14 @@ static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types
 }
 
 static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
-                                const struct mlx5e_lro_param *init_lro_param,
+                                const struct mlx5e_packet_merge_param *init_pkt_merge_param,
                                 bool inner)
 {
        enum mlx5_traffic_types tt, max_tt;
        int err;
 
        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+               err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
                if (err)
                        goto err_destroy_tirs;
        }
@@ -297,7 +297,7 @@ int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
 
 int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
                   bool inner_ft_support, u32 drop_rqn,
-                  const struct mlx5e_lro_param *init_lro_param)
+                  const struct mlx5e_packet_merge_param *init_pkt_merge_param)
 {
        int err;
 
@@ -305,12 +305,12 @@ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
        if (err)
                goto err_out;
 
-       err = mlx5e_rss_create_tirs(rss, init_lro_param, false);
+       err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false);
        if (err)
                goto err_destroy_rqt;
 
        if (inner_ft_support) {
-               err = mlx5e_rss_create_tirs(rss, init_lro_param, true);
+               err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true);
                if (err)
                        goto err_destroy_tirs;
        }
@@ -372,7 +372,7 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
  */
 int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
                          enum mlx5_traffic_types tt,
-                         const struct mlx5e_lro_param *init_lro_param,
+                         const struct mlx5e_packet_merge_param *init_pkt_merge_param,
                          bool inner, u32 *tirn)
 {
        struct mlx5e_tir *tir;
@@ -381,7 +381,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
        if (!tir) { /* TIR doesn't exist, create one */
                int err;
 
-               err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+               err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
                if (err)
                        return err;
                tir = rss_get_tir(rss, tt, inner);
@@ -419,7 +419,8 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss)
                               mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
 }
 
-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param)
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+                                    struct mlx5e_packet_merge_param *pkt_merge_param)
 {
        struct mlx5e_tir_builder *builder;
        enum mlx5_traffic_types tt;
@@ -429,7 +430,7 @@ int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_p
        if (!builder)
                return -ENOMEM;
 
-       mlx5e_tir_builder_build_lro(builder, lro_param);
+       mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
 
        final_err = 0;
 
index d522a10..c6b2164 100644
@@ -17,7 +17,7 @@ struct mlx5e_rss *mlx5e_rss_alloc(void);
 void mlx5e_rss_free(struct mlx5e_rss *rss);
 int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
                   bool inner_ft_support, u32 drop_rqn,
-                  const struct mlx5e_lro_param *init_lro_param);
+                  const struct mlx5e_packet_merge_param *init_pkt_merge_param);
 int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
                           bool inner_ft_support, u32 drop_rqn);
 int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
@@ -30,13 +30,14 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
                       bool inner);
 int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
                          enum mlx5_traffic_types tt,
-                         const struct mlx5e_lro_param *init_lro_param,
+                         const struct mlx5e_packet_merge_param *init_pkt_merge_param,
                          bool inner, u32 *tirn);
 
 void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
 void mlx5e_rss_disable(struct mlx5e_rss *rss);
 
-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param);
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+                                    struct mlx5e_packet_merge_param *pkt_merge_param);
 int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
 int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
                       const u8 *key, const u8 *hfunc,
index 13056cb..1429538 100644 (file)
@@ -34,7 +34,7 @@ struct mlx5e_rx_res {
 /* API for rx_res_rss_* */
 
 static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
-                                    const struct mlx5e_lro_param *init_lro_param,
+                                    const struct mlx5e_packet_merge_param *init_pkt_merge_param,
                                     unsigned int init_nch)
 {
        bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
@@ -49,7 +49,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
                return -ENOMEM;
 
        err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
-                            init_lro_param);
+                            init_pkt_merge_param);
        if (err)
                goto err_rss_free;
 
@@ -275,7 +275,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
 }
 
 static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
-                                     const struct mlx5e_lro_param *init_lro_param)
+                                     const struct mlx5e_packet_merge_param *init_pkt_merge_param)
 {
        bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
        struct mlx5e_tir_builder *builder;
@@ -306,7 +306,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
                mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
                                            mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
                                            inner_ft_support);
-               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
                mlx5e_tir_builder_build_direct(builder);
 
                err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
@@ -336,7 +336,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
                mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
                                            mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
                                            inner_ft_support);
-               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
                mlx5e_tir_builder_build_direct(builder);
 
                err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
@@ -437,7 +437,7 @@ static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
 
 int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
                      enum mlx5e_rx_res_features features, unsigned int max_nch,
-                     u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+                     u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
                      unsigned int init_nch)
 {
        int err;
@@ -447,11 +447,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
        res->max_nch = max_nch;
        res->drop_rqn = drop_rqn;
 
-       err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch);
+       err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
        if (err)
                goto err_out;
 
-       err = mlx5e_rx_res_channels_init(res, init_lro_param);
+       err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param);
        if (err)
                goto err_rss_destroy;
 
@@ -645,7 +645,8 @@ int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
        return err;
 }
 
-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+                                       struct mlx5e_packet_merge_param *pkt_merge_param)
 {
        struct mlx5e_tir_builder *builder;
        int err, final_err;
@@ -655,7 +656,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
        if (!builder)
                return -ENOMEM;
 
-       mlx5e_tir_builder_build_lro(builder, lro_param);
+       mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
 
        final_err = 0;
 
@@ -665,7 +666,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
                if (!rss)
                        continue;
 
-               err = mlx5e_rss_lro_set_param(rss, lro_param);
+               err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param);
                if (err)
                        final_err = final_err ? : err;
        }
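
Both per-TIR loops in this function aggregate failures with first-error-wins
semantics, using the GNU "?:" (elvis) extension so that every TIR is still
attempted even after one fails. A minimal standalone illustration of the
idiom, with a made-up do_one() failure pattern:

    #include <stdio.h>

    static int do_one(int i)
    {
            return (i == 1 || i == 3) ? -(5 + i) : 0; /* fail on 1 and 3 */
    }

    int main(void)
    {
            int final_err = 0;

            for (int i = 0; i < 5; i++) {
                    int err = do_one(i);

                    if (err)
                            final_err = final_err ?: err; /* keep the first error */
            }
            printf("%d\n", final_err); /* -6: the error from i == 1 */
            return 0;
    }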
@@ -673,7 +674,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
        for (ix = 0; ix < res->max_nch; ix++) {
                err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
                if (err) {
-                       mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
+                       mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n",
                                       mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
                        if (!final_err)
                                final_err = err;
index 4a15942..d09f7d1 100644 (file)
@@ -25,7 +25,7 @@ enum mlx5e_rx_res_features {
 struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
 int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
                      enum mlx5e_rx_res_features features, unsigned int max_nch,
-                     u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+                     u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
                      unsigned int init_nch);
 void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
 void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
@@ -57,7 +57,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
 u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
 int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
                                     u8 rx_hash_fields);
-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+                                       struct mlx5e_packet_merge_param *pkt_merge_param);
 
 int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
 int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
index de936dc..da169b8 100644 (file)
@@ -70,24 +70,30 @@ void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
        MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
 }
 
-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
-                                const struct mlx5e_lro_param *lro_param)
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+                                         const struct mlx5e_packet_merge_param *pkt_merge_param)
 {
        void *tirc = mlx5e_tir_builder_get_tirc(builder);
        const unsigned int rough_max_l2_l3_hdr_sz = 256;
 
        if (builder->modify)
-               MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1);
-
-       if (!lro_param->enabled)
-               return;
-
-       MLX5_SET(tirc, tirc, lro_enable_mask,
-                MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
-                MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
-       MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
-                (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
-       MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout);
+               MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1);
+
+       switch (pkt_merge_param->type) {
+       case MLX5E_PACKET_MERGE_LRO:
+               MLX5_SET(tirc, tirc, packet_merge_mask,
+                        MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
+                        MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
+               MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+                        (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+               MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
+               break;
+       case MLX5E_PACKET_MERGE_SHAMPO:
+               MLX5_SET(tirc, tirc, packet_merge_mask, MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO);
+               break;
+       default:
+               break;
+       }
 }
 
 static int mlx5e_hfunc_to_hw(u8 hfunc)
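
The builder now keys off a packet-merge descriptor instead of a bare LRO
flag. The struct itself is defined in en.h, outside this excerpt; the sketch
below is a reconstruction inferred from its uses in this series (the type and
timeout fields above, the shampo.* fields set in set_feature_hw_gro further
down), not the authoritative definition:

    enum mlx5e_packet_merge_type {
            MLX5E_PACKET_MERGE_NONE,
            MLX5E_PACKET_MERGE_LRO,
            MLX5E_PACKET_MERGE_SHAMPO,
    };

    struct mlx5e_packet_merge_param {
            enum mlx5e_packet_merge_type type;
            u32 timeout; /* LRO session timeout, in usecs */
            struct {
                    u8 match_criteria_type;
                    u8 alignment_granularity;
            } shampo;
    };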
index e45149a..857a84b 100644 (file)
@@ -18,7 +18,7 @@ struct mlx5e_rss_params_traffic_type {
 };
 
 struct mlx5e_tir_builder;
-struct mlx5e_lro_param;
+struct mlx5e_packet_merge_param;
 
 struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
 void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
@@ -27,8 +27,8 @@ void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
 void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
 void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
                                 u32 rqtn, bool inner_ft_support);
-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
-                                const struct mlx5e_lro_param *lro_param);
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+                                         const struct mlx5e_packet_merge_param *pkt_merge_param);
 void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
                                 const struct mlx5e_rss_params_hash *rss_hash,
                                 const struct mlx5e_rss_params_traffic_type *rss_tt,
index d54607a..a55b066 100644 (file)
@@ -137,7 +137,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
        t->tstamp   = &priv->tstamp;
        t->pdev     = mlx5_core_dma_dev(priv->mdev);
        t->netdev   = priv->netdev;
-       t->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
+       t->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
        t->stats    = &priv->trap_stats.ch;
 
        netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64);
index 055c3bc..4cdf8e5 100644 (file)
@@ -36,6 +36,7 @@ ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_
 enum mlx5e_icosq_wqe_type {
        MLX5E_ICOSQ_WQE_NOP,
        MLX5E_ICOSQ_WQE_UMR_RX,
+       MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
 #ifdef CONFIG_MLX5_EN_TLS
        MLX5E_ICOSQ_WQE_UMR_TLS,
        MLX5E_ICOSQ_WQE_SET_PSV_TLS,
@@ -166,6 +167,10 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
        return pi;
 }
 
+struct mlx5e_shampo_umr {
+       u16 len;
+};
+
 struct mlx5e_icosq_wqe_info {
        u8 wqe_type;
        u8 num_wqebbs;
@@ -175,6 +180,7 @@ struct mlx5e_icosq_wqe_info {
                struct {
                        struct mlx5e_rq *rq;
                } umr;
+               struct mlx5e_shampo_umr shampo;
 #ifdef CONFIG_MLX5_EN_TLS
                struct {
                        struct mlx5e_ktls_offload_context_rx *priv_rx;
index 84eb720..c0f409c 100644 (file)
@@ -47,7 +47,7 @@ void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
 }
 
 static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-                            struct mlx5_core_mkey *mkey)
+                            u32 *mkey)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        void *mkc;
@@ -108,7 +108,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
        return 0;
 
 err_destroy_mkey:
-       mlx5_core_destroy_mkey(mdev, &res->mkey);
+       mlx5_core_destroy_mkey(mdev, res->mkey);
 err_dealloc_transport_domain:
        mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
 err_dealloc_pd:
@@ -121,7 +121,7 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
        struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 
        mlx5_free_bfreg(mdev, &res->bfreg);
-       mlx5_core_destroy_mkey(mdev, &res->mkey);
+       mlx5_core_destroy_mkey(mdev, res->mkey);
        mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
        mlx5_core_dealloc_pd(mdev, res->pdn);
        memset(res, 0, sizeof(*res));
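
A theme that repeats across this series: the driver-side mkey handle shrinks
from a struct mlx5_core_mkey to the bare 32-bit key, so the ".key"
dereferences and address-of operators disappear at every call site. A minimal
sketch of the new calling convention, matching the signatures used in the
hunks above (error handling trimmed):

    static int example_mkey_roundtrip(struct mlx5_core_dev *mdev,
                                      u32 *in, int inlen)
    {
            u32 mkey; /* was: struct mlx5_core_mkey mkey; */
            int err;

            err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
            if (err)
                    return err;

            /* Consumers now read the key directly, e.g. cpu_to_be32(mkey)
             * where they previously wrote cpu_to_be32(mkey.key).
             */
            mlx5_core_destroy_mkey(mdev, mkey); /* was: &mkey */
            return 0;
    }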
index 25926e5..c2ea5fa 100644 (file)
@@ -1900,6 +1900,11 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
                return -EINVAL;
        }
 
+       if (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+               netdev_warn(priv->netdev, "Can't set CQE compression with HW-GRO, disable it first.\n");
+               return -EINVAL;
+       }
+
        new_params = priv->channels.params;
        MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
        if (rx_filter)
@@ -1952,8 +1957,8 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
                        return -EOPNOTSUPP;
                if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
                        return -EINVAL;
-       } else if (priv->channels.params.lro_en) {
-               netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n");
+       } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+               netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n");
                return -EINVAL;
        }
 
index 81ebf28..ad0d234 100644 (file)
@@ -411,7 +411,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
                         u32 rss_context, u32 *tirn)
 {
        if (fs->flow_type & FLOW_RSS) {
-               struct mlx5e_lro_param lro_param;
+               struct mlx5e_packet_merge_param pkt_merge_param;
                struct mlx5e_rss *rss;
                u32 flow_type;
                int err;
@@ -426,8 +426,8 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
                if (tt < 0)
                        return -EINVAL;
 
-               lro_param = mlx5e_get_lro_param(&priv->channels.params);
-               err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn);
+               pkt_merge_param = priv->channels.params.packet_merge;
+               err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn);
                if (err)
                        return err;
                eth_rule->rss = rss;
index f3dec58..6557159 100644 (file)
@@ -218,6 +218,45 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
        ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 }
 
+static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node)
+{
+       rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo),
+                                        GFP_KERNEL, node);
+       if (!rq->mpwqe.shampo)
+               return -ENOMEM;
+       return 0;
+}
+
+static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq)
+{
+       kvfree(rq->mpwqe.shampo);
+}
+
+static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
+{
+       struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+
+       shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
+                                           node);
+       if (!shampo->bitmap)
+               return -ENOMEM;
+
+       shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
+                                               sizeof(*shampo->info)),
+                                    GFP_KERNEL, node);
+       if (!shampo->info) {
+               kvfree(shampo->bitmap);
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
+{
+       kvfree(rq->mpwqe.shampo->bitmap);
+       kvfree(rq->mpwqe.shampo->info);
+}
+
 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
 {
        int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
@@ -233,10 +272,9 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
        return 0;
 }
 
-static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
-                                u64 npages, u8 page_shift,
-                                struct mlx5_core_mkey *umr_mkey,
-                                dma_addr_t filler_addr)
+static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev,
+                                    u64 npages, u8 page_shift, u32 *umr_mkey,
+                                    dma_addr_t filler_addr)
 {
        struct mlx5_mtt *mtt;
        int inlen;
@@ -284,12 +322,59 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
        return err;
 }
 
+static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
+                                    u64 nentries,
+                                    u32 *umr_mkey)
+{
+       int inlen;
+       void *mkc;
+       u32 *in;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+       MLX5_SET(mkc, mkc, free, 1);
+       MLX5_SET(mkc, mkc, umr_en, 1);
+       MLX5_SET(mkc, mkc, lw, 1);
+       MLX5_SET(mkc, mkc, lr, 1);
+       MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
+       mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
+       MLX5_SET(mkc, mkc, translations_octword_size, nentries);
+       MLX5_SET(mkc, mkc, length64, 1);
+       err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+       kvfree(in);
+       return err;
+}
+
 static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
 {
        u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
 
-       return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey,
-                                    rq->wqe_overflow.addr);
+       return mlx5e_create_umr_mtt_mkey(mdev, num_mtts, PAGE_SHIFT,
+                                        &rq->umr_mkey, rq->wqe_overflow.addr);
+}
+
+static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
+                                      struct mlx5e_rq *rq)
+{
+       u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+       if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
+               mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
+                             max_klm_size, rq->mpwqe.shampo->hd_per_wq);
+               return -EINVAL;
+       }
+       return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
+                                        &rq->mpwqe.shampo->mkey);
 }
 
 static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix)
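
mlx5e_create_umr_klm_mkey() above creates an indirect mkey whose translation
entries are KLMs rather than MTTs: each KLM maps an arbitrary byte range of
another mkey, which lets the SHAMPO headers land in software-chosen slots.
The entry layout below is inferred from the bcount/key/va accesses in
build_klm_umr() later in this series; the canonical struct mlx5_klm lives in
the mlx5 headers:

    struct mlx5_klm {
            __be32 bcount; /* bytes mapped by this entry */
            __be32 key;    /* lkey of the underlying memory */
            __be64 va;     /* DMA address of the header slot */
    };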
@@ -403,6 +488,65 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
        return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
 }
 
+static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
+                               struct mlx5e_params *params,
+                               struct mlx5e_rq_param *rqp,
+                               struct mlx5e_rq *rq,
+                               u32 *pool_size,
+                               int node)
+{
+       void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
+       int wq_size;
+       int err;
+
+       if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+               return 0;
+       err = mlx5e_rq_shampo_hd_alloc(rq, node);
+       if (err)
+               goto out;
+       rq->mpwqe.shampo->hd_per_wq =
+               mlx5e_shampo_hd_per_wq(mdev, params, rqp);
+       err = mlx5e_create_rq_hd_umr_mkey(mdev, rq);
+       if (err)
+               goto err_shampo_hd;
+       err = mlx5e_rq_shampo_hd_info_alloc(rq, node);
+       if (err)
+               goto err_shampo_info;
+       rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node);
+       if (!rq->hw_gro_data) {
+               err = -ENOMEM;
+               goto err_hw_gro_data;
+       }
+       rq->mpwqe.shampo->key =
+               cpu_to_be32(rq->mpwqe.shampo->mkey);
+       rq->mpwqe.shampo->hd_per_wqe =
+               mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
+       wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+       *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
+                    MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
+       return 0;
+
+err_hw_gro_data:
+       mlx5e_rq_shampo_hd_info_free(rq);
+err_shampo_info:
+       mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey);
+err_shampo_hd:
+       mlx5e_rq_shampo_hd_free(rq);
+out:
+       return err;
+}
+
+static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
+{
+       if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+               return;
+
+       kvfree(rq->hw_gro_data);
+       mlx5e_rq_shampo_hd_info_free(rq);
+       mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey);
+       mlx5e_rq_shampo_hd_free(rq);
+}
+
 static int mlx5e_alloc_rq(struct mlx5e_params *params,
                          struct mlx5e_xsk_param *xsk,
                          struct mlx5e_rq_param *rqp,
@@ -455,11 +599,16 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
                err = mlx5e_create_rq_umr_mkey(mdev, rq);
                if (err)
                        goto err_rq_drop_page;
-               rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
+               rq->mkey_be = cpu_to_be32(rq->umr_mkey);
 
                err = mlx5e_rq_alloc_mpwqe_info(rq, node);
                if (err)
                        goto err_rq_mkey;
+
+               err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node);
+               if (err)
+                       goto err_free_by_rq_type;
+
                break;
        default: /* MLX5_WQ_TYPE_CYCLIC */
                err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
@@ -487,7 +636,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
                if (err)
                        goto err_rq_frags;
 
-               rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key);
+               rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
        }
 
        if (xsk) {
@@ -512,14 +661,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
                if (IS_ERR(rq->page_pool)) {
                        err = PTR_ERR(rq->page_pool);
                        rq->page_pool = NULL;
-                       goto err_free_by_rq_type;
+                       goto err_free_shampo;
                }
                if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
                        err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
                                                         MEM_TYPE_PAGE_POOL, rq->page_pool);
        }
        if (err)
-               goto err_free_by_rq_type;
+               goto err_free_shampo;
 
        for (i = 0; i < wq_sz; i++) {
                if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -528,8 +677,10 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
                        u32 byte_count =
                                rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
                        u64 dma_offset = mlx5e_get_mpwqe_offset(i);
+                       u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ?
+                                      0 : rq->buff.headroom;
 
-                       wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
+                       wqe->data[0].addr = cpu_to_be64(dma_offset + headroom);
                        wqe->data[0].byte_count = cpu_to_be32(byte_count);
                        wqe->data[0].lkey = rq->mkey_be;
                } else {
@@ -569,12 +720,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
 
        return 0;
 
+err_free_shampo:
+       mlx5e_rq_free_shampo(rq);
 err_free_by_rq_type:
        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                kvfree(rq->mpwqe.info);
 err_rq_mkey:
-               mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+               mlx5_core_destroy_mkey(mdev, rq->umr_mkey);
 err_rq_drop_page:
                mlx5e_free_mpwqe_rq_drop_page(rq);
                break;
@@ -607,8 +760,9 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                kvfree(rq->mpwqe.info);
-               mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
+               mlx5_core_destroy_mkey(rq->mdev, rq->umr_mkey);
                mlx5e_free_mpwqe_rq_drop_page(rq);
+               mlx5e_rq_free_shampo(rq);
                break;
        default: /* MLX5_WQ_TYPE_CYCLIC */
                kvfree(rq->wqe.frags);
@@ -662,6 +816,12 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
                                                MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(wq, wq,  dbr_addr,           rq->wq_ctrl.db.dma);
 
+       if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+               MLX5_SET(wq, wq, log_headers_buffer_entry_num,
+                        order_base_2(rq->mpwqe.shampo->hd_per_wq));
+               MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey);
+       }
+
        mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
                                  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
 
@@ -801,6 +961,15 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
                head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
        }
 
+       if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+               u16 len;
+
+               len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) &
+                     (rq->mpwqe.shampo->hd_per_wq - 1);
+               mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false);
+               rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci;
+       }
+
        rq->mpwqe.actual_wq_head = wq->head;
        rq->mpwqe.umr_in_progress = 0;
        rq->mpwqe.umr_completed = 0;
@@ -826,6 +995,10 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
                        mlx5_wq_ll_pop(wq, wqe_ix_be,
                                       &wqe->next.next_wqe_index);
                }
+
+               if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+                       mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq,
+                                               0, true);
        } else {
                struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 
@@ -845,6 +1018,9 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
        struct mlx5_core_dev *mdev = rq->mdev;
        int err;
 
+       if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+               __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state);
+
        err = mlx5e_alloc_rq(params, xsk, param, node, rq);
        if (err)
                return err;
@@ -2028,7 +2204,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        c->cpu      = cpu;
        c->pdev     = mlx5_core_dma_dev(priv->mdev);
        c->netdev   = priv->netdev;
-       c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
+       c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
        c->num_tc   = mlx5e_get_dcb_num_tc(params);
        c->xdp      = !!params->xdp_prog;
        c->stats    = &priv->channel_stats[ix].ch;
@@ -2222,17 +2398,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
        chs->num = 0;
 }
 
-static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
+static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
 {
        struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_lro_param lro_param;
-
-       lro_param = mlx5e_get_lro_param(&priv->channels.params);
 
-       return mlx5e_rx_res_lro_set_param(res, &lro_param);
+       return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
 }
 
-static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
+static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
 
 static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
                         struct mlx5e_params *params, u16 mtu)
@@ -3351,16 +3524,59 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
        }
 
        new_params = *cur_params;
-       new_params.lro_en = enable;
 
-       if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
-               if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
-                   mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
-                       reset = false;
+       if (enable)
+               new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
+       else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+               new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+       else
+               goto out;
+
+       if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
+             new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
+               if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+                       if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
+                           mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
+                               reset = false;
+               }
+       }
+
+       err = mlx5e_safe_switch_params(priv, &new_params,
+                                      mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+out:
+       mutex_unlock(&priv->state_lock);
+       return err;
+}
+
+static int set_feature_hw_gro(struct net_device *netdev, bool enable)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_params new_params;
+       bool reset = true;
+       int err = 0;
+
+       mutex_lock(&priv->state_lock);
+       new_params = priv->channels.params;
+
+       if (enable) {
+               if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+                       netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n");
+                       err = -EINVAL;
+                       goto out;
+               }
+               new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
+               new_params.packet_merge.shampo.match_criteria_type =
+                       MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
+               new_params.packet_merge.shampo.alignment_granularity =
+                       MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE;
+       } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+               new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+       } else {
+               goto out;
        }
 
        err = mlx5e_safe_switch_params(priv, &new_params,
-                                      mlx5e_modify_tirs_lro_ctx, NULL, reset);
+                                      mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
 out:
        mutex_unlock(&priv->state_lock);
        return err;
@@ -3539,6 +3755,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
        mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
 
        err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+       err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro);
        err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
                                    set_feature_cvlan_filter);
        err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc);
@@ -3599,6 +3816,10 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
                        netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
                        features &= ~NETIF_F_LRO;
                }
+               if (features & NETIF_F_GRO_HW) {
+                       netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n");
+                       features &= ~NETIF_F_GRO_HW;
+               }
        }
 
        if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
@@ -3687,7 +3908,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
                goto out;
        }
 
-       if (params->lro_en)
+       if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
                reset = false;
 
        if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -4144,8 +4365,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
        struct net_device *netdev = priv->netdev;
        struct mlx5e_params new_params;
 
-       if (priv->channels.params.lro_en) {
-               netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+       if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+               netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
                return -EINVAL;
        }
 
@@ -4402,9 +4623,10 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
            params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
                /* No XSK params: checking the availability of striding RQ in general. */
                if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
-                       params->lro_en = !slow_pci_heuristic(mdev);
+                       params->packet_merge.type = slow_pci_heuristic(mdev) ?
+                               MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
        }
-       params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+       params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
 
        /* CQ moderation params */
        rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
@@ -4539,6 +4761,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
        netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
+       if (!!MLX5_CAP_GEN(mdev, shampo) &&
+           mlx5e_check_fragmented_striding_rq_cap(mdev))
+               netdev->hw_features    |= NETIF_F_GRO_HW;
+
        if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
                netdev->hw_enc_features |= NETIF_F_HW_CSUM;
                netdev->hw_enc_features |= NETIF_F_TSO;
@@ -4589,6 +4815,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        if (fcs_enabled)
                netdev->features  &= ~NETIF_F_RXALL;
        netdev->features  &= ~NETIF_F_LRO;
+       netdev->features  &= ~NETIF_F_GRO_HW;
        netdev->features  &= ~NETIF_F_RXFCS;
 
 #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
@@ -4693,7 +4920,6 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
        enum mlx5e_rx_res_features features;
-       struct mlx5e_lro_param lro_param;
        int err;
 
        priv->rx_res = mlx5e_rx_res_alloc();
@@ -4711,9 +4937,9 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
        features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
        if (priv->channels.params.tunneled_offload_en)
                features |= MLX5E_RX_RES_FEATURE_INNER_FT;
-       lro_param = mlx5e_get_lro_param(&priv->channels.params);
        err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
-                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->max_nch, priv->drop_rq.rqn,
+                               &priv->channels.params.packet_merge,
                                priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
index 0684ac6..5230e04 100644 (file)
@@ -793,7 +793,6 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5e_lro_param lro_param;
        int err;
 
        priv->rx_res = mlx5e_rx_res_alloc();
@@ -808,9 +807,9 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
                return err;
        }
 
-       lro_param = mlx5e_get_lro_param(&priv->channels.params);
        err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
-                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->max_nch, priv->drop_rq.rqn,
+                               &priv->channels.params.packet_merge,
                                priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
index 29a6586..f63c8ff 100644 (file)
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
+#include <linux/bitmap.h>
 #include <net/ip6_checksum.h>
 #include <net/page_pool.h>
 #include <net/inet_ecn.h>
+#include <net/udp.h>
+#include <net/tcp.h>
 #include "en.h"
 #include "en/txrx.h"
 #include "en_tc.h"
@@ -62,10 +65,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
                                   u16 cqe_bcnt, u32 head_offset, u32 page_idx);
 static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
 const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
        .handle_rx_cqe       = mlx5e_handle_rx_cqe,
        .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+       .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
 };
 
 static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
@@ -185,8 +190,9 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
                        mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
 
                mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
-               INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
-                               mlx5e_handle_rx_cqe, rq, &cqd->title);
+               INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+                               mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+                               rq, &cqd->title);
        }
        mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
        wq->cc = cqcc;
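
INDIRECT_CALL_2 grows into INDIRECT_CALL_3 so the new SHAMPO handler is also
a devirtualization candidate. Roughly, the wrapper compares the function
pointer against each known target and makes a direct call on a match; a
simplified sketch of include/linux/indirect_call_wrapper.h, ignoring the
retpoline config gating:

    #define INDIRECT_CALL_1(f, f1, ...) \
            ((f) == f1 ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))
    #define INDIRECT_CALL_2(f, f2, f1, ...) \
            ((f) == f2 ? f2(__VA_ARGS__) : INDIRECT_CALL_1(f, f1, __VA_ARGS__))
    #define INDIRECT_CALL_3(f, f3, f2, f1, ...) \
            ((f) == f3 ? f3(__VA_ARGS__) : INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__))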
@@ -206,8 +212,9 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
        mlx5e_read_title_slot(rq, wq, cc);
        mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
        mlx5e_decompress_cqe(rq, wq, cc);
-       INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
-                       mlx5e_handle_rx_cqe, rq, &cqd->title);
+       INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+                       mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+                       rq, &cqd->title);
        cqd->mini_arr_idx++;
 
        return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
@@ -448,13 +455,13 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
 static inline void
 mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
                      struct mlx5e_dma_info *dma_info,
-                     int offset_from, u32 headlen)
+                     int offset_from, int dma_offset, u32 headlen)
 {
        const void *from = page_address(dma_info->page) + offset_from;
        /* Aligning len to sizeof(long) optimizes memcpy performance */
        unsigned int len = ALIGN(headlen, sizeof(long));
 
-       dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
+       dma_sync_single_for_cpu(pdev, dma_info->addr + dma_offset, len,
                                DMA_FROM_DEVICE);
        skb_copy_to_linear_data(skb, from, len);
 }
@@ -494,6 +501,157 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
        mlx5_wq_ll_update_db_record(wq);
 }
 
+/* Return the size of the contiguous run of free (clear) bits in the bitmap
+ * that starts at @first, capped at @len, wrapping past the end if necessary.
+ */
+static int bitmap_find_window(unsigned long *bitmap, int len,
+                             int bitmap_size, int first)
+{
+       int next_one, count;
+
+       next_one = find_next_bit(bitmap, bitmap_size, first);
+       if (next_one == bitmap_size) {
+               if (bitmap_size - first >= len)
+                       return len;
+               next_one = find_next_bit(bitmap, bitmap_size, 0);
+               count = next_one + bitmap_size - first;
+       } else {
+               count = next_one - first;
+       }
+
+       return min(len, count);
+}
+
+static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
+                         __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
+{
+       memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
+       umr_wqe->ctrl.opmod_idx_opcode =
+               cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+                            MLX5_OPCODE_UMR);
+       umr_wqe->ctrl.umr_mkey = key;
+       umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
+                                           | MLX5E_KLM_UMR_DS_CNT(klm_len));
+       umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
+       umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+       umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
+       umr_wqe->uctrl.mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+}
+
+static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
+                                    struct mlx5e_icosq *sq,
+                                    u16 klm_entries, u16 index)
+{
+       struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+       u16 entries, pi, header_offset, wqe_bbs, new_entries;
+       u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
+       struct page *page = shampo->last_page;
+       u64 addr = shampo->last_addr;
+       struct mlx5e_dma_info *dma_info;
+       struct mlx5e_umr_wqe *umr_wqe;
+       int headroom, i, err; /* i and err must be signed: see err_unmap */
+
+       headroom = rq->buff.headroom;
+       new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
+       entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT);
+       wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
+       pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
+       umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+       build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
+
+       for (i = 0; i < entries; i++, index++) {
+               dma_info = &shampo->info[index];
+               if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
+                                        MLX5_UMR_KLM_ALIGNMENT))
+                       goto update_klm;
+               header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
+                       MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
+               if (!(header_offset & (PAGE_SIZE - 1))) {
+                       err = mlx5e_page_alloc(rq, dma_info);
+                       if (unlikely(err))
+                               goto err_unmap;
+                       addr = dma_info->addr;
+                       page = dma_info->page;
+               } else {
+                       dma_info->addr = addr + header_offset;
+                       dma_info->page = page;
+               }
+
+update_klm:
+               umr_wqe->inline_klms[i].bcount =
+                       cpu_to_be32(MLX5E_RX_MAX_HEAD);
+               umr_wqe->inline_klms[i].key    = cpu_to_be32(lkey);
+               umr_wqe->inline_klms[i].va     =
+                       cpu_to_be64(dma_info->addr + headroom);
+       }
+
+       sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+               .wqe_type       = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
+               .num_wqebbs     = wqe_bbs,
+               .shampo.len     = new_entries,
+       };
+
+       shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
+       shampo->last_page = page;
+       shampo->last_addr = addr;
+       sq->pc += wqe_bbs;
+       sq->doorbell_cseg = &umr_wqe->ctrl;
+
+       return 0;
+
+err_unmap:
+       while (--i >= 0) {
+               if (!index--) /* index is u16, it can't go negative: wrap explicitly */
+                       index = shampo->hd_per_wq - 1;
+               dma_info = &shampo->info[index];
+               if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
+                       dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
+                       mlx5e_page_release(rq, dma_info, true);
+               }
+       }
+       rq->stats->buff_alloc_err++;
+       return err;
+}
+
+static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
+{
+       struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+       u16 klm_entries, num_wqe, index, entries_before;
+       struct mlx5e_icosq *sq = rq->icosq;
+       int i, err, max_klm_entries, len;
+
+       max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
+       klm_entries = bitmap_find_window(shampo->bitmap,
+                                        shampo->hd_per_wqe,
+                                        shampo->hd_per_wq, shampo->pi);
+       if (!klm_entries)
+               return 0;
+
+       klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
+       index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT);
+       entries_before = shampo->hd_per_wq - index;
+
+       if (unlikely(entries_before < klm_entries))
+               num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
+                         DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
+       else
+               num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
+
+       for (i = 0; i < num_wqe; i++) {
+               len = (klm_entries > max_klm_entries) ? max_klm_entries :
+                                                       klm_entries;
+               if (unlikely(index + len > shampo->hd_per_wq))
+                       len = shampo->hd_per_wq - index;
+               err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
+               if (unlikely(err))
+                       return err;
+               index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
+               klm_entries -= len;
+       }
+
+       return 0;
+}
+
 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
        struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
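
To make the window search above concrete, here is a self-contained userspace
re-implementation of bitmap_find_window(), with a plain char array standing
in for the kernel bitmap and find_next_set() standing in for find_next_bit():

    #include <stdio.h>

    static int find_next_set(const char *bm, int size, int start)
    {
            for (int i = start; i < size; i++)
                    if (bm[i])
                            return i;
            return size;
    }

    static int bitmap_find_window(const char *bm, int len, int size, int first)
    {
            int next_one = find_next_set(bm, size, first);
            int count;

            if (next_one == size) {         /* no set bit ahead of @first */
                    if (size - first >= len)
                            return len;
                    next_one = find_next_set(bm, size, 0); /* wrap around */
                    count = next_one + size - first;
            } else {
                    count = next_one - first;
            }
            return count < len ? count : len;
    }

    int main(void)
    {
            char bm[8] = { 0, 0, 1, 0, 0, 0, 0, 0 }; /* slot 2 is busy */

            printf("%d\n", bitmap_find_window(bm, 8, 8, 3)); /* 7: 3..7, then 0..1 */
            printf("%d\n", bitmap_find_window(bm, 4, 8, 0)); /* 2: blocked by slot 2 */
            return 0;
    }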
@@ -514,6 +672,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
                goto err;
        }
 
+       if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+               err = mlx5e_alloc_rx_hd_mpwqe(rq);
+               if (unlikely(err))
+                       goto err;
+       }
+
        pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS);
        umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
        memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
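
The header_offset computation in mlx5e_build_shampo_hd_umr() packs several
header slots into each page and allocates a fresh page exactly when the
offset wraps to zero. A standalone sketch of that arithmetic, with
hypothetical constants (the real MLX5E_SHAMPO_* values are defined in en.h,
outside this excerpt):

    #include <stdio.h>

    /* Assumed for illustration: 4 KiB pages, 256-byte header slots,
     * hence 16 headers per page.
     */
    #define EX_PAGE_SIZE                    4096
    #define EX_LOG_MAX_HEADER_ENTRY_SIZE    8
    #define EX_HEADER_PER_PAGE              (EX_PAGE_SIZE >> EX_LOG_MAX_HEADER_ENTRY_SIZE)

    int main(void)
    {
            for (int index = 0; index < 34; index++) {
                    int off = (index & (EX_HEADER_PER_PAGE - 1))
                              << EX_LOG_MAX_HEADER_ENTRY_SIZE;

                    if (!(off & (EX_PAGE_SIZE - 1)))
                            printf("index %2d: new page\n", index);
            }
            /* Prints for index 0, 16 and 32: headers 0..15 share the first
             * page, 16..31 the second, and so on.
             */
            return 0;
    }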
@@ -558,6 +722,44 @@ err:
        return err;
 }
 
+/* Deallocate SHAMPO header buffer entries.
+ * close == true means the RQ is being torn down: walk all @len entries and
+ * release only those still marked in use (their bit is set in the bitmap).
+ * close == false means the caller passes a range of posted entries that
+ * never completed, and every entry in [start, start + len) is released.
+ */
+void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
+{
+       struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+       int hd_per_wq = shampo->hd_per_wq;
+       struct page *deleted_page = NULL;
+       struct mlx5e_dma_info *hd_info;
+       int i, index = start;
+
+       for (i = 0; i < len; i++, index++) {
+               if (index == hd_per_wq)
+                       index = 0;
+
+               if (close && !test_bit(index, shampo->bitmap))
+                       continue;
+
+               hd_info = &shampo->info[index];
+               hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
+               if (hd_info->page != deleted_page) {
+                       deleted_page = hd_info->page;
+                       mlx5e_page_release(rq, hd_info, false);
+               }
+       }
+
+       if (start + len > hd_per_wq) {
+               len -= hd_per_wq - start;
+               bitmap_clear(shampo->bitmap, start, hd_per_wq - start);
+               start = 0;
+       }
+
+       bitmap_clear(shampo->bitmap, start, len);
+}
+
 static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
        struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
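
Both mlx5e_shampo_dealloc_hd() above and mlx5e_handle_shampo_hd_umr() in the
next hunk update a circular range of bitmap bits by splitting it into at most
two linear spans. A minimal standalone sketch of that wrap-around split, with
a char array standing in for the kernel bitmap helpers:

    #include <stdio.h>

    static void span_set(char *bm, int start, int len, char val)
    {
            for (int i = 0; i < len; i++)
                    bm[start + i] = val;
    }

    /* Update [start, start + len) on a ring of @ring entries. */
    static void circular_set(char *bm, int ring, int start, int len, char val)
    {
            if (start + len > ring) {
                    len -= ring - start;
                    span_set(bm, start, ring - start, val); /* tail of the ring */
                    start = 0;                              /* then wrap */
            }
            span_set(bm, start, len, val);
    }

    int main(void)
    {
            char bm[8] = { 0 };

            circular_set(bm, 8, 6, 4, 1); /* sets 6,7 then wraps to 0,1 */
            for (int i = 0; i < 8; i++)
                    printf("%d", bm[i]);
            printf("\n"); /* 11000011 */
            return 0;
    }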
@@ -629,6 +831,28 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
        sq->cc = sqcc;
 }
 
+static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
+                                      struct mlx5e_icosq *sq)
+{
+       struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
+       struct mlx5e_shampo_hd *shampo;
+       /* assume 1:1 relationship between RQ and icosq */
+       struct mlx5e_rq *rq = &c->rq;
+       int end, from, len = umr.len;
+
+       shampo = rq->mpwqe.shampo;
+       end = shampo->hd_per_wq;
+       from = shampo->ci;
+       if (from + len > shampo->hd_per_wq) {
+               len -= end - from;
+               bitmap_set(shampo->bitmap, from, end - from);
+               from = 0;
+       }
+
+       bitmap_set(shampo->bitmap, from, len);
+       shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1);
+}
+
 int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 {
        struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
@@ -685,6 +909,9 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
                                break;
                        case MLX5E_ICOSQ_WQE_NOP:
                                break;
+                       case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR:
+                               mlx5e_handle_shampo_hd_umr(wi->shampo, sq);
+                               break;
 #ifdef CONFIG_MLX5_EN_TLS
                        case MLX5E_ICOSQ_WQE_UMR_TLS:
                                break;
@@ -782,8 +1009,8 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
 
        if (tcp_ack) {
                tcp->ack                = 1;
-               tcp->ack_seq            = cqe->lro_ack_seq_num;
-               tcp->window             = cqe->lro_tcp_win;
+               tcp->ack_seq            = cqe->lro.ack_seq_num;
+               tcp->window             = cqe->lro.tcp_win;
        }
 }
 
@@ -809,7 +1036,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
                tcp = ip_p + sizeof(struct iphdr);
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
-               ipv4->ttl               = cqe->lro_min_ttl;
+               ipv4->ttl               = cqe->lro.min_ttl;
                ipv4->tot_len           = cpu_to_be16(tot_len);
                ipv4->check             = 0;
                ipv4->check             = ip_fast_csum((unsigned char *)ipv4,
@@ -829,7 +1056,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
                tcp = ip_p + sizeof(struct ipv6hdr);
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 
-               ipv6->hop_limit         = cqe->lro_min_ttl;
+               ipv6->hop_limit         = cqe->lro.min_ttl;
                ipv6->payload_len       = cpu_to_be16(payload_len);
 
                mlx5e_lro_update_tcp_hdr(cqe, tcp);
@@ -841,6 +1068,142 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
        }
 }
 
+static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
+{
+       struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
+       u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
+
+       return page_address(last_head->page) + head_offset;
+}
+
+static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
+{
+       int udp_off = rq->hw_gro_data->fk.control.thoff;
+       struct sk_buff *skb = rq->hw_gro_data->skb;
+       struct udphdr *uh;
+
+       uh = (struct udphdr *)(skb->data + udp_off);
+       uh->len = htons(skb->len - udp_off);
+
+       if (uh->check)
+               uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr,
+                                         ipv4->daddr, 0);
+
+       skb->csum_start = (unsigned char *)uh - skb->head;
+       skb->csum_offset = offsetof(struct udphdr, check);
+
+       skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+}
+
+static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6)
+{
+       int udp_off = rq->hw_gro_data->fk.control.thoff;
+       struct sk_buff *skb = rq->hw_gro_data->skb;
+       struct udphdr *uh;
+
+       uh = (struct udphdr *)(skb->data + udp_off);
+       uh->len = htons(skb->len - udp_off);
+
+       if (uh->check)
+               uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr,
+                                         &ipv6->daddr, 0);
+
+       skb->csum_start = (unsigned char *)uh - skb->head;
+       skb->csum_offset = offsetof(struct udphdr, check);
+
+       skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+}
+
+static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+                                             struct tcphdr *skb_tcp_hd)
+{
+       u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
+       struct tcphdr *last_tcp_hd;
+       void *last_hd_addr;
+
+       last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
+       last_tcp_hd =  last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff;
+       tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH);
+}
+
+static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4,
+                                            struct mlx5_cqe64 *cqe, bool match)
+{
+       int tcp_off = rq->hw_gro_data->fk.control.thoff;
+       struct sk_buff *skb = rq->hw_gro_data->skb;
+       struct tcphdr *tcp;
+
+       tcp = (struct tcphdr *)(skb->data + tcp_off);
+       if (match)
+               mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
+
+       tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr,
+                                  ipv4->daddr, 0);
+       skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+       if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id)
+               skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+
+       skb->csum_start = (unsigned char *)tcp - skb->head;
+       skb->csum_offset = offsetof(struct tcphdr, check);
+
+       if (tcp->cwr)
+               skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+}
+
+static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6,
+                                            struct mlx5_cqe64 *cqe, bool match)
+{
+       int tcp_off = rq->hw_gro_data->fk.control.thoff;
+       struct sk_buff *skb = rq->hw_gro_data->skb;
+       struct tcphdr *tcp;
+
+       tcp = (struct tcphdr *)(skb->data + tcp_off);
+       if (match)
+               mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
+
+       tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr,
+                                  &ipv6->daddr, 0);
+       skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+       skb->csum_start = (unsigned char *)tcp - skb->head;
+       skb->csum_offset = offsetof(struct tcphdr, check);
+
+       if (tcp->cwr)
+               skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+}
+
+static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
+{
+       bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP));
+       struct sk_buff *skb = rq->hw_gro_data->skb;
+
+       skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+       skb->ip_summed = CHECKSUM_PARTIAL;
+
+       if (is_ipv4) {
+               int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr);
+               struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff);
+               __be16 newlen = htons(skb->len - nhoff);
+
+               csum_replace2(&ipv4->check, ipv4->tot_len, newlen);
+               ipv4->tot_len = newlen;
+
+               if (ipv4->protocol == IPPROTO_TCP)
+                       mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match);
+               else
+                       mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4);
+       } else {
+               int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr);
+               struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff);
+
+               ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6));
+
+               if (ipv6->nexthdr == IPPROTO_TCP)
+                       mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match);
+               else
+                       mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6);
+       }
+}
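+/* csum_replace2() applies the RFC 1624 incremental update
+ * (HC' = ~(~HC + ~m + m')), so only the changed tot_len word is folded
+ * into the IPv4 header checksum instead of re-summing the whole header;
+ * IPv6 has no header checksum, so only payload_len is rewritten.
+ */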
+
 static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
                                      struct sk_buff *skb)
 {
@@ -1090,6 +1453,27 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
                stats->mcast_packets++;
 }
 
+static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq,
+                                        struct mlx5_cqe64 *cqe,
+                                        u32 cqe_bcnt,
+                                        struct sk_buff *skb)
+{
+       struct mlx5e_rq_stats *stats = rq->stats;
+
+       stats->packets++;
+       stats->gro_packets++;
+       stats->bytes += cqe_bcnt;
+       stats->gro_bytes += cqe_bcnt;
+       if (NAPI_GRO_CB(skb)->count != 1)
+               return;
+       mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
+       skb_reset_network_header(skb);
+       if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) {
+               napi_gro_receive(rq->cq.napi, skb);
+               rq->hw_gro_data->skb = NULL;
+       }
+}
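+/* Flow keys are dissected only on the first CQE of a session; if the
+ * dissector cannot parse the header, the skb is handed to GRO right away
+ * and the session slot is cleared so nothing gets merged onto an
+ * unparsable header.
+ */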
+
 static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
                                         struct mlx5_cqe64 *cqe,
                                         u32 cqe_bcnt,
@@ -1199,7 +1583,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
        }
 
        /* copy header */
-       mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen);
+       mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset,
+                             headlen);
        /* skb linear part was allocated with headlen and aligned to long */
        skb->tail += headlen;
        skb->len  += headlen;
@@ -1395,6 +1780,30 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
 };
 #endif
 
+static void
+mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+                   u32 data_bcnt, u32 data_offset)
+{
+       net_prefetchw(skb->data);
+
+       while (data_bcnt) {
+               u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
+               unsigned int truesize;
+
+               if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+                       truesize = pg_consumed_bytes;
+               else
+                       truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+               mlx5e_add_skb_frag(rq, skb, di, data_offset,
+                                  pg_consumed_bytes, truesize);
+
+               data_bcnt -= pg_consumed_bytes;
+               data_offset = 0;
+               di++;
+       }
+}
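+/* truesize accounting differs per mode: SHAMPO packs payload back to back,
+ * so each fragment is charged exactly what it consumed, while the legacy
+ * striding-RQ path rounds each fragment's charge up to the stride size.
+ * For example, with 2KB strides (log_stride_sz == 11) a 1500-byte fragment
+ * is charged 2048 bytes of truesize.
+ */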
+
 static struct sk_buff *
 mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
                                   u16 cqe_bcnt, u32 head_offset, u32 page_idx)
@@ -1420,20 +1829,9 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
                frag_offset -= PAGE_SIZE;
        }
 
-       while (byte_cnt) {
-               u32 pg_consumed_bytes =
-                       min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
-               unsigned int truesize =
-                       ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
-
-               mlx5e_add_skb_frag(rq, skb, di, frag_offset,
-                                  pg_consumed_bytes, truesize);
-               byte_cnt -= pg_consumed_bytes;
-               frag_offset = 0;
-               di++;
-       }
+       mlx5e_fill_skb_data(skb, rq, di, byte_cnt, frag_offset);
        /* copy header */
-       mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen);
+       mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, head_offset, headlen);
        /* skb linear part was allocated with headlen and aligned to long */
        skb->tail += headlen;
        skb->len  += headlen;
@@ -1487,6 +1885,181 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
        return skb;
 }
 
+static void
+mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+                         struct mlx5_cqe64 *cqe, u16 header_index)
+{
+       struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
+       u16 head_offset = head->addr & (PAGE_SIZE - 1);
+       u16 head_size = cqe->shampo.header_size;
+       u16 rx_headroom = rq->buff.headroom;
+       struct sk_buff *skb = NULL;
+       void *hdr, *data;
+       u32 frag_size;
+
+       hdr             = page_address(head->page) + head_offset;
+       data            = hdr + rx_headroom;
+       frag_size       = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
+
+       if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
+               /* build SKB around header */
+               dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE);
+               prefetchw(hdr);
+               prefetch(data);
+               skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size);
+
+               if (unlikely(!skb))
+                       return;
+
+               /* queue up for recycling/reuse */
+               page_ref_inc(head->page);
+
+       } else {
+               /* allocate SKB and copy header for large header */
+               rq->stats->gro_large_hds++;
+               skb = napi_alloc_skb(rq->cq.napi,
+                                    ALIGN(head_size, sizeof(long)));
+               if (unlikely(!skb)) {
+                       rq->stats->buff_alloc_err++;
+                       return;
+               }
+
+               prefetchw(skb->data);
+               mlx5e_copy_skb_header(rq->pdev, skb, head,
+                                     head_offset + rx_headroom,
+                                     rx_headroom, head_size);
+               /* skb linear part was allocated with head_size and aligned to long */
+               skb->tail += head_size;
+               skb->len  += head_size;
+       }
+       rq->hw_gro_data->skb = skb;
+       NAPI_GRO_CB(skb)->count = 1;
+       skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size;
+}
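+/* Small headers are wrapped in place: the skb is built directly around the
+ * header buffer and the page only gains a reference for recycling. Headers
+ * that do not fit a SHAMPO header entry (including headroom) take the slow
+ * path and are copied into a freshly allocated linear skb. gso_size is
+ * seeded with this first packet's payload length (byte count minus header).
+ */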
+
+static void
+mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz)
+{
+       skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
+       unsigned int frag_size = skb_frag_size(last_frag);
+       unsigned int frag_truesize;
+
+       frag_truesize = ALIGN(frag_size, BIT(log_stride_sz));
+       skb->truesize += frag_truesize - frag_size;
+}
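+/* The session's last fragment may end mid-stride, leaving the remainder of
+ * that stride unusable, so its truesize is rounded up to a full stride
+ * before the skb is charged to the receiving socket.
+ */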
+
+static void
+mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
+{
+       struct sk_buff *skb = rq->hw_gro_data->skb;
+       struct mlx5e_rq_stats *stats = rq->stats;
+
+       stats->gro_skbs++;
+       if (likely(skb_shinfo(skb)->nr_frags))
+               mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz);
+       if (NAPI_GRO_CB(skb)->count > 1)
+               mlx5e_shampo_update_hdr(rq, cqe, match);
+       napi_gro_receive(rq->cq.napi, skb);
+       rq->hw_gro_data->skb = NULL;
+}
+
+static bool
+mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
+{
+       int nr_frags = skb_shinfo(skb)->nr_frags;
+
+       return PAGE_SIZE * nr_frags + data_bcnt <= GSO_MAX_SIZE;
+}
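+/* Conservative room check: each existing fragment is (over)counted as a
+ * full page, guaranteeing the merged skb stays within GSO_MAX_SIZE (64K)
+ * without having to track exact fragment sizes.
+ */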
+
+static void
+mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
+{
+       struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+       u64 addr = shampo->info[header_index].addr;
+
+       if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
+               shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
+               mlx5e_page_release(rq, &shampo->info[header_index], true);
+       }
+       bitmap_clear(shampo->bitmap, header_index, 1);
+}
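+/* Header buffers are packed MLX5E_SHAMPO_WQ_HEADER_PER_PAGE to a page;
+ * the backing page is released only when the last entry of that page is
+ * freed, with the address realigned down to the page start so the release
+ * unmaps the whole page rather than a single entry.
+ */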
+
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+       u16 data_bcnt           = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
+       u16 header_index        = be16_to_cpu(cqe->shampo.header_entry_index);
+       u32 wqe_offset          = be32_to_cpu(cqe->shampo.data_offset);
+       u16 cstrides            = mpwrq_get_cqe_consumed_strides(cqe);
+       u32 data_offset         = wqe_offset & (PAGE_SIZE - 1);
+       u32 cqe_bcnt            = mpwrq_get_cqe_byte_cnt(cqe);
+       u16 wqe_id              = be16_to_cpu(cqe->wqe_id);
+       u32 page_idx            = wqe_offset >> PAGE_SHIFT;
+       struct sk_buff **skb    = &rq->hw_gro_data->skb;
+       bool flush              = cqe->shampo.flush;
+       bool match              = cqe->shampo.match;
+       struct mlx5e_rq_stats *stats = rq->stats;
+       struct mlx5e_rx_wqe_ll *wqe;
+       struct mlx5e_dma_info *di;
+       struct mlx5e_mpw_info *wi;
+       struct mlx5_wq_ll *wq;
+
+       wi = &rq->mpwqe.info[wqe_id];
+       wi->consumed_strides += cstrides;
+
+       if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+               trigger_report(rq, cqe);
+               stats->wqe_err++;
+               goto mpwrq_cqe_out;
+       }
+
+       if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+               stats->mpwqe_filler_cqes++;
+               stats->mpwqe_filler_strides += cstrides;
+               goto mpwrq_cqe_out;
+       }
+
+       stats->gro_match_packets += match;
+
+       if (*skb && (!match || !mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt))) {
+               match = false;
+               mlx5e_shampo_flush_skb(rq, cqe, match);
+       }
+
+       if (!*skb) {
+               mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+               if (unlikely(!*skb))
+                       goto free_hd_entry;
+       } else {
+               NAPI_GRO_CB(*skb)->count++;
+               if (NAPI_GRO_CB(*skb)->count == 2 &&
+                   rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) {
+                       void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
+                       int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff -
+                                   sizeof(struct iphdr);
+                       struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff);
+
+                       rq->hw_gro_data->second_ip_id = ntohs(iph->id);
+               }
+       }
+
+       di = &wi->umr.dma_info[page_idx];
+       mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
+
+       mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
+       if (flush)
+               mlx5e_shampo_flush_skb(rq, cqe, match);
+free_hd_entry:
+       mlx5e_free_rx_shampo_hd_entry(rq, header_index);
+mpwrq_cqe_out:
+       if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+               return;
+
+       wq  = &rq->mpwqe.wq;
+       wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+       mlx5e_free_rx_mpwqe(rq, wi, true);
+       mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
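+/* Session flow in the handler above: the CQE's match bit says the packet
+ * belongs to the open session and flush says the HW closed it. A mismatch
+ * or a full skb flushes the pending skb before a new one is started, and
+ * the second packet of an IPv4 session records its IP ID so the
+ * header-update path can detect fixed-ID senders.
+ */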
+
 static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
        u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
@@ -1579,11 +2152,15 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 
                mlx5_cqwq_pop(cqwq);
 
-               INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
-                               mlx5e_handle_rx_cqe, rq, cqe);
+               INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+                               mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
+                               rq, cqe);
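+               /* INDIRECT_CALL_3() compares the handler pointer against
+                * the three known receive paths so the common case avoids
+                * an indirect branch (costly under retpolines).
+                */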
        } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
 
 out:
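+       /* A SHAMPO session must not outlive its NAPI cycle, so any skb
+        * still open at the end of the poll is flushed here, much as
+        * software GRO is flushed when a NAPI poll completes.
+        */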
+       if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
+               mlx5e_shampo_flush_skb(rq, NULL, false);
+
        if (rcu_access_pointer(rq->xdp_prog))
                mlx5e_xdp_rx_poll_complete(rq);
 
@@ -1784,15 +2361,24 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
                rq->post_wqes = mlx5e_post_rx_mpwqes;
                rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
-               rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
                if (mlx5_fpga_is_ipsec_device(mdev)) {
                        netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n");
                        return -EINVAL;
                }
-               if (!rq->handle_rx_cqe) {
-                       netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
-                       return -EINVAL;
+               if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+                       rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
+                       if (!rq->handle_rx_cqe) {
+                               netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n");
+                               return -EINVAL;
+                       }
+               } else {
+                       rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
+                       if (!rq->handle_rx_cqe) {
+                               netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
+                               return -EINVAL;
+                       }
                }
+
                break;
        default: /* MLX5_WQ_TYPE_CYCLIC */
                rq->wqe.skb_from_cqe = xsk ?
index e1dd170..2a9bfc3 100644 (file)
@@ -128,6 +128,11 @@ static const struct counter_desc sw_stats_desc[] = {
 
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_skbs) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_match_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
@@ -313,6 +318,11 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
        s->rx_bytes                   += rq_stats->bytes;
        s->rx_lro_packets             += rq_stats->lro_packets;
        s->rx_lro_bytes               += rq_stats->lro_bytes;
+       s->rx_gro_packets             += rq_stats->gro_packets;
+       s->rx_gro_bytes               += rq_stats->gro_bytes;
+       s->rx_gro_skbs                += rq_stats->gro_skbs;
+       s->rx_gro_match_packets       += rq_stats->gro_match_packets;
+       s->rx_gro_large_hds           += rq_stats->gro_large_hds;
        s->rx_ecn_mark                += rq_stats->ecn_mark;
        s->rx_removed_vlan_packets    += rq_stats->removed_vlan_packets;
        s->rx_csum_none               += rq_stats->csum_none;
@@ -1760,6 +1770,11 @@ static const struct counter_desc rq_stats_desc[] = {
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_packets) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_bytes) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_skbs) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_match_packets) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_large_hds) },
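+       /* gro_packets/gro_bytes count per-CQE merged traffic, gro_skbs the
+        * coalesced skbs handed to the stack, gro_match_packets the CQEs
+        * whose match bit allowed appending, and gro_large_hds headers too
+        * big to build in place (copied instead).
+        */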
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
index 139e59f..2c1ed5b 100644 (file)
@@ -144,6 +144,11 @@ struct mlx5e_sw_stats {
        u64 tx_mpwqe_pkts;
        u64 rx_lro_packets;
        u64 rx_lro_bytes;
+       u64 rx_gro_packets;
+       u64 rx_gro_bytes;
+       u64 rx_gro_skbs;
+       u64 rx_gro_match_packets;
+       u64 rx_gro_large_hds;
        u64 rx_mcast_packets;
        u64 rx_ecn_mark;
        u64 rx_removed_vlan_packets;
@@ -322,6 +327,11 @@ struct mlx5e_rq_stats {
        u64 csum_none;
        u64 lro_packets;
        u64 lro_bytes;
+       u64 gro_packets;
+       u64 gro_bytes;
+       u64 gro_skbs;
+       u64 gro_match_packets;
+       u64 gro_large_hds;
        u64 mcast_packets;
        u64 ecn_mark;
        u64 removed_vlan_packets;
index 306279b..12abe99 100644 (file)
@@ -115,7 +115,7 @@ static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
        ix = conn->qp.rq.pc & (conn->qp.rq.size - 1);
        data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix);
        data->byte_count = cpu_to_be32(buf->sg[0].size);
-       data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+       data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey);
        data->addr = cpu_to_be64(buf->sg[0].dma_addr);
 
        conn->qp.rq.pc++;
@@ -155,7 +155,7 @@ static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
                if (!buf->sg[sgi].data)
                        break;
                data->byte_count = cpu_to_be32(buf->sg[sgi].size);
-               data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+               data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey);
                data->addr = cpu_to_be64(buf->sg[sgi].dma_addr);
                data++;
                size++;
@@ -221,7 +221,7 @@ static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
 }
 
 static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-                                     struct mlx5_core_mkey *mkey)
+                                     u32 *mkey)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        void *mkc;
@@ -978,7 +978,7 @@ int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
                mlx5_fpga_err(fdev, "create mkey failed, %d\n", err);
                goto err_dealloc_pd;
        }
-       mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key);
+       mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey);
 
        return 0;
 
@@ -994,7 +994,7 @@ out:
 
 void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
 {
-       mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey);
+       mlx5_core_destroy_mkey(fdev->mdev, fdev->conn_res.mkey);
        mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
        mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
        mlx5_nic_vport_disable_roce(fdev->mdev);
index 52c9dee..2a984e8 100644 (file)
@@ -54,7 +54,7 @@ struct mlx5_fpga_device {
        /* QP Connection resources */
        struct {
                u32 pdn;
-               struct mlx5_core_mkey mkey;
+               u32 mkey;
                struct mlx5_uars_page *uar;
        } conn_res;
 
index 873efde..386ab9a 100644 (file)
@@ -99,6 +99,9 @@
 #define LEFTOVERS_NUM_LEVELS 1
 #define LEFTOVERS_NUM_PRIOS 1
 
+#define RDMA_RX_COUNTERS_PRIO_NUM_LEVELS 1
+#define RDMA_TX_COUNTERS_PRIO_NUM_LEVELS 1
+
 #define BY_PASS_PRIO_NUM_LEVELS 1
 #define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
                           LEFTOVERS_NUM_PRIOS)
@@ -206,34 +209,63 @@ static struct init_tree_node egress_root_fs = {
        }
 };
 
-#define RDMA_RX_BYPASS_PRIO 0
-#define RDMA_RX_KERNEL_PRIO 1
+enum {
+       RDMA_RX_COUNTERS_PRIO,
+       RDMA_RX_BYPASS_PRIO,
+       RDMA_RX_KERNEL_PRIO,
+};
+
+#define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS
+#define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1)
+#define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2)
+
 static struct init_tree_node rdma_rx_root_fs = {
        .type = FS_TYPE_NAMESPACE,
-       .ar_size = 2,
+       .ar_size = 3,
        .children = (struct init_tree_node[]) {
+               [RDMA_RX_COUNTERS_PRIO] =
+               ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0,
+                        FS_CHAINING_CAPS,
+                        ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+                               ADD_MULTIPLE_PRIO(MLX5_RDMA_RX_NUM_COUNTERS_PRIOS,
+                                                 RDMA_RX_COUNTERS_PRIO_NUM_LEVELS))),
                [RDMA_RX_BYPASS_PRIO] =
-               ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0,
+               ADD_PRIO(0, RDMA_RX_BYPASS_MIN_LEVEL, 0,
                         FS_CHAINING_CAPS,
                         ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
                                ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
                                                  BY_PASS_PRIO_NUM_LEVELS))),
                [RDMA_RX_KERNEL_PRIO] =
-               ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0,
+               ADD_PRIO(0, RDMA_RX_KERNEL_MIN_LEVEL, 0,
                         FS_CHAINING_CAPS,
                         ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
                                ADD_MULTIPLE_PRIO(1, 1))),
        }
 };
 
+enum {
+       RDMA_TX_COUNTERS_PRIO,
+       RDMA_TX_BYPASS_PRIO,
+};
+
+#define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS
+#define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1)
+
 static struct init_tree_node rdma_tx_root_fs = {
        .type = FS_TYPE_NAMESPACE,
-       .ar_size = 1,
+       .ar_size = 2,
        .children = (struct init_tree_node[]) {
-               ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+               [RDMA_TX_COUNTERS_PRIO] =
+               ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0,
+                        FS_CHAINING_CAPS,
+                        ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+                               ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS,
+                                                 RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))),
+               [RDMA_TX_BYPASS_PRIO] =
+               ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0,
                         FS_CHAINING_CAPS_RDMA_TX,
                         ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
-                               ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+                               ADD_MULTIPLE_PRIO(RDMA_TX_BYPASS_MIN_LEVEL,
                                                  BY_PASS_PRIO_NUM_LEVELS))),
        }
 };
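+/* Both RDMA root namespaces now place a counters priority ahead of the
+ * bypass priority; the *_MIN_LEVEL defines keep the pre-existing bypass
+ * and kernel minimum levels unchanged while making room for the new prio.
+ */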
@@ -2219,6 +2251,12 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
                prio = RDMA_RX_KERNEL_PRIO;
        } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
                root_ns = steering->rdma_tx_root_ns;
+       } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS) {
+               root_ns = steering->rdma_rx_root_ns;
+               prio = RDMA_RX_COUNTERS_PRIO;
+       } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS) {
+               root_ns = steering->rdma_tx_root_ns;
+               prio = RDMA_TX_COUNTERS_PRIO;
        } else { /* Must be NIC RX */
                root_ns = steering->root_ns;
                prio = type;
index 1037e36..2d8406f 100644 (file)
@@ -269,6 +269,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
                        return err;
        }
 
+       if (MLX5_CAP_GEN(dev, shampo)) {
+               err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO);
+               if (err)
+                       return err;
+       }
+
        return 0;
 }
 
index 3b8d8ad..84297cc 100644 (file)
@@ -67,7 +67,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
                MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
                MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
 
-       params->lro_en = false;
+       params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
        params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
        params->tunneled_offload_en = false;
 }
@@ -356,7 +356,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5e_lro_param lro_param;
        int err;
 
        priv->rx_res = mlx5e_rx_res_alloc();
@@ -371,9 +370,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
                goto err_destroy_q_counters;
        }
 
-       lro_param = mlx5e_get_lro_param(&priv->channels.params);
        err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
-                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->max_nch, priv->drop_rq.rqn,
+                               &priv->channels.params.packet_merge,
                                priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
index dea199e..57af962 100644 (file)
@@ -31,7 +31,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
 static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {};
 static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
 static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
-bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; }
+static inline bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; }
 
 #endif /* CONFIG_MLX5_ESWITCH */
 #endif /* __MLX5_LAG_MP_H__ */
index f844639..a92a92a 100644 (file)
@@ -1417,6 +1417,7 @@ static const int types[] = {
        MLX5_CAP_VDPA_EMULATION,
        MLX5_CAP_IPSEC,
        MLX5_CAP_PORT_SELECTION,
+       MLX5_CAP_DEV_SHAMPO,
 };
 
 static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
index 174f71e..f099a08 100644 (file)
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
-                         struct mlx5_core_mkey *mkey,
-                         u32 *in, int inlen)
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in,
+                         int inlen)
 {
        u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
        u32 mkey_index;
-       void *mkc;
        int err;
 
        MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
@@ -50,38 +48,33 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
        if (err)
                return err;
 
-       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
-       mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
-       mkey->size = MLX5_GET64(mkc, mkc, len);
-       mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
-       mkey->pd = MLX5_GET(mkc, mkc, pd);
-       init_waitqueue_head(&mkey->wait);
+       *mkey = MLX5_GET(create_mkey_in, in, memory_key_mkey_entry.mkey_7_0) |
+               mlx5_idx_to_mkey(mkey_index);
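+       /* The returned u32 is the 24-bit firmware mkey index shifted into
+        * the upper bits by mlx5_idx_to_mkey(), OR'd with the caller-chosen
+        * variant byte taken from mkey_7_0 in the input mailbox.
+        */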
 
-       mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key);
+       mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, *mkey);
        return 0;
 }
 EXPORT_SYMBOL(mlx5_core_create_mkey);
 
-int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
-                          struct mlx5_core_mkey *mkey)
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey)
 {
        u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {};
 
        MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
-       MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+       MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
        return mlx5_cmd_exec_in(dev, destroy_mkey, in);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_mkey);
 
-int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
-                        u32 *out, int outlen)
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out,
+                        int outlen)
 {
        u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {};
 
        memset(out, 0, outlen);
        MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
-       MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+       MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
        return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_mkey);
index 66c2476..7f6fd9c 100644 (file)
@@ -24,7 +24,7 @@ struct mlx5dr_icm_dm {
 };
 
 struct mlx5dr_icm_mr {
-       struct mlx5_core_mkey mkey;
+       u32 mkey;
        struct mlx5dr_icm_dm dm;
        struct mlx5dr_domain *dmn;
        size_t length;
@@ -33,7 +33,7 @@ struct mlx5dr_icm_mr {
 
 static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
                                 u32 pd, u64 length, u64 start_addr, int mode,
-                                struct mlx5_core_mkey *mkey)
+                                u32 *mkey)
 {
        u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
@@ -116,7 +116,7 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
        return icm_mr;
 
 free_mkey:
-       mlx5_core_destroy_mkey(mdev, &icm_mr->mkey);
+       mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
 free_dm:
        mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
                               icm_mr->dm.addr, icm_mr->dm.obj_id);
@@ -130,7 +130,7 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
        struct mlx5_core_dev *mdev = icm_mr->dmn->mdev;
        struct mlx5dr_icm_dm *dm = &icm_mr->dm;
 
-       mlx5_core_destroy_mkey(mdev, &icm_mr->mkey);
+       mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
        mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
                               dm->addr, dm->obj_id);
        kvfree(icm_mr);
@@ -252,7 +252,7 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
 
        offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg;
 
-       chunk->rkey = buddy_mem_pool->icm_mr->mkey.key;
+       chunk->rkey = buddy_mem_pool->icm_mr->mkey;
        chunk->mr_addr = offset;
        chunk->icm_addr =
                (uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset;
index bfb14b4..00aef47 100644 (file)
@@ -350,7 +350,7 @@ static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
        send_info->read.length = send_info->write.length;
        /* Read into the same write area */
        send_info->read.addr = (uintptr_t)send_info->write.addr;
-       send_info->read.lkey = send_ring->mr->mkey.key;
+       send_info->read.lkey = send_ring->mr->mkey;
 
        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->read.send_flags = IB_SEND_SIGNALED;
@@ -388,7 +388,7 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
                       (void *)(uintptr_t)send_info->write.addr,
                       send_info->write.length);
                send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
-               send_info->write.lkey = send_ring->mr->mkey.key;
+               send_info->write.lkey = send_ring->mr->mkey;
        }
 
        send_ring->tx_head++;
@@ -848,8 +848,7 @@ static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
        kfree(cq);
 }
 
-static int
-dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
+static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
 {
        u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
        void *mkc;
@@ -908,7 +907,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
 
 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
 {
-       mlx5_core_destroy_mkey(mdev, &mr->mkey);
+       mlx5_core_destroy_mkey(mdev, mr->mkey);
        dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
                         DMA_BIDIRECTIONAL);
        kfree(mr);
@@ -1039,7 +1038,7 @@ int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
        send_info.write.lkey = 0;
        /* Using the sync_mr in order to write/read */
        send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
-       send_info.rkey = send_ring->sync_mr->mkey.key;
+       send_info.rkey = send_ring->sync_mr->mkey;
 
        for (i = 0; i < num_of_sends_req; i++) {
                ret = dr_postsend_icm_data(dmn, &send_info);
index 73fed94..3f47d2b 100644 (file)
@@ -1264,7 +1264,7 @@ struct mlx5dr_cq {
 
 struct mlx5dr_mr {
        struct mlx5_core_dev *mdev;
-       struct mlx5_core_mkey mkey;
+       u32 mkey;
        dma_addr_t dma_addr;
        void *addr;
        size_t size;
index da481a7..01e9c41 100644 (file)
@@ -36,7 +36,7 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 
-int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+static int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
        u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
        u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
@@ -44,13 +44,14 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 
        MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
        err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
-       if (!err)
-               *uarn = MLX5_GET(alloc_uar_out, out, uar);
-       return err;
+       if (err)
+               return err;
+
+       *uarn = MLX5_GET(alloc_uar_out, out, uar);
+       return 0;
 }
-EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
 
-int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+static int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
 {
        u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
 
@@ -58,7 +59,6 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
        MLX5_SET(dealloc_uar_in, in, uar, uarn);
        return mlx5_cmd_exec_in(dev, dealloc_uar, in);
 }
-EXPORT_SYMBOL(mlx5_cmd_free_uar);
 
 static int uars_per_sys_page(struct mlx5_core_dev *mdev)
 {
index 13b0259..fcace73 100644 (file)
@@ -353,13 +353,10 @@ static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
        struct sk_buff *skb;
        int err;
 
-       elem_info->u.rdq.skb = NULL;
        skb = netdev_alloc_skb_ip_align(NULL, buf_len);
        if (!skb)
                return -ENOMEM;
 
-       /* Assume that wqe was previously zeroed. */
-
        err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
                                     buf_len, DMA_FROM_DEVICE);
        if (err)
@@ -597,21 +594,26 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
        struct pci_dev *pdev = mlxsw_pci->pdev;
        struct mlxsw_pci_queue_elem_info *elem_info;
        struct mlxsw_rx_info rx_info = {};
-       char *wqe;
+       char wqe[MLXSW_PCI_WQE_SIZE];
        struct sk_buff *skb;
        u16 byte_count;
        int err;
 
        elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
-       skb = elem_info->u.sdq.skb;
-       if (!skb)
-               return;
-       wqe = elem_info->elem;
-       mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+       skb = elem_info->u.rdq.skb;
+       memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE);
 
        if (q->consumer_counter++ != consumer_counter_limit)
                dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
 
+       err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
+       if (err) {
+               dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+               goto out;
+       }
+
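+       /* The replacement buffer was allocated and mapped above, before the
+        * received one is unmapped below: on allocation failure the old
+        * mapping stays in place and the element is simply re-posted (the
+        * packet is dropped), so the RDQ can never hold an empty slot.
+        */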
+       mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+
        if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
                rx_info.is_lag = true;
                rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe);
@@ -647,10 +649,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
        skb_put(skb, byte_count);
        mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
 
-       memset(wqe, 0, q->elem_size);
-       err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
-       if (err)
-               dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+out:
        /* Everything is set up, ring doorbell to pass elem to HW */
        q->producer_counter++;
        mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
index ddb5ad8..4243d3b 100644 (file)
@@ -271,6 +271,7 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 
 struct mlxsw_sp_qdisc_tree_validate {
        bool forbid_ets;
+       bool forbid_root_tbf;
        bool forbid_tbf;
        bool forbid_red;
 };
@@ -310,18 +311,26 @@ __mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
                if (validate.forbid_red)
                        return -EINVAL;
                validate.forbid_red = true;
+               validate.forbid_root_tbf = true;
                validate.forbid_ets = true;
                break;
        case MLXSW_SP_QDISC_TBF:
-               if (validate.forbid_tbf)
-                       return -EINVAL;
-               validate.forbid_tbf = true;
-               validate.forbid_ets = true;
+               if (validate.forbid_root_tbf) {
+                       if (validate.forbid_tbf)
+                               return -EINVAL;
+                       /* This is a TC TBF. */
+                       validate.forbid_tbf = true;
+                       validate.forbid_ets = true;
+               } else {
+                       /* This is root TBF. */
+                       validate.forbid_root_tbf = true;
+               }
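+               /* Net effect: at most two TBFs may stack. The outermost one
+                * programs the port shaper, the nested one the TC shaper;
+                * any further TBF in the chain is rejected.
+                */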
                break;
        case MLXSW_SP_QDISC_PRIO:
        case MLXSW_SP_QDISC_ETS:
                if (validate.forbid_ets)
                        return -EINVAL;
+               validate.forbid_root_tbf = true;
                validate.forbid_ets = true;
                break;
        default:
@@ -905,16 +914,34 @@ mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
 
+static enum mlxsw_reg_qeec_hr
+mlxsw_sp_qdisc_tbf_hr(struct mlxsw_sp_port *mlxsw_sp_port,
+                     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+       if (mlxsw_sp_qdisc == &mlxsw_sp_port->qdisc->root_qdisc)
+               return MLXSW_REG_QEEC_HR_PORT;
+
+       /* Configure subgroup shaper, so that both UC and MC traffic is subject
+        * to shaping. That is unlike RED, however UC queue lengths are going to
+        * be different than MC ones due to different pool and quota
+        * configurations, so the configuration is not applicable. For shaper on
+        * the other hand, subjecting the overall stream to the configured
+        * shaper makes sense. Also note that that is what we do for
+        * ieee_setmaxrate().
+        */
+       return MLXSW_REG_QEEC_HR_SUBGROUP;
+}
+
 static int
 mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
                           struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
+       enum mlxsw_reg_qeec_hr hr = mlxsw_sp_qdisc_tbf_hr(mlxsw_sp_port,
+                                                         mlxsw_sp_qdisc);
        int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
                                                       mlxsw_sp_qdisc);
 
-       return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
-                                            MLXSW_REG_QEEC_HR_SUBGROUP,
-                                            tclass_num, 0,
+       return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, hr, tclass_num, 0,
                                             MLXSW_REG_QEEC_MAS_DIS, 0);
 }
 
@@ -996,6 +1023,8 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
                           struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
                           void *params)
 {
+       enum mlxsw_reg_qeec_hr hr = mlxsw_sp_qdisc_tbf_hr(mlxsw_sp_port,
+                                                         mlxsw_sp_qdisc);
        struct tc_tbf_qopt_offload_replace_params *p = params;
        u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p);
        int tclass_num;
@@ -1016,17 +1045,7 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
                /* check_params above was supposed to reject this value. */
                return -EINVAL;
 
-       /* Configure subgroup shaper, so that both UC and MC traffic is subject
-        * to shaping. That is unlike RED, however UC queue lengths are going to
-        * be different than MC ones due to different pool and quota
-        * configurations, so the configuration is not applicable. For shaper on
-        * the other hand, subjecting the overall stream to the configured
-        * shaper makes sense. Also note that that is what we do for
-        * ieee_setmaxrate().
-        */
-       return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
-                                            MLXSW_REG_QEEC_HR_SUBGROUP,
-                                            tclass_num, 0,
+       return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, hr, tclass_num, 0,
                                             rate_kbps, burst_size);
 }
 
index 03d0240..4fc9782 100644 (file)
@@ -1743,6 +1743,16 @@ static int lan743x_tx_ring_init(struct lan743x_tx *tx)
                ret = -EINVAL;
                goto cleanup;
        }
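+       /* Prefer 64-bit DMA addressing and fall back to 32-bit; if neither
+        * mask can be set, the descriptor ring cannot be mapped at all.
+        */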
+       if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev,
+                                     DMA_BIT_MASK(64))) {
+               if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev,
+                                             DMA_BIT_MASK(32))) {
+                       dev_warn(&tx->adapter->pdev->dev,
+                                "lan743x_: No suitable DMA available\n");
+                       ret = -ENOMEM;
+                       goto cleanup;
+               }
+       }
        ring_allocation_size = ALIGN(tx->ring_size *
                                     sizeof(struct lan743x_tx_descriptor),
                                     PAGE_SIZE);
@@ -1934,7 +1944,8 @@ static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index)
                                  index);
 }
 
-static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index)
+static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index,
+                                       gfp_t gfp)
 {
        struct net_device *netdev = rx->adapter->netdev;
        struct device *dev = &rx->adapter->pdev->dev;
@@ -1948,7 +1959,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index)
 
        descriptor = &rx->ring_cpu_ptr[index];
        buffer_info = &rx->buffer_info[index];
-       skb = __netdev_alloc_skb(netdev, buffer_length, GFP_ATOMIC | GFP_DMA);
+       skb = __netdev_alloc_skb(netdev, buffer_length, gfp);
        if (!skb)
                return -ENOMEM;
        dma_ptr = dma_map_single(dev, skb->data, buffer_length, DMA_FROM_DEVICE);
@@ -2110,7 +2121,8 @@ static int lan743x_rx_process_buffer(struct lan743x_rx *rx)
 
        /* save existing skb, allocate new skb and map to dma */
        skb = buffer_info->skb;
-       if (lan743x_rx_init_ring_element(rx, rx->last_head)) {
+       if (lan743x_rx_init_ring_element(rx, rx->last_head,
+                                        GFP_ATOMIC | GFP_DMA)) {
                /* failed to allocate next skb.
                 * Memory is very low.
                 * Drop this packet and reuse buffer.
@@ -2276,6 +2288,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx)
                ret = -EINVAL;
                goto cleanup;
        }
+       if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev,
+                                     DMA_BIT_MASK(64))) {
+               if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev,
+                                             DMA_BIT_MASK(32))) {
+                       dev_warn(&rx->adapter->pdev->dev,
+                                "lan743x_: No suitable DMA available\n");
+                       ret = -ENOMEM;
+                       goto cleanup;
+               }
+       }
        ring_allocation_size = ALIGN(rx->ring_size *
                                     sizeof(struct lan743x_rx_descriptor),
                                     PAGE_SIZE);
@@ -2315,13 +2337,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx)
 
        rx->last_head = 0;
        for (index = 0; index < rx->ring_size; index++) {
-               ret = lan743x_rx_init_ring_element(rx, index);
+               ret = lan743x_rx_init_ring_element(rx, index, GFP_KERNEL);
                if (ret)
                        goto cleanup;
        }
        return 0;
 
 cleanup:
+       netif_warn(rx->adapter, ifup, rx->adapter->netdev,
+                  "Error allocating memory for LAN743x\n");
+
        lan743x_rx_ring_cleanup(rx);
        return ret;
 }
@@ -3019,6 +3044,8 @@ static int lan743x_pm_resume(struct device *dev)
        if (ret) {
                netif_err(adapter, probe, adapter->netdev,
                          "lan743x_hardware_init returned %d\n", ret);
+               lan743x_pci_cleanup(adapter);
+               return ret;
        }
 
        /* open netdev when netdev is at running state while resume.
index 34c22ee..aaf7aae 100644 (file)
@@ -831,7 +831,7 @@ struct lan743x_rx_buffer_info {
        unsigned int    buffer_length;
 };
 
-#define LAN743X_RX_RING_SIZE        (65)
+#define LAN743X_RX_RING_SIZE        (128)
 
 #define RX_PROCESS_RESULT_NOTHING_TO_DO     (0)
 #define RX_PROCESS_RESULT_BUFFER_RECEIVED   (1)
index 11c83a9..f469950 100644 (file)
@@ -182,15 +182,21 @@ static int
 nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu)
 {
        struct nfp_net *nn = netdev_priv(netdev);
-       unsigned int max_mtu;
+       struct nfp_bpf_vnic *bv;
+       struct bpf_prog *prog;
 
        if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)
                return 0;
 
-       max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
-       if (new_mtu > max_mtu) {
-               nn_info(nn, "BPF offload active, MTU over %u not supported\n",
-                       max_mtu);
+       if (nn->xdp_hw.prog) {
+               prog = nn->xdp_hw.prog;
+       } else {
+               bv = nn->app_priv;
+               prog = bv->tc_prog;
+       }
+
+       if (nfp_bpf_offload_check_mtu(nn, prog, new_mtu)) {
+               nn_info(nn, "BPF offload active, potential packet access beyond hardware packet boundary\n");
                return -EBUSY;
        }
        return 0;
index d0e17ee..16841bb 100644 (file)
@@ -560,6 +560,8 @@ bool nfp_is_subprog_start(struct nfp_insn_meta *meta);
 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog);
 int nfp_bpf_jit(struct nfp_prog *prog);
 bool nfp_bpf_supported_opcode(u8 code);
+bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog,
+                              unsigned int mtu);
 
 int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx,
                    int prev_insn_idx);
index 5385185..9d97cd2 100644 (file)
@@ -481,19 +481,28 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
        return 0;
 }
 
+bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog,
+                              unsigned int mtu)
+{
+       unsigned int fw_mtu, pkt_off;
+
+       fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
+       pkt_off = min(prog->aux->max_pkt_offset, mtu);
+
+       return fw_mtu < pkt_off;
+}
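+/* fw_mtu is the firmware's packet-split boundary, advertised in units of
+ * 64 bytes less a fixed 32-byte reservation; offload must be refused when
+ * the program's largest direct packet access, clamped to the MTU, could
+ * reach past it. Factoring the check out lets the MTU-change path apply
+ * the same rule as program load.
+ */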
+
 static int
 nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
                 struct netlink_ext_ack *extack)
 {
        struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
-       unsigned int fw_mtu, pkt_off, max_stack, max_prog_len;
+       unsigned int max_stack, max_prog_len;
        dma_addr_t dma_addr;
        void *img;
        int err;
 
-       fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
-       pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu);
-       if (fw_mtu < pkt_off) {
+       if (nfp_bpf_offload_check_mtu(nn, prog, nn->dp.netdev->mtu)) {
                NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary");
                return -EOPNOTSUPP;
        }
index a63cc29..bc39558 100644 (file)
@@ -1015,9 +1015,6 @@ static int lpc_eth_close(struct net_device *ndev)
        napi_disable(&pldat->napi);
        netif_stop_queue(ndev);
 
-       if (ndev->phydev)
-               phy_stop(ndev->phydev);
-
        spin_lock_irqsave(&pldat->lock, flags);
        __lpc_eth_reset(pldat);
        netif_carrier_off(ndev);
@@ -1025,6 +1022,8 @@ static int lpc_eth_close(struct net_device *ndev)
        writel(0, LPC_ENET_MAC2(pldat->net_base));
        spin_unlock_irqrestore(&pldat->lock, flags);
 
+       if (ndev->phydev)
+               phy_stop(ndev->phydev);
        clk_disable_unprepare(pldat->clk);
 
        return 0;
index 4cfab44..07dd3c3 100644 (file)
@@ -844,7 +844,7 @@ netxen_check_options(struct netxen_adapter *adapter)
        adapter->fw_version = NETXEN_VERSION_CODE(fw_major, fw_minor, fw_build);
 
        /* Get FW Mini Coredump template and store it */
-        if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) {
+       if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) {
                if (adapter->mdump.md_template == NULL ||
                                adapter->fw_version > prev_fw_version) {
                        kfree(adapter->mdump.md_template);
index ee6c9c8..bbe21db 100644 (file)
@@ -156,6 +156,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
        { PCI_VDEVICE(REALTEK,  0x8129) },
        { PCI_VDEVICE(REALTEK,  0x8136), RTL_CFG_NO_GBIT },
        { PCI_VDEVICE(REALTEK,  0x8161) },
+       { PCI_VDEVICE(REALTEK,  0x8162) },
        { PCI_VDEVICE(REALTEK,  0x8167) },
        { PCI_VDEVICE(REALTEK,  0x8168) },
        { PCI_VDEVICE(NCUBE,    0x8168) },
index 2c319dd..31f522b 100644 (file)
@@ -78,7 +78,9 @@ static void ifb_ri_tasklet(struct tasklet_struct *t)
        while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
                /* Skip tc and netfilter to prevent redirection loop. */
                skb->redirected = 0;
+#ifdef CONFIG_NET_CLS_ACT
                skb->tc_skip_classify = 1;
+#endif
                nf_skip_egress(skb, true);
 
                u64_stats_update_begin(&txp->tsync);
index f1cbe1f..dae95d9 100644 (file)
@@ -484,7 +484,7 @@ static int at803x_set_wol(struct phy_device *phydev,
 static void at803x_get_wol(struct phy_device *phydev,
                           struct ethtool_wolinfo *wol)
 {
-       u32 value;
+       int value;
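+       /* phy_read() returns a negative errno on failure, so the local
+        * must stay signed for error returns to survive
+        */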
 
        wol->supported = WAKE_MAGIC;
        wol->wolopts = 0;
index 4dc00bd..a4de3d2 100644 (file)
@@ -6,6 +6,8 @@
 #include <linux/delay.h>
 #include <linux/mii.h>
 #include <linux/phy.h>
+#include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
 
 /* External Register Control Register */
 #define LAN87XX_EXT_REG_CTL                     (0x14)
 #define        PHYACC_ATTR_BANK_MISC           1
 #define        PHYACC_ATTR_BANK_PCS            2
 #define        PHYACC_ATTR_BANK_AFE            3
+#define        PHYACC_ATTR_BANK_DSP            4
 #define        PHYACC_ATTR_BANK_MAX            7
 
+/* measurement defines */
+#define        LAN87XX_CABLE_TEST_OK           0
+#define        LAN87XX_CABLE_TEST_OPEN         1
+#define        LAN87XX_CABLE_TEST_SAME_SHORT   2
+
 #define DRIVER_AUTHOR  "Nisar Sayed <nisar.sayed@microchip.com>"
 #define DRIVER_DESC    "Microchip LAN87XX T1 PHY driver"
 
@@ -226,11 +234,240 @@ static int lan87xx_config_init(struct phy_device *phydev)
        return rc < 0 ? rc : 0;
 }
 
+static int microchip_cable_test_start_common(struct phy_device *phydev)
+{
+       int bmcr, bmsr, ret;
+
+       /* If auto-negotiation is enabled, but not complete, the cable
+        * test never completes. So disable auto-neg.
+        */
+       bmcr = phy_read(phydev, MII_BMCR);
+       if (bmcr < 0)
+               return bmcr;
+
+       bmsr = phy_read(phydev, MII_BMSR);
+
+       if (bmsr < 0)
+               return bmsr;
+
+       if (bmcr & BMCR_ANENABLE) {
+               ret = phy_modify(phydev, MII_BMCR, BMCR_ANENABLE, 0);
+               if (ret < 0)
+                       return ret;
+               ret = genphy_soft_reset(phydev);
+               if (ret < 0)
+                       return ret;
+       }
+
+       /* If the link is up, allow it some time to go down */
+       if (bmsr & BMSR_LSTATUS)
+               msleep(1500);
+
+       return 0;
+}
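+/* Standard cable-test prologue: TDR results are only meaningful on a
+ * quiet wire, so auto-negotiation is forced off (with a soft reset to
+ * latch the change) and an established link is given 1.5s to drop.
+ */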
+
+static int lan87xx_cable_test_start(struct phy_device *phydev)
+{
+       static const struct access_ereg_val cable_test[] = {
+               /* min wait */
+               {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, 93,
+                0, 0},
+               /* max wait */
+               {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, 94,
+                10, 0},
+               /* pulse cycle */
+               {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, 95,
+                90, 0},
+               /* cable diag thresh */
+               {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, 92,
+                60, 0},
+               /* max gain */
+               {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, 79,
+                31, 0},
+               /* clock align for each iteration */
+               {PHYACC_ATTR_MODE_MODIFY, PHYACC_ATTR_BANK_DSP, 55,
+                0, 0x0038},
+               /* max cycle wait config */
+               {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, 94,
+                70, 0},
+               /* start cable diag */
+               {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_DSP, 90,
+                1, 0},
+       };
+       int rc, i;
+
+       rc = microchip_cable_test_start_common(phydev);
+       if (rc < 0)
+               return rc;
+
+       /* start cable diag */
+       /* check if part is alive - if not, return diagnostic error */
+       rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, PHYACC_ATTR_BANK_SMI,
+                        0x00, 0);
+       if (rc < 0)
+               return rc;
+
+       /* master/slave specific configs */
+       rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, PHYACC_ATTR_BANK_SMI,
+                        0x0A, 0);
+       if (rc < 0)
+               return rc;
+
+       if ((rc & 0x4000) != 0x4000) {
+               /* DUT is Slave */
+               rc = access_ereg_modify_changed(phydev, PHYACC_ATTR_BANK_AFE,
+                                               0x0E, 0x5, 0x7);
+               if (rc < 0)
+                       return rc;
+               rc = access_ereg_modify_changed(phydev, PHYACC_ATTR_BANK_SMI,
+                                               0x1A, 0x8, 0x8);
+               if (rc < 0)
+                       return rc;
+       } else {
+               /* DUT is Master */
+               rc = access_ereg_modify_changed(phydev, PHYACC_ATTR_BANK_SMI,
+                                               0x10, 0x8, 0x40);
+               if (rc < 0)
+                       return rc;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(cable_test); i++) {
+               if (cable_test[i].mode == PHYACC_ATTR_MODE_MODIFY) {
+                       rc = access_ereg_modify_changed(phydev,
+                                                       cable_test[i].bank,
+                                                       cable_test[i].offset,
+                                                       cable_test[i].val,
+                                                       cable_test[i].mask);
+                       /* wait 50ms */
+                       msleep(50);
+               } else {
+                       rc = access_ereg(phydev, cable_test[i].mode,
+                                        cable_test[i].bank,
+                                        cable_test[i].offset,
+                                        cable_test[i].val);
+               }
+               if (rc < 0)
+                       return rc;
+       }
+       /* cable diag started */
+
+       return 0;
+}
+
+static int lan87xx_cable_test_report_trans(u32 result)
+{
+       switch (result) {
+       case LAN87XX_CABLE_TEST_OK:
+               return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+       case LAN87XX_CABLE_TEST_OPEN:
+               return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+       case LAN87XX_CABLE_TEST_SAME_SHORT:
+               return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+       default:
+               /* DIAGNOSTIC_ERROR */
+               return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+       }
+}
+
+static int lan87xx_cable_test_report(struct phy_device *phydev)
+{
+       int pos_peak_cycle = 0, pos_peak_in_phases = 0, pos_peak_phase = 0;
+       int neg_peak_cycle = 0, neg_peak_in_phases = 0, neg_peak_phase = 0;
+       int noise_margin = 20, time_margin = 89, jitter_var = 30;
+       int min_time_diff = 96, max_time_diff = 96 + time_margin;
+       bool fault = false, check_a = false, check_b = false;
+       int gain_idx = 0, pos_peak = 0, neg_peak = 0;
+       int pos_peak_time = 0, neg_peak_time = 0;
+       int pos_peak_in_phases_hybrid = 0;
+       int detect = -1;
+
+       gain_idx = access_ereg(phydev, PHYACC_ATTR_MODE_READ,
+                              PHYACC_ATTR_BANK_DSP, 151, 0);
+       /* read non-hybrid results */
+       pos_peak = access_ereg(phydev, PHYACC_ATTR_MODE_READ,
+                              PHYACC_ATTR_BANK_DSP, 153, 0);
+       neg_peak = access_ereg(phydev, PHYACC_ATTR_MODE_READ,
+                              PHYACC_ATTR_BANK_DSP, 154, 0);
+       pos_peak_time = access_ereg(phydev, PHYACC_ATTR_MODE_READ,
+                                   PHYACC_ATTR_BANK_DSP, 156, 0);
+       neg_peak_time = access_ereg(phydev, PHYACC_ATTR_MODE_READ,
+                                   PHYACC_ATTR_BANK_DSP, 157, 0);
+
+       pos_peak_cycle = (pos_peak_time >> 7) & 0x7F;
+       /* calculate non-hybrid values */
+       pos_peak_phase = pos_peak_time & 0x7F;
+       pos_peak_in_phases = (pos_peak_cycle * 96) + pos_peak_phase;
+       neg_peak_cycle = (neg_peak_time >> 7) & 0x7F;
+       neg_peak_phase = neg_peak_time & 0x7F;
+       neg_peak_in_phases = (neg_peak_cycle * 96) + neg_peak_phase;
+
+       /* process values */
+       check_a =
+               ((pos_peak_in_phases - neg_peak_in_phases) >= min_time_diff) &&
+               ((pos_peak_in_phases - neg_peak_in_phases) < max_time_diff) &&
+               pos_peak_in_phases_hybrid < pos_peak_in_phases &&
+               (pos_peak_in_phases_hybrid < (neg_peak_in_phases + jitter_var));
+       check_b =
+               ((neg_peak_in_phases - pos_peak_in_phases) >= min_time_diff) &&
+               ((neg_peak_in_phases - pos_peak_in_phases) < max_time_diff) &&
+               pos_peak_in_phases_hybrid < neg_peak_in_phases &&
+               (pos_peak_in_phases_hybrid < (pos_peak_in_phases + jitter_var));
+
+       if (pos_peak_in_phases > neg_peak_in_phases && check_a)
+               detect = 2;
+       else if ((neg_peak_in_phases > pos_peak_in_phases) && check_b)
+               detect = 1;
+
+       if (pos_peak > noise_margin && neg_peak > noise_margin &&
+           gain_idx >= 0) {
+               if (detect == 1 || detect == 2)
+                       fault = true;
+       }
+
+       if (!fault)
+               detect = 0;
+
+       ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+                               lan87xx_cable_test_report_trans(detect));
+
+       return 0;
+}
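
For orientation, a worked decode of the peak-time fields read above (the raw value is made up; only the shift/mask arithmetic comes from the code):

	#include <stdio.h>

	/* Decode a hypothetical DSP peak-time readout of 0x0ABC. */
	int main(void)
	{
		unsigned int peak_time = 0x0ABC;
		unsigned int cycle = (peak_time >> 7) & 0x7F;	/* 21 */
		unsigned int phase = peak_time & 0x7F;		/* 60 */
		unsigned int in_phases = cycle * 96 + phase;	/* 21 * 96 + 60 = 2076 */

		printf("cycle=%u phase=%u in_phases=%u\n", cycle, phase, in_phases);
		return 0;
	}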
+
+static int lan87xx_cable_test_get_status(struct phy_device *phydev,
+                                        bool *finished)
+{
+       int rc = 0;
+
+       *finished = false;
+
+       /* check if cable diag was finished */
+       rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, PHYACC_ATTR_BANK_DSP,
+                        90, 0);
+       if (rc < 0)
+               return rc;
+
+       if ((rc & 2) == 2) {
+               /* stop cable diag */
+               rc = access_ereg(phydev, PHYACC_ATTR_MODE_WRITE,
+                                PHYACC_ATTR_BANK_DSP,
+                                90, 0);
+               if (rc < 0)
+                       return rc;
+
+               *finished = true;
+
+               return lan87xx_cable_test_report(phydev);
+       }
+
+       return 0;
+}
+
 static struct phy_driver microchip_t1_phy_driver[] = {
        {
                .phy_id         = 0x0007c150,
                .phy_id_mask    = 0xfffffff0,
                .name           = "Microchip LAN87xx T1",
+               .flags          = PHY_POLL_CABLE_TEST,
 
                .features       = PHY_BASIC_T1_FEATURES,
 
@@ -241,6 +478,8 @@ static struct phy_driver microchip_t1_phy_driver[] = {
 
                .suspend        = genphy_suspend,
                .resume         = genphy_resume,
+               .cable_test_start = lan87xx_cable_test_start,
+               .cable_test_get_status = lan87xx_cable_test_get_status,
        }
 };
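
PHY_POLL_CABLE_TEST in .flags above tells the phylib core to poll the driver until the test completes. A simplified sketch of that contract; the real core drives it from its event-driven state machine rather than a blocking loop, so poll_cable_test() and the 100 ms interval are illustrative only:

	static int poll_cable_test(struct phy_device *phydev)
	{
		bool finished = false;
		int err;

		err = phydev->drv->cable_test_start(phydev);
		if (err < 0)
			return err;

		do {
			msleep(100);	/* arbitrary poll interval */
			err = phydev->drv->cable_test_get_status(phydev, &finished);
			if (err < 0)
				return err;
		} while (!finished);

		/* results were already sent via ethnl_cable_test_result() */
		return 0;
	}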
 
index f124a8a..a3bfb15 100644 (file)
@@ -243,62 +243,10 @@ static void phy_sanitize_settings(struct phy_device *phydev)
        }
 }
 
-int phy_ethtool_ksettings_set(struct phy_device *phydev,
-                             const struct ethtool_link_ksettings *cmd)
-{
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
-       u8 autoneg = cmd->base.autoneg;
-       u8 duplex = cmd->base.duplex;
-       u32 speed = cmd->base.speed;
-
-       if (cmd->base.phy_address != phydev->mdio.addr)
-               return -EINVAL;
-
-       linkmode_copy(advertising, cmd->link_modes.advertising);
-
-       /* We make sure that we don't pass unsupported values in to the PHY */
-       linkmode_and(advertising, advertising, phydev->supported);
-
-       /* Verify the settings we care about. */
-       if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE)
-               return -EINVAL;
-
-       if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising))
-               return -EINVAL;
-
-       if (autoneg == AUTONEG_DISABLE &&
-           ((speed != SPEED_1000 &&
-             speed != SPEED_100 &&
-             speed != SPEED_10) ||
-            (duplex != DUPLEX_HALF &&
-             duplex != DUPLEX_FULL)))
-               return -EINVAL;
-
-       phydev->autoneg = autoneg;
-
-       if (autoneg == AUTONEG_DISABLE) {
-               phydev->speed = speed;
-               phydev->duplex = duplex;
-       }
-
-       linkmode_copy(phydev->advertising, advertising);
-
-       linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
-                        phydev->advertising, autoneg == AUTONEG_ENABLE);
-
-       phydev->master_slave_set = cmd->base.master_slave_cfg;
-       phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
-
-       /* Restart the PHY */
-       phy_start_aneg(phydev);
-
-       return 0;
-}
-EXPORT_SYMBOL(phy_ethtool_ksettings_set);
-
 void phy_ethtool_ksettings_get(struct phy_device *phydev,
                               struct ethtool_link_ksettings *cmd)
 {
+       mutex_lock(&phydev->lock);
        linkmode_copy(cmd->link_modes.supported, phydev->supported);
        linkmode_copy(cmd->link_modes.advertising, phydev->advertising);
        linkmode_copy(cmd->link_modes.lp_advertising, phydev->lp_advertising);
@@ -317,6 +265,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev,
        cmd->base.autoneg = phydev->autoneg;
        cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl;
        cmd->base.eth_tp_mdix = phydev->mdix;
+       mutex_unlock(&phydev->lock);
 }
 EXPORT_SYMBOL(phy_ethtool_ksettings_get);
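
Taking phydev->lock here turns the multi-field copy into a consistent snapshot. The torn read it prevents, sketched with hypothetical values:

	/*
	 * Without the lock, this interleaving was possible:
	 *
	 *   ksettings_get()                 ksettings_set()
	 *   cmd->base.speed  = 1000;
	 *                                   phydev->speed  = SPEED_100;
	 *                                   phydev->duplex = DUPLEX_HALF;
	 *   cmd->base.duplex = DUPLEX_HALF;
	 *
	 * leaving the caller with 1000/half-duplex, a combination that was
	 * never actually configured.
	 */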
 
@@ -751,7 +700,7 @@ static int phy_check_link_status(struct phy_device *phydev)
 }
 
 /**
- * phy_start_aneg - start auto-negotiation for this PHY device
+ * _phy_start_aneg - start auto-negotiation for this PHY device
  * @phydev: the phy_device struct
  *
  * Description: Sanitizes the settings (if we're not autonegotiating
@@ -759,25 +708,43 @@ static int phy_check_link_status(struct phy_device *phydev)
  *   If the PHYCONTROL Layer is operating, we change the state to
  *   reflect the beginning of Auto-negotiation or forcing.
  */
-int phy_start_aneg(struct phy_device *phydev)
+static int _phy_start_aneg(struct phy_device *phydev)
 {
        int err;
 
+       lockdep_assert_held(&phydev->lock);
+
        if (!phydev->drv)
                return -EIO;
 
-       mutex_lock(&phydev->lock);
-
        if (AUTONEG_DISABLE == phydev->autoneg)
                phy_sanitize_settings(phydev);
 
        err = phy_config_aneg(phydev);
        if (err < 0)
-               goto out_unlock;
+               return err;
 
        if (phy_is_started(phydev))
                err = phy_check_link_status(phydev);
-out_unlock:
+
+       return err;
+}
+
+/**
+ * phy_start_aneg - start auto-negotiation for this PHY device
+ * @phydev: the phy_device struct
+ *
+ * Description: Sanitizes the settings (if we're not autonegotiating
+ *   them), and then calls the driver's config_aneg function.
+ *   If the PHYCONTROL Layer is operating, we change the state to
+ *   reflect the beginning of Auto-negotiation or forcing.
+ */
+int phy_start_aneg(struct phy_device *phydev)
+{
+       int err;
+
+       mutex_lock(&phydev->lock);
+       err = _phy_start_aneg(phydev);
        mutex_unlock(&phydev->lock);
 
        return err;
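
This is the usual unlocked-helper/locked-wrapper split: the helper asserts the lock instead of taking it, so paths that already hold phydev->lock (such as phy_ethtool_ksettings_set() below) can reuse it without deadlocking. The shape, reduced to placeholders (struct foo and do_work() are not kernel symbols):

	static int __do_thing(struct foo *f)
	{
		lockdep_assert_held(&f->lock);	/* callers must hold f->lock */
		return do_work(f);
	}

	int do_thing(struct foo *f)
	{
		int err;

		mutex_lock(&f->lock);
		err = __do_thing(f);
		mutex_unlock(&f->lock);
		return err;
	}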
@@ -800,6 +767,61 @@ static int phy_poll_aneg_done(struct phy_device *phydev)
        return ret < 0 ? ret : 0;
 }
 
+int phy_ethtool_ksettings_set(struct phy_device *phydev,
+                             const struct ethtool_link_ksettings *cmd)
+{
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
+       u8 autoneg = cmd->base.autoneg;
+       u8 duplex = cmd->base.duplex;
+       u32 speed = cmd->base.speed;
+
+       if (cmd->base.phy_address != phydev->mdio.addr)
+               return -EINVAL;
+
+       linkmode_copy(advertising, cmd->link_modes.advertising);
+
+       /* We make sure that we don't pass unsupported values in to the PHY */
+       linkmode_and(advertising, advertising, phydev->supported);
+
+       /* Verify the settings we care about. */
+       if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE)
+               return -EINVAL;
+
+       if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising))
+               return -EINVAL;
+
+       if (autoneg == AUTONEG_DISABLE &&
+           ((speed != SPEED_1000 &&
+             speed != SPEED_100 &&
+             speed != SPEED_10) ||
+            (duplex != DUPLEX_HALF &&
+             duplex != DUPLEX_FULL)))
+               return -EINVAL;
+
+       mutex_lock(&phydev->lock);
+       phydev->autoneg = autoneg;
+
+       if (autoneg == AUTONEG_DISABLE) {
+               phydev->speed = speed;
+               phydev->duplex = duplex;
+       }
+
+       linkmode_copy(phydev->advertising, advertising);
+
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+                        phydev->advertising, autoneg == AUTONEG_ENABLE);
+
+       phydev->master_slave_set = cmd->base.master_slave_cfg;
+       phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
+
+       /* Restart the PHY */
+       _phy_start_aneg(phydev);
+
+       mutex_unlock(&phydev->lock);
+       return 0;
+}
+EXPORT_SYMBOL(phy_ethtool_ksettings_set);
+
 /**
  * phy_speed_down - set speed to lowest speed supported by both link partners
  * @phydev: the phy_device struct
index 9a6a835..ff5d0e9 100644 (file)
@@ -1202,17 +1202,19 @@ static void tbnet_generate_mac(struct net_device *dev)
 {
        const struct tbnet *net = netdev_priv(dev);
        const struct tb_xdomain *xd = net->xd;
+       u8 addr[ETH_ALEN];
        u8 phy_port;
        u32 hash;
 
        phy_port = tb_phy_port_from_link(TBNET_L0_PORT_NUM(xd->route));
 
        /* Unicast and locally administered MAC */
-       dev->dev_addr[0] = phy_port << 4 | 0x02;
+       addr[0] = phy_port << 4 | 0x02;
        hash = jhash2((u32 *)xd->local_uuid, 4, 0);
-       memcpy(dev->dev_addr + 1, &hash, sizeof(hash));
+       memcpy(addr + 1, &hash, sizeof(hash));
        hash = jhash2((u32 *)xd->local_uuid, 4, hash);
-       dev->dev_addr[5] = hash & 0xff;
+       addr[5] = hash & 0xff;
+       eth_hw_addr_set(dev, addr);
 }
 
 static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id)
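
This hunk (and the virtio_net one below) follows the tree-wide move toward treating netdev->dev_addr as immutable outside the core: assemble the address in a local ETH_ALEN buffer, then publish it once with eth_hw_addr_set(). A minimal sketch of the pattern (example_set_mac() is illustrative, not a kernel function):

	static void example_set_mac(struct net_device *dev, const u8 *seed)
	{
		u8 addr[ETH_ALEN];

		ether_addr_copy(addr, seed);
		addr[0] |= 0x02;		/* locally administered */
		addr[0] &= ~0x01;		/* keep it unicast */
		eth_hw_addr_set(dev, addr);	/* single point of update */
	}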
index 03319fd..f20376c 100644 (file)
@@ -4122,6 +4122,12 @@ static int lan78xx_probe(struct usb_interface *intf,
 
        dev->maxpacket = usb_maxpacket(dev->udev, dev->pipe_out, 1);
 
+       /* Reject broken descriptors. */
+       if (dev->maxpacket == 0) {
+               ret = -ENODEV;
+               goto out4;
+       }
+
        /* driver requires remote-wakeup capability during autosuspend. */
        intf->needs_remote_wakeup = 1;
 
index 350bae6..9a6450f 100644 (file)
@@ -1792,6 +1792,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
        dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1);
        if (dev->maxpacket == 0) {
                /* that is a broken device */
+               status = -ENODEV;
                goto out4;
        }
 
index c501b59..cc79343 100644 (file)
@@ -3177,12 +3177,16 @@ static int virtnet_probe(struct virtio_device *vdev)
        dev->max_mtu = MAX_MTU;
 
        /* Configuration may specify what MAC to use.  Otherwise random. */
-       if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
+               u8 addr[ETH_ALEN];
+
                virtio_cread_bytes(vdev,
                                   offsetof(struct virtio_net_config, mac),
-                                  dev->dev_addr, dev->addr_len);
-       else
+                                  addr, ETH_ALEN);
+               eth_hw_addr_set(dev, addr);
+       } else {
                eth_hw_addr_random(dev);
+       }
 
        /* Set up our device-specific information */
        vi = netdev_priv(dev);
index 3e1b774..14fae31 100644 (file)
@@ -3833,7 +3833,6 @@ vmxnet3_suspend(struct device *device)
        vmxnet3_free_intr_resources(adapter);
 
        netif_device_detach(netdev);
-       netif_tx_stop_all_queues(netdev);
 
        /* Create wake-up filters. */
        pmConf = adapter->pm_conf;
index 57437e4..911f439 100644 (file)
@@ -1730,6 +1730,10 @@ static int netfront_resume(struct xenbus_device *dev)
 
        dev_dbg(&dev->dev, "%s\n", dev->nodename);
 
+       netif_tx_lock_bh(info->netdev);
+       netif_device_detach(info->netdev);
+       netif_tx_unlock_bh(info->netdev);
+
        xennet_disconnect_backend(info);
        return 0;
 }
@@ -2351,6 +2355,10 @@ static int xennet_connect(struct net_device *dev)
         * domain a kick because we've probably just requeued some
         * packets.
         */
+       netif_tx_lock_bh(np->netdev);
+       netif_device_attach(np->netdev);
+       netif_tx_unlock_bh(np->netdev);
+
        netif_carrier_on(np->netdev);
        for (j = 0; j < num_queues; ++j) {
                queue = &np->queues[j];
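
The detach added to netfront_resume() and the attach added here bracket the window in which the backend is torn down, and netif_tx_lock_bh() serializes both against an in-flight start_xmit. The pairing as a skeleton (function names are illustrative):

	static void quiesce_for_reconnect(struct net_device *ndev)
	{
		netif_tx_lock_bh(ndev);
		netif_device_detach(ndev);	/* stop TX, mark device absent */
		netif_tx_unlock_bh(ndev);
	}

	static void resume_after_reconnect(struct net_device *ndev)
	{
		netif_tx_lock_bh(ndev);
		netif_device_attach(ndev);	/* restart queues if previously detached */
		netif_tx_unlock_bh(ndev);
	}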
index 517376c..16ceb76 100644 (file)
@@ -1006,11 +1006,11 @@ static u64 port100_get_command_type_mask(struct port100 *dev)
 
        skb = port100_alloc_skb(dev, 0);
        if (!skb)
-               return -ENOMEM;
+               return 0;
 
        resp = port100_send_cmd_sync(dev, PORT100_CMD_GET_COMMAND_TYPE, skb);
        if (IS_ERR(resp))
-               return PTR_ERR(resp);
+               return 0;
 
        if (resp->len < 8)
                mask = 0;
index e79690b..d7f8175 100644 (file)
@@ -5,7 +5,6 @@
 
 #include <linux/err.h>
 #include <linux/io.h>
-#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -13,7 +12,6 @@
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/pinctrl/pinmux.h>
 #include <linux/platform_device.h>
-#include <linux/regmap.h>
 #include <linux/slab.h>
 
 #define FLAG_BCM4708           BIT(1)
@@ -24,8 +22,7 @@ struct ns_pinctrl {
        struct device *dev;
        unsigned int chipset_flag;
        struct pinctrl_dev *pctldev;
-       struct regmap *regmap;
-       u32 offset;
+       void __iomem *base;
 
        struct pinctrl_desc pctldesc;
        struct ns_pinctrl_group *groups;
@@ -232,9 +229,9 @@ static int ns_pinctrl_set_mux(struct pinctrl_dev *pctrl_dev,
                unset |= BIT(pin_number);
        }
 
-       regmap_read(ns_pinctrl->regmap, ns_pinctrl->offset, &tmp);
+       tmp = readl(ns_pinctrl->base);
        tmp &= ~unset;
-       regmap_write(ns_pinctrl->regmap, ns_pinctrl->offset, tmp);
+       writel(tmp, ns_pinctrl->base);
 
        return 0;
 }
@@ -266,13 +263,13 @@ static const struct of_device_id ns_pinctrl_of_match_table[] = {
 static int ns_pinctrl_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
-       struct device_node *np = dev->of_node;
        const struct of_device_id *of_id;
        struct ns_pinctrl *ns_pinctrl;
        struct pinctrl_desc *pctldesc;
        struct pinctrl_pin_desc *pin;
        struct ns_pinctrl_group *group;
        struct ns_pinctrl_function *function;
+       struct resource *res;
        int i;
 
        ns_pinctrl = devm_kzalloc(dev, sizeof(*ns_pinctrl), GFP_KERNEL);
@@ -290,18 +287,12 @@ static int ns_pinctrl_probe(struct platform_device *pdev)
                return -EINVAL;
        ns_pinctrl->chipset_flag = (uintptr_t)of_id->data;
 
-       ns_pinctrl->regmap = syscon_node_to_regmap(of_get_parent(np));
-       if (IS_ERR(ns_pinctrl->regmap)) {
-               int err = PTR_ERR(ns_pinctrl->regmap);
-
-               dev_err(dev, "Failed to map pinctrl regs: %d\n", err);
-
-               return err;
-       }
-
-       if (of_property_read_u32(np, "offset", &ns_pinctrl->offset)) {
-               dev_err(dev, "Failed to get register offset\n");
-               return -ENOENT;
+       res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                          "cru_gpio_control");
+       ns_pinctrl->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(ns_pinctrl->base)) {
+               dev_err(dev, "Failed to map pinctrl regs\n");
+               return PTR_ERR(ns_pinctrl->base);
        }
 
        memcpy(pctldesc, &ns_pinctrl_desc, sizeof(*pctldesc));
index 8d0f88e..bae9d42 100644 (file)
@@ -840,6 +840,34 @@ static const struct pinconf_ops amd_pinconf_ops = {
        .pin_config_group_set = amd_pinconf_group_set,
 };
 
+static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
+{
+       struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
+       unsigned long flags;
+       u32 pin_reg, mask;
+       int i;
+
+       mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) |
+               BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) |
+               BIT(WAKE_CNTRL_OFF_S4);
+
+       for (i = 0; i < desc->npins; i++) {
+               int pin = desc->pins[i].number;
+               const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin);
+
+               if (!pd)
+                       continue;
+
+               raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+
+               pin_reg = readl(gpio_dev->base + i * 4);
+               pin_reg &= ~mask;
+               writel(pin_reg, gpio_dev->base + i * 4);
+
+               raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+       }
+}
+
 #ifdef CONFIG_PM_SLEEP
 static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin)
 {
@@ -976,6 +1004,9 @@ static int amd_gpio_probe(struct platform_device *pdev)
                return PTR_ERR(gpio_dev->pctrl);
        }
 
+       /* Disable and mask interrupts */
+       amd_gpio_irq_init(gpio_dev);
+
        girq = &gpio_dev->gc.irq;
        girq->chip = &amd_gpio_irqchip;
        /* This will let us handle the parent IRQ in the driver */
index 68b3886..dfd8888 100644 (file)
@@ -1644,8 +1644,8 @@ int __maybe_unused stm32_pinctrl_resume(struct device *dev)
        struct stm32_pinctrl_group *g = pctl->groups;
        int i;
 
-       for (i = g->pin; i < g->pin + pctl->ngroups; i++)
-               stm32_pinctrl_restore_gpio_regs(pctl, i);
+       for (i = 0; i < pctl->ngroups; i++, g++)
+               stm32_pinctrl_restore_gpio_regs(pctl, g->pin);
 
        return 0;
 }
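
The old loop assumed the pins of consecutive groups are numbered contiguously from g->pin, which pin controllers do not guarantee; the fix iterates the groups themselves. A standalone illustration with a made-up pin map:

	#include <stdio.h>

	struct group { int pin; };

	int main(void)
	{
		struct group groups[] = { { 0 }, { 8 }, { 42 } };	/* hypothetical */
		struct group *g = groups;
		int ngroups = 3, i;

		for (i = 0; i < ngroups; i++, g++)
			printf("restore pin %d\n", g->pin);	/* 0, 8, 42 */
		/* the old loop would have restored pins 0, 1, 2 instead */
		return 0;
	}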
index f9b2d66..0e4bc8b 100644 (file)
@@ -284,11 +284,11 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
        /* Create a posix clock and link it to the device. */
        err = posix_clock_register(&ptp->clock, &ptp->dev);
        if (err) {
-               if (ptp->pps_source)
-                       pps_unregister_source(ptp->pps_source);
+               if (ptp->pps_source)
+                       pps_unregister_source(ptp->pps_source);
 
                if (ptp->kworker)
-                       kthread_destroy_worker(ptp->kworker);
+                       kthread_destroy_worker(ptp->kworker);
 
                put_device(&ptp->dev);
 
index be799a5..b0056ae 100644 (file)
@@ -147,8 +147,8 @@ config RESET_OXNAS
        bool
 
 config RESET_PISTACHIO
-       bool "Pistachio Reset Driver" if COMPILE_TEST
-       default MACH_PISTACHIO
+       bool "Pistachio Reset Driver"
+       depends on MIPS || COMPILE_TEST
        help
          This enables the reset driver for ImgTec Pistachio SoCs.
 
index b6f074d..433fa0c 100644 (file)
@@ -38,7 +38,7 @@ static int brcm_rescal_reset_set(struct reset_controller_dev *rcdev,
        }
 
        ret = readl_poll_timeout(base + BRCM_RESCAL_STATUS, reg,
-                                !(reg & BRCM_RESCAL_STATUS_BIT), 100, 1000);
+                                (reg & BRCM_RESCAL_STATUS_BIT), 100, 1000);
        if (ret) {
                dev_err(data->dev, "time out on SATA/PCIe rescal\n");
                return ret;
index 2a72f86..8c6492e 100644 (file)
@@ -92,3 +92,29 @@ void __init socfpga_reset_init(void)
        for_each_matching_node(np, socfpga_early_reset_dt_ids)
                a10_reset_init(np);
 }
+
+/*
+ * The early driver is problematic, because it doesn't register
+ * itself as a driver. This causes certain device links to prevent
+ * consumer devices from probing. The hacky solution is to register
+ * an empty driver, whose only job is to attach itself to the reset
+ * manager and call probe.
+ */
+static const struct of_device_id socfpga_reset_dt_ids[] = {
+       { .compatible = "altr,rst-mgr", },
+       { /* sentinel */ },
+};
+
+static int reset_simple_probe(struct platform_device *pdev)
+{
+       return 0;
+}
+
+static struct platform_driver reset_socfpga_driver = {
+       .probe  = reset_simple_probe,
+       .driver = {
+               .name           = "socfpga-reset",
+               .of_match_table = socfpga_reset_dt_ids,
+       },
+};
+builtin_platform_driver(reset_socfpga_driver);
index 24d3395..4c5bba5 100644 (file)
@@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc,
        struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc);
        struct mrq_reset_request request;
        struct tegra_bpmp_message msg;
+       int err;
 
        memset(&request, 0, sizeof(request));
        request.cmd = command;
@@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc,
        msg.tx.data = &request;
        msg.tx.size = sizeof(request);
 
-       return tegra_bpmp_transfer(bpmp, &msg);
+       err = tegra_bpmp_transfer(bpmp, &msg);
+       if (err)
+               return err;
+       if (msg.rx.ret)
+               return -EINVAL;
+
+       return 0;
 }
 
 static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc,
index 3f6f14f..24b72ee 100644 (file)
@@ -220,7 +220,8 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
                goto fail;
        }
 
-       shost->cmd_per_lun = min_t(short, shost->cmd_per_lun,
+       /* Use min_t(int, ...) in case shost->can_queue exceeds SHRT_MAX */
+       shost->cmd_per_lun = min_t(int, shost->cmd_per_lun,
                                   shost->can_queue);
 
        error = scsi_init_sense_cache(shost);
index 2197988..3cae880 100644 (file)
@@ -3736,7 +3736,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        shost->max_lun = -1;
        shost->unique_id = mrioc->id;
 
-       shost->max_channel = 1;
+       shost->max_channel = 0;
        shost->max_id = 0xFFFFFFFF;
 
        if (prot_mask >= 0)
index 4b5d28d..655cf5d 100644 (file)
@@ -431,7 +431,7 @@ done_unmap_sg:
        goto done_free_fcport;
 
 done_free_fcport:
-       if (bsg_request->msgcode == FC_BSG_RPT_ELS)
+       if (bsg_request->msgcode != FC_BSG_RPT_ELS)
                qla2x00_free_fcport(fcport);
 done:
        return rval;
index d2e40aa..836fedc 100644 (file)
@@ -4157,7 +4157,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
                                        ql_dbg_pci(ql_dbg_init, ha->pdev,
                                            0xe0ee, "%s: failed alloc dsd\n",
                                            __func__);
-                                       return 1;
+                                       return -ENOMEM;
                                }
                                ha->dif_bundle_kallocs++;
 
index b3478ed..7d8242c 100644 (file)
@@ -3319,8 +3319,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
                        "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n",
                        vha->flags.online, qla2x00_reset_active(vha),
                        cmd->reset_count, qpair->chip_reset);
-               spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
-               return 0;
+               goto out_unmap_unlock;
        }
 
        /* Does F/W have an IOCBs for this request */
@@ -3445,10 +3444,6 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
        prm.sg = NULL;
        prm.req_cnt = 1;
 
-       /* Calculate number of entries and segments required */
-       if (qlt_pci_map_calc_cnt(&prm) != 0)
-               return -EAGAIN;
-
        if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) ||
            (cmd->sess && cmd->sess->deleted)) {
                /*
@@ -3466,6 +3461,10 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
                return 0;
        }
 
+       /* Calculate number of entries and segments required */
+       if (qlt_pci_map_calc_cnt(&prm) != 0)
+               return -EAGAIN;
+
        spin_lock_irqsave(qpair->qp_lock_ptr, flags);
        /* Does F/W have an IOCBs for this request */
        res = qlt_check_reserve_free_req(qpair, prm.req_cnt);
@@ -3870,9 +3869,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd)
 
        BUG_ON(cmd->cmd_in_wq);
 
-       if (cmd->sg_mapped)
-               qlt_unmap_sg(cmd->vha, cmd);
-
        if (!cmd->q_full)
                qlt_decr_num_pend_cmds(cmd->vha);
 
index b241f9e..291ecc3 100644 (file)
@@ -553,8 +553,10 @@ EXPORT_SYMBOL(scsi_device_get);
  */
 void scsi_device_put(struct scsi_device *sdev)
 {
-       module_put(sdev->host->hostt->module);
+       struct module *mod = sdev->host->hostt->module;
+
        put_device(&sdev->sdev_gendev);
+       module_put(mod);
 }
 EXPORT_SYMBOL(scsi_device_put);
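
put_device() may drop the last reference and free sdev, after which sdev->host can no longer be dereferenced, so the module pointer has to be copied out first. The general shape of the fix (struct thing and its fields are placeholders):

	void put_thing(struct thing *t)
	{
		struct module *mod = t->owner;	/* copy before the ref may vanish */

		put_device(&t->dev);		/* may free t */
		module_put(mod);		/* safe: uses the saved copy */
	}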
 
index 8679325..a35841b 100644 (file)
@@ -449,9 +449,12 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
        struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL;
        struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL;
        unsigned long flags;
+       struct module *mod;
 
        sdev = container_of(work, struct scsi_device, ew.work);
 
+       mod = sdev->host->hostt->module;
+
        scsi_dh_release_device(sdev);
 
        parent = sdev->sdev_gendev.parent;
@@ -502,11 +505,17 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 
        if (parent)
                put_device(parent);
+       module_put(mod);
 }
 
 static void scsi_device_dev_release(struct device *dev)
 {
        struct scsi_device *sdp = to_scsi_device(dev);
+
+       /* Set the module pointer to NULL in case of module unloading */
+       if (!try_module_get(sdp->host->hostt->module))
+               sdp->host->hostt->module = NULL;
+
        execute_in_process_context(scsi_device_dev_release_usercontext,
                                   &sdp->ew);
 }
index 922e4c7..78343d3 100644 (file)
@@ -2930,8 +2930,6 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
                        session->recovery_tmo = value;
                break;
        default:
-               err = transport->set_param(conn, ev->u.set_param.param,
-                                          data, ev->u.set_param.len);
                if ((conn->state == ISCSI_CONN_BOUND) ||
                        (conn->state == ISCSI_CONN_UP)) {
                        err = transport->set_param(conn, ev->u.set_param.param,
index 523bf2f..fce6333 100644 (file)
@@ -3683,7 +3683,12 @@ static int sd_resume(struct device *dev)
 static int sd_resume_runtime(struct device *dev)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
-       struct scsi_device *sdp = sdkp->device;
+       struct scsi_device *sdp;
+
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
+
+       sdp = sdkp->device;
 
        if (sdp->ignore_media_change) {
                /* clear the device's sense data */
index ebbbc12..9eb1b88 100644 (file)
@@ -1285,11 +1285,15 @@ static void storvsc_on_channel_callback(void *context)
        foreach_vmbus_pkt(desc, channel) {
                struct vstor_packet *packet = hv_pkt_data(desc);
                struct storvsc_cmd_request *request = NULL;
+               u32 pktlen = hv_pkt_datalen(desc);
                u64 rqst_id = desc->trans_id;
+               u32 minlen = rqst_id ? sizeof(struct vstor_packet) -
+                       stor_device->vmscsi_size_delta : sizeof(enum vstor_packet_operation);
 
-               if (hv_pkt_datalen(desc) < sizeof(struct vstor_packet) -
-                               stor_device->vmscsi_size_delta) {
-                       dev_err(&device->device, "Invalid packet len\n");
+               if (pktlen < minlen) {
+                       dev_err(&device->device,
+                               "Invalid pkt: id=%llu, len=%u, minlen=%u\n",
+                               rqst_id, pktlen, minlen);
                        continue;
                }
 
@@ -1302,13 +1306,23 @@ static void storvsc_on_channel_callback(void *context)
                        if (rqst_id == 0) {
                                /*
                                 * storvsc_on_receive() looks at the vstor_packet in the message
-                                * from the ring buffer.  If the operation in the vstor_packet is
-                                * COMPLETE_IO, then we call storvsc_on_io_completion(), and
-                                * dereference the guest memory address.  Make sure we don't call
-                                * storvsc_on_io_completion() with a guest memory address that is
-                                * zero if Hyper-V were to construct and send such a bogus packet.
+                                * from the ring buffer.
+                                *
+                                * - If the operation in the vstor_packet is COMPLETE_IO, then
+                                *   we call storvsc_on_io_completion(), and dereference the
+                                *   guest memory address.  Make sure we don't call
+                                *   storvsc_on_io_completion() with a guest memory address
+                                *   that is zero if Hyper-V were to construct and send such
+                                *   a bogus packet.
+                                *
+                                * - If the operation in the vstor_packet is FCHBA_DATA, then
+                                *   we call cache_wwn(), and access the data payload area of
+                                *   the packet (wwn_packet); however, there is no guarantee
+                                *   that the packet is big enough to contain such area.
+                                *   Future-proof the code by rejecting such a bogus packet.
                                 */
-                               if (packet->operation == VSTOR_OPERATION_COMPLETE_IO) {
+                               if (packet->operation == VSTOR_OPERATION_COMPLETE_IO ||
+                                   packet->operation == VSTOR_OPERATION_FCHBA_DATA) {
                                        dev_err(&device->device, "Invalid packet with ID of 0\n");
                                        continue;
                                }
index 149c1aa..5142455 100644 (file)
@@ -370,20 +370,6 @@ static void ufs_intel_common_exit(struct ufs_hba *hba)
 
 static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op)
 {
-       /*
-        * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base
-        * address registers must be restored because the restore kernel can
-        * have used different addresses.
-        */
-       ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr),
-                     REG_UTP_TRANSFER_REQ_LIST_BASE_L);
-       ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr),
-                     REG_UTP_TRANSFER_REQ_LIST_BASE_H);
-       ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr),
-                     REG_UTP_TASK_REQ_LIST_BASE_L);
-       ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr),
-                     REG_UTP_TASK_REQ_LIST_BASE_H);
-
        if (ufshcd_is_link_hibern8(hba)) {
                int ret = ufshcd_uic_hibern8_exit(hba);
 
@@ -463,6 +449,18 @@ static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = {
        .device_reset           = ufs_intel_device_reset,
 };
 
+#ifdef CONFIG_PM_SLEEP
+static int ufshcd_pci_restore(struct device *dev)
+{
+       struct ufs_hba *hba = dev_get_drvdata(dev);
+
+       /* Force a full reset and restore */
+       ufshcd_set_link_off(hba);
+
+       return ufshcd_system_resume(dev);
+}
+#endif
+
 /**
  * ufshcd_pci_shutdown - main function to put the controller in reset state
  * @pdev: pointer to PCI device handle
@@ -546,9 +544,14 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 }
 
 static const struct dev_pm_ops ufshcd_pci_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
        SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 #ifdef CONFIG_PM_SLEEP
+       .suspend        = ufshcd_system_suspend,
+       .resume         = ufshcd_system_resume,
+       .freeze         = ufshcd_system_suspend,
+       .thaw           = ufshcd_system_resume,
+       .poweroff       = ufshcd_system_suspend,
+       .restore        = ufshcd_pci_restore,
        .prepare        = ufshcd_suspend_prepare,
        .complete       = ufshcd_resume_complete,
 #endif
index 44fc9ee..ca40923 100644 (file)
@@ -134,7 +134,7 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev)
        if (!master)
                return -ENOMEM;
 
-       master->bus_num = dfl_dev->id;
+       master->bus_num = -1;
 
        hw = spi_master_get_devdata(master);
 
index f7a7c14..65147aa 100644 (file)
@@ -48,7 +48,7 @@ static int altera_spi_probe(struct platform_device *pdev)
                return err;
 
        /* setup the master state. */
-       master->bus_num = pdev->id;
+       master->bus_num = -1;
 
        if (pdata) {
                if (pdata->num_chipselect > ALTERA_SPI_MAX_CS) {
index feebda6..e4484ac 100644 (file)
@@ -1716,12 +1716,13 @@ static int verify_controller_parameters(struct pl022 *pl022,
                                return -EINVAL;
                        }
                } else {
-                       if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX)
+                       if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) {
                                dev_err(&pl022->adev->dev,
                                        "Microwire half duplex mode requested,"
                                        " but this is only available in the"
                                        " ST version of PL022\n");
-                       return -EINVAL;
+                               return -EINVAL;
+                       }
                }
        }
        return 0;
index 713292b..3226c4e 100644 (file)
@@ -1194,7 +1194,7 @@ static int __maybe_unused tegra_slink_runtime_suspend(struct device *dev)
        return 0;
 }
 
-static int tegra_slink_runtime_resume(struct device *dev)
+static int __maybe_unused tegra_slink_runtime_resume(struct device *dev)
 {
        struct spi_master *master = dev_get_drvdata(dev);
        struct tegra_slink_data *tspi = spi_master_get_devdata(master);
index 5d24c1b..d5785c0 100644 (file)
@@ -409,7 +409,7 @@ int cvm_oct_common_init(struct net_device *dev)
        struct octeon_ethernet *priv = netdev_priv(dev);
        int ret;
 
-       ret = of_get_mac_address(priv->of_node, dev->dev_addr);
+       ret = of_get_ethdev_address(priv->of_node, dev);
        if (ret)
                eth_hw_addr_random(dev);
 
index 01a848a..3163b31 100644 (file)
@@ -15,7 +15,7 @@ struct mlx5_vdpa_direct_mr {
        u64 start;
        u64 end;
        u32 perm;
-       struct mlx5_core_mkey mr;
+       u32 mr;
        struct sg_table sg_head;
        int log_size;
        int nsg;
@@ -25,7 +25,7 @@ struct mlx5_vdpa_direct_mr {
 };
 
 struct mlx5_vdpa_mr {
-       struct mlx5_core_mkey mkey;
+       u32 mkey;
 
        /* list of direct MRs descendants of this indirect mr */
        struct list_head head;
@@ -99,9 +99,9 @@ int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn);
 void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn);
 int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev);
 void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev);
-int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in,
+int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
                          int inlen);
-int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey);
+int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
 int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
                             bool *change_map);
 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb);
index ff010c6..a639b92 100644 (file)
@@ -88,7 +88,7 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
 
 static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
 {
-       mlx5_vdpa_destroy_mkey(mvdev, &mr->mr);
+       mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
 }
 
 static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
@@ -162,7 +162,7 @@ again:
                }
 
                if (preve == dmr->start) {
-                       klm->key = cpu_to_be32(dmr->mr.key);
+                       klm->key = cpu_to_be32(dmr->mr);
                        klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
                        preve = dmr->end;
                } else {
@@ -217,7 +217,7 @@ static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr
 
 static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
 {
-       mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
+       mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
 }
 
 static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
@@ -449,7 +449,7 @@ static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
 
 static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
 {
-       mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
+       mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
 }
 
 static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
index 15e266d..9800f9b 100644 (file)
@@ -198,12 +198,11 @@ void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn)
        mlx5_cmd_exec_in(mvdev->mdev, dealloc_transport_domain, in);
 }
 
-int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in,
+int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
                          int inlen)
 {
        u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
        u32 mkey_index;
-       void *mkc;
        int err;
 
        MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
@@ -213,22 +212,18 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk
        if (err)
                return err;
 
-       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
-       mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
-       mkey->size = MLX5_GET64(mkc, mkc, len);
-       mkey->key |= mlx5_idx_to_mkey(mkey_index);
-       mkey->pd = MLX5_GET(mkc, mkc, pd);
+       *mkey |= mlx5_idx_to_mkey(mkey_index);
        return 0;
 }
 
-int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey)
+int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey)
 {
        u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {};
 
        MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
        MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
-       MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+       MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
        return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
 }
 
index bd56de7..5c7d2a9 100644 (file)
@@ -865,7 +865,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
        MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
        MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
        MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
-       MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
+       MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
        MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
        MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
        MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
index 26e3d90..841667a 100644 (file)
@@ -80,6 +80,7 @@ struct vduse_dev {
        struct vdpa_callback config_cb;
        struct work_struct inject;
        spinlock_t irq_lock;
+       struct rw_semaphore rwsem;
        int minor;
        bool broken;
        bool connected;
@@ -410,6 +411,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
        if (domain->bounce_map)
                vduse_domain_reset_bounce_map(domain);
 
+       down_write(&dev->rwsem);
+
        dev->status = 0;
        dev->driver_features = 0;
        dev->generation++;
@@ -443,6 +446,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
                flush_work(&vq->inject);
                flush_work(&vq->kick);
        }
+
+       up_write(&dev->rwsem);
 }
 
 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
@@ -885,6 +890,23 @@ static void vduse_vq_irq_inject(struct work_struct *work)
        spin_unlock_irq(&vq->irq_lock);
 }
 
+static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
+                                   struct work_struct *irq_work)
+{
+       int ret = -EINVAL;
+
+       down_read(&dev->rwsem);
+       if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
+               goto unlock;
+
+       ret = 0;
+       queue_work(vduse_irq_wq, irq_work);
+unlock:
+       up_read(&dev->rwsem);
+
+       return ret;
+}
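
vduse_dev_reset() takes the same rwsem for write (see the hunk above), so a reset first drains all in-flight injections and then blocks new ones until DRIVER_OK is re-established. A generic sketch of the split (dev_state and the helpers are placeholders):

	static int guarded_inject(struct dev_state *s)
	{
		int ret = -EINVAL;

		down_read(&s->rwsem);	/* injectors may run concurrently */
		if (s->driver_ok)
			ret = do_inject(s);
		up_read(&s->rwsem);
		return ret;
	}

	static void guarded_reset(struct dev_state *s)
	{
		down_write(&s->rwsem);	/* waits out, then excludes, injectors */
		s->driver_ok = false;
		teardown(s);
		up_write(&s->rwsem);
	}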
+
 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                            unsigned long arg)
 {
@@ -966,8 +988,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                break;
        }
        case VDUSE_DEV_INJECT_CONFIG_IRQ:
-               ret = 0;
-               queue_work(vduse_irq_wq, &dev->inject);
+               ret = vduse_dev_queue_irq_work(dev, &dev->inject);
                break;
        case VDUSE_VQ_SETUP: {
                struct vduse_vq_config config;
@@ -1053,9 +1074,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                if (index >= dev->vq_num)
                        break;
 
-               ret = 0;
                index = array_index_nospec(index, dev->vq_num);
-               queue_work(vduse_irq_wq, &dev->vqs[index].inject);
+               ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
                break;
        }
        default:
@@ -1136,6 +1156,7 @@ static struct vduse_dev *vduse_dev_create(void)
        INIT_LIST_HEAD(&dev->send_list);
        INIT_LIST_HEAD(&dev->recv_list);
        spin_lock_init(&dev->irq_lock);
+       init_rwsem(&dev->rwsem);
 
        INIT_WORK(&dev->inject, vduse_dev_irq_inject);
        init_waitqueue_head(&dev->waitq);
index dd95dfd..3035bb6 100644 (file)
@@ -576,7 +576,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
        /* Last one doesn't continue. */
        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
        if (!indirect && vq->use_dma_api)
-               vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags =
+               vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
                        ~VRING_DESC_F_NEXT;
 
        if (indirect) {
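
The one-character fix matters because VRING_DESC_F_NEXT is bit 0 (value 1 in the virtio spec): plain assignment stored ~1 into the u16 flags, setting every bit except NEXT, where the intent was to clear NEXT alone. A standalone demonstration:

	#include <stdio.h>
	#include <stdint.h>

	#define VRING_DESC_F_NEXT 1	/* bit 0, per the virtio spec */

	int main(void)
	{
		uint16_t buggy = 0x0005, fixed = 0x0005;	/* hypothetical flags */

		buggy  = ~VRING_DESC_F_NEXT;	/* 0xfffe: clobbers every flag */
		fixed &= ~VRING_DESC_F_NEXT;	/* 0x0004: clears only NEXT */
		printf("buggy=0x%04x fixed=0x%04x\n", buggy, fixed);
		return 0;
	}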
index 643c6c2..ced2fc0 100644 (file)
@@ -71,8 +71,6 @@
 #define TCOBASE(p)     ((p)->tco_res->start)
 /* SMI Control and Enable Register */
 #define SMI_EN(p)      ((p)->smi_res->start)
-#define TCO_EN         (1 << 13)
-#define GBL_SMI_EN     (1 << 0)
 
 #define TCO_RLD(p)     (TCOBASE(p) + 0x00) /* TCO Timer Reload/Curr. Value */
 #define TCOv1_TMR(p)   (TCOBASE(p) + 0x01) /* TCOv1 Timer Initial Value*/
@@ -357,12 +355,8 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t)
 
        tmrval = seconds_to_ticks(p, t);
 
-       /*
-        * If TCO SMIs are off, the timer counts down twice before rebooting.
-        * Otherwise, the BIOS generally reboots when the SMI triggers.
-        */
-       if (p->smi_res &&
-           (inl(SMI_EN(p)) & (TCO_EN | GBL_SMI_EN)) != (TCO_EN | GBL_SMI_EN))
+       /* For TCO v1 the timer counts down twice before rebooting */
+       if (p->iTCO_version == 1)
                tmrval /= 2;
 
        /* from the specs: */
@@ -527,7 +521,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
                 * Disables TCO logic generating an SMI#
                 */
                val32 = inl(SMI_EN(p));
-               val32 &= ~TCO_EN;       /* Turn off SMI clearing watchdog */
+               val32 &= 0xffffdfff;    /* Turn off SMI clearing watchdog */
                outl(val32, SMI_EN(p));
        }
 
index 2693ffb..31b03fa 100644 (file)
@@ -119,7 +119,7 @@ static int ixp4xx_wdt_probe(struct platform_device *pdev)
        iwdt = devm_kzalloc(dev, sizeof(*iwdt), GFP_KERNEL);
        if (!iwdt)
                return -ENOMEM;
-       iwdt->base = dev->platform_data;
+       iwdt->base = (void __iomem *)dev->platform_data;
 
        /*
         * Retrieve rate from a fixed clock from the device tree if
index 1616f93..74d785b 100644 (file)
@@ -268,8 +268,12 @@ static int omap_wdt_probe(struct platform_device *pdev)
                        wdev->wdog.bootstatus = WDIOF_CARDRESET;
        }
 
-       if (!early_enable)
+       if (early_enable) {
+               omap_wdt_start(&wdev->wdog);
+               set_bit(WDOG_HW_RUNNING, &wdev->wdog.status);
+       } else {
                omap_wdt_disable(wdev);
+       }
 
        ret = watchdog_register_device(&wdev->wdog);
        if (ret) {
index ee9ff38..9791c74 100644 (file)
@@ -130,7 +130,7 @@ static u64 sbsa_gwdt_reg_read(struct sbsa_gwdt *gwdt)
        if (gwdt->version == 0)
                return readl(gwdt->control_base + SBSA_GWDT_WOR);
        else
-               return readq(gwdt->control_base + SBSA_GWDT_WOR);
+               return lo_hi_readq(gwdt->control_base + SBSA_GWDT_WOR);
 }
 
 static void sbsa_gwdt_reg_write(u64 val, struct sbsa_gwdt *gwdt)
@@ -138,7 +138,7 @@ static void sbsa_gwdt_reg_write(u64 val, struct sbsa_gwdt *gwdt)
        if (gwdt->version == 0)
                writel((u32)val, gwdt->control_base + SBSA_GWDT_WOR);
        else
-               writeq(val, gwdt->control_base + SBSA_GWDT_WOR);
+               lo_hi_writeq(val, gwdt->control_base + SBSA_GWDT_WOR);
 }
 
 /*
@@ -411,4 +411,3 @@ MODULE_AUTHOR("Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>");
 MODULE_AUTHOR("Al Stone <al.stone@linaro.org>");
 MODULE_AUTHOR("Timur Tabi <timur@codeaurora.org>");
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:" DRV_NAME);
index 16b5fca..54c1f8b 100644 (file)
@@ -358,7 +358,7 @@ int autofs_wait(struct autofs_sb_info *sbi,
                qstr.len = strlen(p);
                offset = p - name;
        }
-       qstr.hash = full_name_hash(dentry, name, qstr.len);
+       qstr.hash = full_name_hash(dentry, qstr.name, qstr.len);
 
        if (mutex_lock_interruptible(&sbi->wq_mutex)) {
                kfree(name);
index 319596d..f55f9f9 100644 (file)
@@ -1121,6 +1121,9 @@ int fuse_init_fs_context_submount(struct fs_context *fsc);
  */
 void fuse_conn_destroy(struct fuse_mount *fm);
 
+/* Drop the connection and free the fuse mount */
+void fuse_mount_destroy(struct fuse_mount *fm);
+
 /**
  * Add connection to control filesystem
  */
index 36cd031..12d49a1 100644 (file)
@@ -457,14 +457,6 @@ static void fuse_send_destroy(struct fuse_mount *fm)
        }
 }
 
-static void fuse_put_super(struct super_block *sb)
-{
-       struct fuse_mount *fm = get_fuse_mount_super(sb);
-
-       fuse_conn_put(fm->fc);
-       kfree(fm);
-}
-
 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
 {
        stbuf->f_type    = FUSE_SUPER_MAGIC;
@@ -1003,7 +995,6 @@ static const struct super_operations fuse_super_operations = {
        .evict_inode    = fuse_evict_inode,
        .write_inode    = fuse_write_inode,
        .drop_inode     = generic_delete_inode,
-       .put_super      = fuse_put_super,
        .umount_begin   = fuse_umount_begin,
        .statfs         = fuse_statfs,
        .sync_fs        = fuse_sync_fs,
@@ -1424,20 +1415,17 @@ static int fuse_get_tree_submount(struct fs_context *fsc)
        if (!fm)
                return -ENOMEM;
 
+       fm->fc = fuse_conn_get(fc);
        fsc->s_fs_info = fm;
        sb = sget_fc(fsc, NULL, set_anon_super_fc);
-       if (IS_ERR(sb)) {
-               kfree(fm);
+       if (fsc->s_fs_info)
+               fuse_mount_destroy(fm);
+       if (IS_ERR(sb))
                return PTR_ERR(sb);
-       }
-       fm->fc = fuse_conn_get(fc);
 
        /* Initialize superblock, making @mp_fi its root */
        err = fuse_fill_super_submount(sb, mp_fi);
        if (err) {
-               fuse_conn_put(fc);
-               kfree(fm);
-               sb->s_fs_info = NULL;
                deactivate_locked_super(sb);
                return err;
        }
@@ -1569,8 +1557,6 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
 {
        struct fuse_fs_context *ctx = fsc->fs_private;
        int err;
-       struct fuse_conn *fc;
-       struct fuse_mount *fm;
 
        if (!ctx->file || !ctx->rootmode_present ||
            !ctx->user_id_present || !ctx->group_id_present)
@@ -1580,42 +1566,18 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
         * Require mount to happen from the same user namespace which
         * opened /dev/fuse to prevent potential attacks.
         */
-       err = -EINVAL;
        if ((ctx->file->f_op != &fuse_dev_operations) ||
            (ctx->file->f_cred->user_ns != sb->s_user_ns))
-               goto err;
+               return -EINVAL;
        ctx->fudptr = &ctx->file->private_data;
 
-       fc = kmalloc(sizeof(*fc), GFP_KERNEL);
-       err = -ENOMEM;
-       if (!fc)
-               goto err;
-
-       fm = kzalloc(sizeof(*fm), GFP_KERNEL);
-       if (!fm) {
-               kfree(fc);
-               goto err;
-       }
-
-       fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
-       fc->release = fuse_free_conn;
-
-       sb->s_fs_info = fm;
-
        err = fuse_fill_super_common(sb, ctx);
        if (err)
-               goto err_put_conn;
+               return err;
        /* file->private_data shall be visible on all CPUs after this */
        smp_mb();
        fuse_send_init(get_fuse_mount_super(sb));
        return 0;
-
- err_put_conn:
-       fuse_conn_put(fc);
-       kfree(fm);
-       sb->s_fs_info = NULL;
- err:
-       return err;
 }
 
 /*
@@ -1637,22 +1599,40 @@ static int fuse_get_tree(struct fs_context *fsc)
 {
        struct fuse_fs_context *ctx = fsc->fs_private;
        struct fuse_dev *fud;
+       struct fuse_conn *fc;
+       struct fuse_mount *fm;
        struct super_block *sb;
        int err;
 
+       fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+       if (!fc)
+               return -ENOMEM;
+
+       fm = kzalloc(sizeof(*fm), GFP_KERNEL);
+       if (!fm) {
+               kfree(fc);
+               return -ENOMEM;
+       }
+
+       fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
+       fc->release = fuse_free_conn;
+
+       fsc->s_fs_info = fm;
+
        if (ctx->fd_present)
                ctx->file = fget(ctx->fd);
 
        if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
                err = get_tree_bdev(fsc, fuse_fill_super);
-               goto out_fput;
+               goto out;
        }
        /*
         * While block dev mount can be initialized with a dummy device fd
         * (found by device name), normal fuse mounts can't
         */
+       err = -EINVAL;
        if (!ctx->file)
-               return -EINVAL;
+               goto out;
 
        /*
         * Allow creating a fuse mount with an already initialized fuse
@@ -1668,7 +1648,9 @@ static int fuse_get_tree(struct fs_context *fsc)
        } else {
                err = get_tree_nodev(fsc, fuse_fill_super);
        }
-out_fput:
+out:
+       if (fsc->s_fs_info)
+               fuse_mount_destroy(fm);
        if (ctx->file)
                fput(ctx->file);
        return err;
@@ -1747,17 +1729,25 @@ static void fuse_sb_destroy(struct super_block *sb)
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        bool last;
 
-       if (fm) {
+       if (sb->s_root) {
                last = fuse_mount_remove(fm);
                if (last)
                        fuse_conn_destroy(fm);
        }
 }
 
+void fuse_mount_destroy(struct fuse_mount *fm)
+{
+       fuse_conn_put(fm->fc);
+       kfree(fm);
+}
+EXPORT_SYMBOL(fuse_mount_destroy);
+
 static void fuse_kill_sb_anon(struct super_block *sb)
 {
        fuse_sb_destroy(sb);
        kill_anon_super(sb);
+       fuse_mount_destroy(get_fuse_mount_super(sb));
 }
 
 static struct file_system_type fuse_fs_type = {
@@ -1775,6 +1765,7 @@ static void fuse_kill_sb_blk(struct super_block *sb)
 {
        fuse_sb_destroy(sb);
        kill_block_super(sb);
+       fuse_mount_destroy(get_fuse_mount_super(sb));
 }
 
 static struct file_system_type fuseblk_fs_type = {
index 0ad89c6..94fc874 100644 (file)
@@ -1394,12 +1394,13 @@ static void virtio_kill_sb(struct super_block *sb)
        bool last;
 
        /* If mount failed, we can still be called without any fc */
-       if (fm) {
+       if (sb->s_root) {
                last = fuse_mount_remove(fm);
                if (last)
                        virtio_fs_conn_destroy(fm);
        }
        kill_anon_super(sb);
+       fuse_mount_destroy(fm);
 }
 
 static int virtio_fs_test_super(struct super_block *sb,
@@ -1455,19 +1456,14 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
 
        fsc->s_fs_info = fm;
        sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
-       if (fsc->s_fs_info) {
-               fuse_conn_put(fc);
-               kfree(fm);
-       }
+       if (fsc->s_fs_info)
+               fuse_mount_destroy(fm);
        if (IS_ERR(sb))
                return PTR_ERR(sb);
 
        if (!sb->s_root) {
                err = virtio_fs_fill_super(sb, fsc);
                if (err) {
-                       fuse_conn_put(fc);
-                       kfree(fm);
-                       sb->s_fs_info = NULL;
                        deactivate_locked_super(sb);
                        return err;
                }
index 5bf8aa8..422a7ed 100644 (file)
@@ -253,7 +253,7 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
                pr_warn_once("io-wq is not configured for unbound workers");
 
        raw_spin_lock(&wqe->lock);
-       if (acct->nr_workers == acct->max_workers) {
+       if (acct->nr_workers >= acct->max_workers) {
                raw_spin_unlock(&wqe->lock);
                return true;
        }
@@ -1291,15 +1291,18 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
 
        rcu_read_lock();
        for_each_node(node) {
+               struct io_wqe *wqe = wq->wqes[node];
                struct io_wqe_acct *acct;
 
+               raw_spin_lock(&wqe->lock);
                for (i = 0; i < IO_WQ_ACCT_NR; i++) {
-                       acct = &wq->wqes[node]->acct[i];
+                       acct = &wqe->acct[i];
                        prev = max_t(int, acct->max_workers, prev);
                        if (new_count[i])
                                acct->max_workers = new_count[i];
                        new_count[i] = prev;
                }
+               raw_spin_unlock(&wqe->lock);
        }
        rcu_read_unlock();
        return 0;
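
The loop above now reads and updates each acct only under wqe->lock. A
hypothetical pthread model of that read-old/install-new exchange:

    #include <pthread.h>

    struct acct {
            pthread_mutex_t lock;   /* stands in for wqe->lock */
            int max_workers;
    };

    /* Returns the previous cap; 0 in new_count means "leave unchanged",
     * matching the io_wq_max_workers() convention. */
    static int exchange_max_workers(struct acct *a, int new_count)
    {
            int prev;

            pthread_mutex_lock(&a->lock);
            prev = a->max_workers;
            if (new_count)
                    a->max_workers = new_count;
            pthread_mutex_unlock(&a->lock);
            return prev;
    }
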
index e68d278..bc18af5 100644 (file)
@@ -456,6 +456,8 @@ struct io_ring_ctx {
                struct work_struct              exit_work;
                struct list_head                tctx_list;
                struct completion               ref_comp;
+               u32                             iowq_limits[2];
+               bool                            iowq_limits_set;
        };
 };
 
@@ -1368,11 +1370,6 @@ static void io_req_track_inflight(struct io_kiocb *req)
        }
 }
 
-static inline void io_unprep_linked_timeout(struct io_kiocb *req)
-{
-       req->flags &= ~REQ_F_LINK_TIMEOUT;
-}
-
 static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
 {
        if (WARN_ON_ONCE(!req->link))
@@ -6983,7 +6980,7 @@ issue_sqe:
                switch (io_arm_poll_handler(req)) {
                case IO_APOLL_READY:
                        if (linked_timeout)
-                               io_unprep_linked_timeout(req);
+                               io_queue_linked_timeout(linked_timeout);
                        goto issue_sqe;
                case IO_APOLL_ABORTED:
                        /*
@@ -9638,7 +9635,16 @@ static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
                ret = io_uring_alloc_task_context(current, ctx);
                if (unlikely(ret))
                        return ret;
+
                tctx = current->io_uring;
+               if (ctx->iowq_limits_set) {
+                       unsigned int limits[2] = { ctx->iowq_limits[0],
+                                                  ctx->iowq_limits[1], };
+
+                       ret = io_wq_max_workers(tctx->io_wq, limits);
+                       if (ret)
+                               return ret;
+               }
        }
        if (!xa_load(&tctx->xa, (unsigned long)ctx)) {
                node = kmalloc(sizeof(*node), GFP_KERNEL);
@@ -10643,7 +10649,9 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
 
 static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                                        void __user *arg)
+       __must_hold(&ctx->uring_lock)
 {
+       struct io_tctx_node *node;
        struct io_uring_task *tctx = NULL;
        struct io_sq_data *sqd = NULL;
        __u32 new_count[2];
@@ -10674,13 +10682,19 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                tctx = current->io_uring;
        }
 
-       ret = -EINVAL;
-       if (!tctx || !tctx->io_wq)
-               goto err;
+       BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));
 
-       ret = io_wq_max_workers(tctx->io_wq, new_count);
-       if (ret)
-               goto err;
+       memcpy(ctx->iowq_limits, new_count, sizeof(new_count));
+       ctx->iowq_limits_set = true;
+
+       ret = -EINVAL;
+       if (tctx && tctx->io_wq) {
+               ret = io_wq_max_workers(tctx->io_wq, new_count);
+               if (ret)
+                       goto err;
+       } else {
+               memset(new_count, 0, sizeof(new_count));
+       }
 
        if (sqd) {
                mutex_unlock(&sqd->lock);
@@ -10690,6 +10704,22 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
        if (copy_to_user(arg, new_count, sizeof(new_count)))
                return -EFAULT;
 
+       /* that's it for SQPOLL, only the SQPOLL task creates requests */
+       if (sqd)
+               return 0;
+
+       /* now propagate the restriction to all registered users */
+       list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+               struct io_uring_task *tctx = node->task->io_uring;
+
+               if (WARN_ON_ONCE(!tctx->io_wq))
+                       continue;
+
+               for (i = 0; i < ARRAY_SIZE(new_count); i++)
+                       new_count[i] = ctx->iowq_limits[i];
+               /* ignore errors, it always returns zero anyway */
+               (void)io_wq_max_workers(tctx->io_wq, new_count);
+       }
        return 0;
 err:
        if (sqd) {
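
A hedged userspace example of driving this path, assuming the liburing >= 2.1
wrapper io_uring_register_iowq_max_workers(); the limits set here are what
the kernel now records in ctx->iowq_limits and propagates to every task that
registers with the ring:

    #include <liburing.h>
    #include <stdio.h>

    int main(void)
    {
            struct io_uring ring;
            /* [0] caps bounded workers, [1] caps unbounded workers */
            unsigned int vals[2] = { 4, 8 };
            int ret;

            if (io_uring_queue_init(8, &ring, 0) < 0)
                    return 1;
            /* passing zeros instead would only read back the current caps */
            ret = io_uring_register_iowq_max_workers(&ring, vals);
            if (ret < 0)
                    fprintf(stderr, "register failed: %d\n", ret);
            else
                    printf("previous caps: bounded=%u unbounded=%u\n",
                           vals[0], vals[1]);
            io_uring_queue_exit(&ring);
            return 0;
    }
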
index 71c989f..30a92dd 100644 (file)
@@ -298,8 +298,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
                                   int blob_len, struct ksmbd_session *sess)
 {
        char *domain_name;
-       unsigned int lm_off, nt_off;
-       unsigned short nt_len;
+       unsigned int nt_off, dn_off;
+       unsigned short nt_len, dn_len;
        int ret;
 
        if (blob_len < sizeof(struct authenticate_message)) {
@@ -314,15 +314,17 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
                return -EINVAL;
        }
 
-       lm_off = le32_to_cpu(authblob->LmChallengeResponse.BufferOffset);
        nt_off = le32_to_cpu(authblob->NtChallengeResponse.BufferOffset);
        nt_len = le16_to_cpu(authblob->NtChallengeResponse.Length);
+       dn_off = le32_to_cpu(authblob->DomainName.BufferOffset);
+       dn_len = le16_to_cpu(authblob->DomainName.Length);
+
+       if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len)
+               return -EINVAL;
 
        /* TODO: use the domain name imported from the configuration file */
-       domain_name = smb_strndup_from_utf16((const char *)authblob +
-                       le32_to_cpu(authblob->DomainName.BufferOffset),
-                       le16_to_cpu(authblob->DomainName.Length), true,
-                       sess->conn->local_nls);
+       domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off,
+                                            dn_len, true, sess->conn->local_nls);
        if (IS_ERR(domain_name))
                return PTR_ERR(domain_name);
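
The new (u64) widening in the bounds check above matters: a standalone
illustration of the 32-bit wraparound it defends against (values are
illustrative).

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t blob_len = 512;
            uint32_t off = 0xffffff00u, len = 0x200;

            /* the 32-bit sum wraps to 0x100 and sneaks under blob_len */
            printf("naive: %s\n", off + len > blob_len ? "reject" : "ACCEPT");
            /* widening before the add keeps the comparison honest */
            printf("wide:  %s\n",
                   (uint64_t)off + len > blob_len ? "reject" : "ACCEPT");
            return 0;
    }
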
 
index 48b18b4..b57a0d8 100644 (file)
@@ -61,6 +61,8 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
                conn->local_nls = load_nls_default();
        atomic_set(&conn->req_running, 0);
        atomic_set(&conn->r_count, 0);
+       conn->total_credits = 1;
+
        init_waitqueue_head(&conn->req_running_q);
        INIT_LIST_HEAD(&conn->conns_list);
        INIT_LIST_HEAD(&conn->sessions);
index 2fbe2bc..c6718a0 100644 (file)
@@ -211,6 +211,7 @@ struct ksmbd_tree_disconnect_request {
  */
 struct ksmbd_logout_request {
        __s8    account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+       __u32   account_flags;
 };
 
 /*
@@ -317,6 +318,7 @@ enum KSMBD_TREE_CONN_STATUS {
 #define KSMBD_USER_FLAG_BAD_UID                BIT(2)
 #define KSMBD_USER_FLAG_BAD_USER       BIT(3)
 #define KSMBD_USER_FLAG_GUEST_ACCOUNT  BIT(4)
+#define KSMBD_USER_FLAG_DELAY_SESSION  BIT(5)
 
 /*
  * Share config flags.
index d21629a..1019d36 100644 (file)
@@ -55,7 +55,7 @@ struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp)
 
 void ksmbd_free_user(struct ksmbd_user *user)
 {
-       ksmbd_ipc_logout_request(user->name);
+       ksmbd_ipc_logout_request(user->name, user->flags);
        kfree(user->name);
        kfree(user->passkey);
        kfree(user);
index b2bb074..aff80b0 100644 (file)
@@ -18,6 +18,7 @@ struct ksmbd_user {
 
        size_t                  passkey_sz;
        char                    *passkey;
+       unsigned int            failed_login_count;
 };
 
 static inline bool user_guest(struct ksmbd_user *user)
index 9edd9c1..030ca57 100644 (file)
@@ -284,11 +284,13 @@ static inline int smb2_ioctl_resp_len(struct smb2_ioctl_req *h)
                le32_to_cpu(h->MaxOutputResponse);
 }
 
-static int smb2_validate_credit_charge(struct smb2_hdr *hdr)
+static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
+                                      struct smb2_hdr *hdr)
 {
-       int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
-       int credit_charge = le16_to_cpu(hdr->CreditCharge);
+       unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
+       unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge);
        void *__hdr = hdr;
+       int ret;
 
        switch (hdr->Command) {
        case SMB2_QUERY_INFO:
@@ -310,21 +312,37 @@ static int smb2_validate_credit_charge(struct smb2_hdr *hdr)
                req_len = smb2_ioctl_req_len(__hdr);
                expect_resp_len = smb2_ioctl_resp_len(__hdr);
                break;
-       default:
+       case SMB2_CANCEL:
                return 0;
+       default:
+               req_len = 1;
+               break;
        }
 
-       credit_charge = max(1, credit_charge);
-       max_len = max(req_len, expect_resp_len);
+       credit_charge = max_t(unsigned short, credit_charge, 1);
+       max_len = max_t(unsigned int, req_len, expect_resp_len);
        calc_credit_num = DIV_ROUND_UP(max_len, SMB2_MAX_BUFFER_SIZE);
 
        if (credit_charge < calc_credit_num) {
-               pr_err("Insufficient credit charge, given: %d, needed: %d\n",
-                      credit_charge, calc_credit_num);
+               ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n",
+                           credit_charge, calc_credit_num);
+               return 1;
+       } else if (credit_charge > conn->max_credits) {
+               ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge);
                return 1;
        }
 
-       return 0;
+       spin_lock(&conn->credits_lock);
+       if (credit_charge <= conn->total_credits) {
+               conn->total_credits -= credit_charge;
+               ret = 0;
+       } else {
+               ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
+                           credit_charge, conn->total_credits);
+               ret = 1;
+       }
+       spin_unlock(&conn->credits_lock);
+       return ret;
 }
 
 int ksmbd_smb2_check_message(struct ksmbd_work *work)
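
A worked example of the charge check above, assuming the usual 64 KiB
SMB2_MAX_BUFFER_SIZE credit granularity:

    #include <stdio.h>

    #define SMB2_MAX_BUFFER_SIZE 65536u     /* assumed 64 KiB granularity */
    #define DIV_ROUND_UP(n, d)   (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned int req_len = 0, expect_resp_len = 1048576; /* 1 MiB read */
            unsigned short credit_charge = 16;
            unsigned int max_len = req_len > expect_resp_len ?
                                   req_len : expect_resp_len;
            unsigned int need = DIV_ROUND_UP(max_len, SMB2_MAX_BUFFER_SIZE);

            /* a 1 MiB response costs 16 credits; a charge of 15 is rejected */
            printf("needed=%u given=%u -> %s\n", need, credit_charge,
                   credit_charge < need ? "reject" : "accept");
            return 0;
    }
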
@@ -382,26 +400,20 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
                }
        }
 
-       if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) &&
-           smb2_validate_credit_charge(hdr)) {
-               work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
-               return 1;
-       }
-
        if (smb2_calc_size(hdr, &clc_len))
                return 1;
 
        if (len != clc_len) {
                /* client can return one byte more due to implied bcc[0] */
                if (clc_len == len + 1)
-                       return 0;
+                       goto validate_credit;
 
                /*
                 * Some Windows servers (win2016) will also pad the final
                 * PDU in a compound to 8 bytes.
                 */
                if (ALIGN(clc_len, 8) == len)
-                       return 0;
+                       goto validate_credit;
 
                /*
                 * Windows clients also pad up to 8 bytes when compounding.
@@ -414,7 +426,7 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
                                    "cli req padded more than expected. Length %d not %d for cmd:%d mid:%llu\n",
                                    len, clc_len, command,
                                    le64_to_cpu(hdr->MessageId));
-                       return 0;
+                       goto validate_credit;
                }
 
                ksmbd_debug(SMB,
@@ -425,6 +437,13 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
                return 1;
        }
 
+validate_credit:
+       if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) &&
+           smb2_validate_credit_charge(work->conn, hdr)) {
+               work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
+               return 1;
+       }
+
        return 0;
 }
 
index b06456e..fb6a65d 100644 (file)
@@ -284,6 +284,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
 
 void init_smb2_max_read_size(unsigned int sz)
 {
+       sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE);
        smb21_server_values.max_read_size = sz;
        smb30_server_values.max_read_size = sz;
        smb302_server_values.max_read_size = sz;
@@ -292,6 +293,7 @@ void init_smb2_max_read_size(unsigned int sz)
 
 void init_smb2_max_write_size(unsigned int sz)
 {
+       sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE);
        smb21_server_values.max_write_size = sz;
        smb30_server_values.max_write_size = sz;
        smb302_server_values.max_write_size = sz;
@@ -300,6 +302,7 @@ void init_smb2_max_write_size(unsigned int sz)
 
 void init_smb2_max_trans_size(unsigned int sz)
 {
+       sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE);
        smb21_server_values.max_trans_size = sz;
        smb30_server_values.max_trans_size = sz;
        smb302_server_values.max_trans_size = sz;
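
A minimal model of the clamp added above, using the SMB3_MIN_IOSIZE and
SMB3_MAX_IOSIZE values this series defines (64 KiB and 8 MiB):

    #include <stdio.h>

    #define SMB3_MIN_IOSIZE (64 * 1024)
    #define SMB3_MAX_IOSIZE (8 * 1024 * 1024)

    static unsigned int clamp_iosize(unsigned int sz)
    {
            if (sz < SMB3_MIN_IOSIZE)
                    return SMB3_MIN_IOSIZE;
            if (sz > SMB3_MAX_IOSIZE)
                    return SMB3_MAX_IOSIZE;
            return sz;
    }

    int main(void)
    {
            /* a configured 16 MiB transfer size is now capped at 8 MiB */
            printf("%u\n", clamp_iosize(16 * 1024 * 1024));
            return 0;
    }
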
index 005aa93..7e448df 100644 (file)
@@ -292,22 +292,6 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
        return 0;
 }
 
-static int smb2_consume_credit_charge(struct ksmbd_work *work,
-                                     unsigned short credit_charge)
-{
-       struct ksmbd_conn *conn = work->conn;
-       unsigned int rsp_credits = 1;
-
-       if (!conn->total_credits)
-               return 0;
-
-       if (credit_charge > 0)
-               rsp_credits = credit_charge;
-
-       conn->total_credits -= rsp_credits;
-       return rsp_credits;
-}
-
 /**
  * smb2_set_rsp_credits() - set number of credits in response buffer
  * @work:      smb work containing smb response buffer
@@ -317,49 +301,43 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
        struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
        struct smb2_hdr *hdr = ksmbd_resp_buf_next(work);
        struct ksmbd_conn *conn = work->conn;
-       unsigned short credits_requested = le16_to_cpu(req_hdr->CreditRequest);
-       unsigned short credit_charge = 1, credits_granted = 0;
-       unsigned short aux_max, aux_credits, min_credits;
-       int rsp_credit_charge;
+       unsigned short credits_requested;
+       unsigned short credit_charge, credits_granted = 0;
+       unsigned short aux_max, aux_credits;
 
-       if (hdr->Command == SMB2_CANCEL)
-               goto out;
+       if (work->send_no_response)
+               return 0;
 
-       /* get default minimum credits by shifting maximum credits by 4 */
-       min_credits = conn->max_credits >> 4;
+       hdr->CreditCharge = req_hdr->CreditCharge;
 
-       if (conn->total_credits >= conn->max_credits) {
+       if (conn->total_credits > conn->max_credits) {
+               hdr->CreditRequest = 0;
                pr_err("Total credits overflow: %d\n", conn->total_credits);
-               conn->total_credits = min_credits;
-       }
-
-       rsp_credit_charge =
-               smb2_consume_credit_charge(work, le16_to_cpu(req_hdr->CreditCharge));
-       if (rsp_credit_charge < 0)
                return -EINVAL;
+       }
 
-       hdr->CreditCharge = cpu_to_le16(rsp_credit_charge);
+       credit_charge = max_t(unsigned short,
+                             le16_to_cpu(req_hdr->CreditCharge), 1);
+       credits_requested = max_t(unsigned short,
+                                 le16_to_cpu(req_hdr->CreditRequest), 1);
 
-       if (credits_requested > 0) {
-               aux_credits = credits_requested - 1;
-               aux_max = 32;
-               if (hdr->Command == SMB2_NEGOTIATE)
-                       aux_max = 0;
-               aux_credits = (aux_credits < aux_max) ? aux_credits : aux_max;
-               credits_granted = aux_credits + credit_charge;
+       /* According to the smb2.credits smbtorture test, Windows Server
+        * 2016 or later grants up to 8192 credits at once.
+        *
+        * TODO: Need to adjust the CreditRequest value according to
+        * current cpu load.
+        */
+       aux_credits = credits_requested - 1;
+       if (hdr->Command == SMB2_NEGOTIATE)
+               aux_max = 0;
+       else
+               aux_max = conn->max_credits - credit_charge;
+       aux_credits = min_t(unsigned short, aux_credits, aux_max);
+       credits_granted = credit_charge + aux_credits;
 
-               /* if credits granted per client is getting bigger than default
-                * minimum credits then we should wrap it up within the limits.
-                */
-               if ((conn->total_credits + credits_granted) > min_credits)
-                       credits_granted = min_credits - conn->total_credits;
-               /*
-                * TODO: Need to adjuct CreditRequest value according to
-                * current cpu load
-                */
-       } else if (conn->total_credits == 0) {
-               credits_granted = 1;
-       }
+       if (conn->max_credits - conn->total_credits < credits_granted)
+               credits_granted = conn->max_credits -
+                       conn->total_credits;
 
        conn->total_credits += credits_granted;
        work->credits_granted += credits_granted;
@@ -368,7 +346,6 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
                /* Update CreditRequest in last request */
                hdr->CreditRequest = cpu_to_le16(work->credits_granted);
        }
-out:
        ksmbd_debug(SMB,
                    "credits: requested[%d] granted[%d] total_granted[%d]\n",
                    credits_requested, credits_granted,
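
A standalone model of the new grant arithmetic above (max_credits of 8192
mirrors the Windows Server behaviour the comment cites; the other numbers are
illustrative):

    #include <stdio.h>

    int main(void)
    {
            unsigned short max_credits = 8192, total_credits = 100;
            unsigned short credit_charge = 1, credits_requested = 256;
            unsigned short aux_max, aux_credits, credits_granted;

            aux_credits = credits_requested - 1;
            aux_max = max_credits - credit_charge;  /* non-NEGOTIATE command */
            if (aux_credits > aux_max)
                    aux_credits = aux_max;
            credits_granted = credit_charge + aux_credits;

            /* never let the connection total overshoot max_credits */
            if (max_credits - total_credits < credits_granted)
                    credits_granted = max_credits - total_credits;

            total_credits += credits_granted;
            printf("granted=%u total=%u\n", credits_granted, total_credits);
            return 0;
    }
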
@@ -472,6 +449,12 @@ bool is_chained_smb2_message(struct ksmbd_work *work)
                        return false;
                }
 
+               if ((u64)get_rfc1002_len(work->response_buf) + MAX_CIFS_SMALL_BUFFER_SIZE >
+                   work->response_sz) {
+                       pr_err("next response offset exceeds response buffer size\n");
+                       return false;
+               }
+
                ksmbd_debug(SMB, "got SMB2 chained command\n");
                init_chained_smb2_rsp(work);
                return true;
@@ -541,7 +524,7 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
 {
        struct smb2_hdr *hdr = work->request_buf;
        size_t small_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
-       size_t large_sz = work->conn->vals->max_trans_size + MAX_SMB2_HDR_SIZE;
+       size_t large_sz = small_sz + work->conn->vals->max_trans_size;
        size_t sz = small_sz;
        int cmd = le16_to_cpu(hdr->Command);
 
@@ -1274,19 +1257,13 @@ static int generate_preauth_hash(struct ksmbd_work *work)
        return 0;
 }
 
-static int decode_negotiation_token(struct ksmbd_work *work,
-                                   struct negotiate_message *negblob)
+static int decode_negotiation_token(struct ksmbd_conn *conn,
+                                   struct negotiate_message *negblob,
+                                   size_t sz)
 {
-       struct ksmbd_conn *conn = work->conn;
-       struct smb2_sess_setup_req *req;
-       int sz;
-
        if (!conn->use_spnego)
                return -EINVAL;
 
-       req = work->request_buf;
-       sz = le16_to_cpu(req->SecurityBufferLength);
-
        if (ksmbd_decode_negTokenInit((char *)negblob, sz, conn)) {
                if (ksmbd_decode_negTokenTarg((char *)negblob, sz, conn)) {
                        conn->auth_mechs |= KSMBD_AUTH_NTLMSSP;
@@ -1298,9 +1275,9 @@ static int decode_negotiation_token(struct ksmbd_work *work,
 }
 
 static int ntlm_negotiate(struct ksmbd_work *work,
-                         struct negotiate_message *negblob)
+                         struct negotiate_message *negblob,
+                         size_t negblob_len)
 {
-       struct smb2_sess_setup_req *req = work->request_buf;
        struct smb2_sess_setup_rsp *rsp = work->response_buf;
        struct challenge_message *chgblob;
        unsigned char *spnego_blob = NULL;
@@ -1309,8 +1286,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
        int sz, rc;
 
        ksmbd_debug(SMB, "negotiate phase\n");
-       sz = le16_to_cpu(req->SecurityBufferLength);
-       rc = ksmbd_decode_ntlmssp_neg_blob(negblob, sz, work->sess);
+       rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess);
        if (rc)
                return rc;
 
@@ -1378,12 +1354,23 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
        struct authenticate_message *authblob;
        struct ksmbd_user *user;
        char *name;
-       int sz;
+       unsigned int auth_msg_len, name_off, name_len, secbuf_len;
 
+       secbuf_len = le16_to_cpu(req->SecurityBufferLength);
+       if (secbuf_len < sizeof(struct authenticate_message)) {
+               ksmbd_debug(SMB, "blob len %d too small\n", secbuf_len);
+               return NULL;
+       }
        authblob = user_authblob(conn, req);
-       sz = le32_to_cpu(authblob->UserName.BufferOffset);
-       name = smb_strndup_from_utf16((const char *)authblob + sz,
-                                     le16_to_cpu(authblob->UserName.Length),
+       name_off = le32_to_cpu(authblob->UserName.BufferOffset);
+       name_len = le16_to_cpu(authblob->UserName.Length);
+       auth_msg_len = le16_to_cpu(req->SecurityBufferOffset) + secbuf_len;
+
+       if (auth_msg_len < (u64)name_off + name_len)
+               return NULL;
+
+       name = smb_strndup_from_utf16((const char *)authblob + name_off,
+                                     name_len,
                                      true,
                                      conn->local_nls);
        if (IS_ERR(name)) {
@@ -1629,6 +1616,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
        struct smb2_sess_setup_rsp *rsp = work->response_buf;
        struct ksmbd_session *sess;
        struct negotiate_message *negblob;
+       unsigned int negblob_len, negblob_off;
        int rc = 0;
 
        ksmbd_debug(SMB, "Received request for session setup\n");
@@ -1709,10 +1697,16 @@ int smb2_sess_setup(struct ksmbd_work *work)
        if (sess->state == SMB2_SESSION_EXPIRED)
                sess->state = SMB2_SESSION_IN_PROGRESS;
 
+       negblob_off = le16_to_cpu(req->SecurityBufferOffset);
+       negblob_len = le16_to_cpu(req->SecurityBufferLength);
+       if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) ||
+           negblob_len < offsetof(struct negotiate_message, NegotiateFlags))
+               return -EINVAL;
+
        negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId +
-                       le16_to_cpu(req->SecurityBufferOffset));
+                       negblob_off);
 
-       if (decode_negotiation_token(work, negblob) == 0) {
+       if (decode_negotiation_token(conn, negblob, negblob_len) == 0) {
                if (conn->mechToken)
                        negblob = (struct negotiate_message *)conn->mechToken;
        }
@@ -1736,7 +1730,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
                        sess->Preauth_HashValue = NULL;
                } else if (conn->preferred_auth_mech == KSMBD_AUTH_NTLMSSP) {
                        if (negblob->MessageType == NtLmNegotiate) {
-                               rc = ntlm_negotiate(work, negblob);
+                               rc = ntlm_negotiate(work, negblob, negblob_len);
                                if (rc)
                                        goto out_err;
                                rsp->hdr.Status =
@@ -1796,9 +1790,30 @@ out_err:
                conn->mechToken = NULL;
        }
 
-       if (rc < 0 && sess) {
-               ksmbd_session_destroy(sess);
-               work->sess = NULL;
+       if (rc < 0) {
+               /*
+                * SecurityBufferOffset should be set to zero in the
+                * session setup error response.
+                */
+               rsp->SecurityBufferOffset = 0;
+
+               if (sess) {
+                       bool try_delay = false;
+
+                       /*
+                        * To avoid dictionary attacks (repeated session setups
+                        * rapidly sent to connect to the server), ksmbd delays the
+                        * response by 5 seconds on session setup failure, making it
+                        * harder to send enough random connection requests to break
+                        * into a server.
+                        */
+                       if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION)
+                               try_delay = true;
+
+                       ksmbd_session_destroy(sess);
+                       work->sess = NULL;
+                       if (try_delay)
+                               ssleep(5);
+               }
        }
 
        return rc;
@@ -3779,6 +3794,24 @@ static int verify_info_level(int info_level)
        return 0;
 }
 
+static int smb2_calc_max_out_buf_len(struct ksmbd_work *work,
+                                    unsigned short hdr2_len,
+                                    unsigned int out_buf_len)
+{
+       int free_len;
+
+       if (out_buf_len > work->conn->vals->max_trans_size)
+               return -EINVAL;
+
+       free_len = (int)(work->response_sz -
+                        (get_rfc1002_len(work->response_buf) + 4)) -
+               hdr2_len;
+       if (free_len < 0)
+               return -EINVAL;
+
+       return min_t(int, out_buf_len, free_len);
+}
+
 int smb2_query_dir(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
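
A standalone model of the helper just added: the client-requested output
length is honoured only up to what is actually left in the response buffer
(the numbers below are illustrative):

    #include <stdio.h>

    static int calc_max_out_buf_len(unsigned int response_sz,
                                    unsigned int rfc1002_len,
                                    unsigned short hdr2_len,
                                    unsigned int out_buf_len,
                                    unsigned int max_trans_size)
    {
            int free_len;

            if (out_buf_len > max_trans_size)
                    return -1;
            free_len = (int)(response_sz - (rfc1002_len + 4)) - hdr2_len;
            if (free_len < 0)
                    return -1;
            return out_buf_len < (unsigned int)free_len ?
                   (int)out_buf_len : free_len;
    }

    int main(void)
    {
            /* 64 KiB response buffer, 100 bytes already used, 8-byte header */
            printf("%d\n", calc_max_out_buf_len(65536, 100, 8,
                                                1024 * 1024, 1024 * 1024));
            return 0;
    }
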
@@ -3855,9 +3888,13 @@ int smb2_query_dir(struct ksmbd_work *work)
        memset(&d_info, 0, sizeof(struct ksmbd_dir_info));
        d_info.wptr = (char *)rsp->Buffer;
        d_info.rptr = (char *)rsp->Buffer;
-       d_info.out_buf_len = (work->response_sz - (get_rfc1002_len(rsp_org) + 4));
-       d_info.out_buf_len = min_t(int, d_info.out_buf_len, le32_to_cpu(req->OutputBufferLength)) -
-               sizeof(struct smb2_query_directory_rsp);
+       d_info.out_buf_len =
+               smb2_calc_max_out_buf_len(work, 8,
+                                         le32_to_cpu(req->OutputBufferLength));
+       if (d_info.out_buf_len < 0) {
+               rc = -EINVAL;
+               goto err_out;
+       }
        d_info.flags = srch_flag;
 
        /*
@@ -4091,12 +4128,11 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
                                    le32_to_cpu(req->Flags));
        }
 
-       buf_free_len = work->response_sz -
-                       (get_rfc1002_len(rsp_org) + 4) -
-                       sizeof(struct smb2_query_info_rsp);
-
-       if (le32_to_cpu(req->OutputBufferLength) < buf_free_len)
-               buf_free_len = le32_to_cpu(req->OutputBufferLength);
+       buf_free_len =
+               smb2_calc_max_out_buf_len(work, 8,
+                                         le32_to_cpu(req->OutputBufferLength));
+       if (buf_free_len < 0)
+               return -EINVAL;
 
        rc = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
        if (rc < 0) {
@@ -4407,6 +4443,8 @@ static void get_file_stream_info(struct ksmbd_work *work,
        struct path *path = &fp->filp->f_path;
        ssize_t xattr_list_len;
        int nbytes = 0, streamlen, stream_name_len, next, idx = 0;
+       int buf_free_len;
+       struct smb2_query_info_req *req = ksmbd_req_buf_next(work);
 
        generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
                         &stat);
@@ -4420,6 +4458,12 @@ static void get_file_stream_info(struct ksmbd_work *work,
                goto out;
        }
 
+       buf_free_len =
+               smb2_calc_max_out_buf_len(work, 8,
+                                         le32_to_cpu(req->OutputBufferLength));
+       if (buf_free_len < 0)
+               goto out;
+
        while (idx < xattr_list_len) {
                stream_name = xattr_list + idx;
                streamlen = strlen(stream_name);
@@ -4444,6 +4488,10 @@ static void get_file_stream_info(struct ksmbd_work *work,
                streamlen = snprintf(stream_buf, streamlen + 1,
                                     ":%s", &stream_name[XATTR_NAME_STREAM_LEN]);
 
+               next = sizeof(struct smb2_file_stream_info) + streamlen * 2;
+               if (next > buf_free_len)
+                       break;
+
                file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes];
                streamlen  = smbConvertToUTF16((__le16 *)file_info->StreamName,
                                               stream_buf, streamlen,
@@ -4454,12 +4502,13 @@ static void get_file_stream_info(struct ksmbd_work *work,
                file_info->StreamSize = cpu_to_le64(stream_name_len);
                file_info->StreamAllocationSize = cpu_to_le64(stream_name_len);
 
-               next = sizeof(struct smb2_file_stream_info) + streamlen;
                nbytes += next;
+               buf_free_len -= next;
                file_info->NextEntryOffset = cpu_to_le32(next);
        }
 
-       if (!S_ISDIR(stat.mode)) {
+       if (!S_ISDIR(stat.mode) &&
+           buf_free_len >= sizeof(struct smb2_file_stream_info) + 7 * 2) {
                file_info = (struct smb2_file_stream_info *)
                        &rsp->Buffer[nbytes];
                streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName,
@@ -6220,8 +6269,7 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work)
            (offsetof(struct smb2_write_req, Buffer) - 4)) {
                data_buf = (char *)&req->Buffer[0];
        } else {
-               if ((le16_to_cpu(req->DataOffset) > get_rfc1002_len(req)) ||
-                   (le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req))) {
+               if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
                        pr_err("invalid write data offset %u, smb_len %u\n",
                               le16_to_cpu(req->DataOffset),
                               get_rfc1002_len(req));
@@ -6379,8 +6427,7 @@ int smb2_write(struct ksmbd_work *work)
                    (offsetof(struct smb2_write_req, Buffer) - 4)) {
                        data_buf = (char *)&req->Buffer[0];
                } else {
-                       if ((le16_to_cpu(req->DataOffset) > get_rfc1002_len(req)) ||
-                           (le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req))) {
+                       if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
                                pr_err("invalid write data offset %u, smb_len %u\n",
                                       le16_to_cpu(req->DataOffset),
                                       get_rfc1002_len(req));
@@ -7023,24 +7070,26 @@ out2:
        return err;
 }
 
-static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req,
+static int fsctl_copychunk(struct ksmbd_work *work,
+                          struct copychunk_ioctl_req *ci_req,
+                          unsigned int cnt_code,
+                          unsigned int input_count,
+                          unsigned long long volatile_id,
+                          unsigned long long persistent_id,
                           struct smb2_ioctl_rsp *rsp)
 {
-       struct copychunk_ioctl_req *ci_req;
        struct copychunk_ioctl_rsp *ci_rsp;
        struct ksmbd_file *src_fp = NULL, *dst_fp = NULL;
        struct srv_copychunk *chunks;
        unsigned int i, chunk_count, chunk_count_written = 0;
        unsigned int chunk_size_written = 0;
        loff_t total_size_written = 0;
-       int ret, cnt_code;
+       int ret = 0;
 
-       cnt_code = le32_to_cpu(req->CntCode);
-       ci_req = (struct copychunk_ioctl_req *)&req->Buffer[0];
        ci_rsp = (struct copychunk_ioctl_rsp *)&rsp->Buffer[0];
 
-       rsp->VolatileFileId = req->VolatileFileId;
-       rsp->PersistentFileId = req->PersistentFileId;
+       rsp->VolatileFileId = cpu_to_le64(volatile_id);
+       rsp->PersistentFileId = cpu_to_le64(persistent_id);
        ci_rsp->ChunksWritten =
                cpu_to_le32(ksmbd_server_side_copy_max_chunk_count());
        ci_rsp->ChunkBytesWritten =
@@ -7050,12 +7099,13 @@ static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req,
 
        chunks = (struct srv_copychunk *)&ci_req->Chunks[0];
        chunk_count = le32_to_cpu(ci_req->ChunkCount);
+       if (chunk_count == 0)
+               goto out;
        total_size_written = 0;
 
        /* verify the SRV_COPYCHUNK_COPY packet */
        if (chunk_count > ksmbd_server_side_copy_max_chunk_count() ||
-           le32_to_cpu(req->InputCount) <
-            offsetof(struct copychunk_ioctl_req, Chunks) +
+           input_count < offsetof(struct copychunk_ioctl_req, Chunks) +
             chunk_count * sizeof(struct srv_copychunk)) {
                rsp->hdr.Status = STATUS_INVALID_PARAMETER;
                return -EINVAL;
@@ -7076,9 +7126,7 @@ static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req,
 
        src_fp = ksmbd_lookup_foreign_fd(work,
                                         le64_to_cpu(ci_req->ResumeKey[0]));
-       dst_fp = ksmbd_lookup_fd_slow(work,
-                                     le64_to_cpu(req->VolatileFileId),
-                                     le64_to_cpu(req->PersistentFileId));
+       dst_fp = ksmbd_lookup_fd_slow(work, volatile_id, persistent_id);
        ret = -EINVAL;
        if (!src_fp ||
            src_fp->persistent_id != le64_to_cpu(ci_req->ResumeKey[1])) {
@@ -7153,8 +7201,8 @@ static __be32 idev_ipv4_address(struct in_device *idev)
 }
 
 static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
-                                       struct smb2_ioctl_req *req,
-                                       struct smb2_ioctl_rsp *rsp)
+                                       struct smb2_ioctl_rsp *rsp,
+                                       unsigned int out_buf_len)
 {
        struct network_interface_info_ioctl_rsp *nii_rsp = NULL;
        int nbytes = 0;
@@ -7166,6 +7214,12 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
 
        rtnl_lock();
        for_each_netdev(&init_net, netdev) {
+               if (out_buf_len <
+                   nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
+                       rtnl_unlock();
+                       return -ENOSPC;
+               }
+
                if (netdev->type == ARPHRD_LOOPBACK)
                        continue;
 
@@ -7245,11 +7299,6 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
        if (nii_rsp)
                nii_rsp->Next = 0;
 
-       if (!nbytes) {
-               rsp->hdr.Status = STATUS_BUFFER_TOO_SMALL;
-               return -EINVAL;
-       }
-
        rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID);
        rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID);
        return nbytes;
@@ -7257,11 +7306,16 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
 
 static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn,
                                         struct validate_negotiate_info_req *neg_req,
-                                        struct validate_negotiate_info_rsp *neg_rsp)
+                                        struct validate_negotiate_info_rsp *neg_rsp,
+                                        unsigned int in_buf_len)
 {
        int ret = 0;
        int dialect;
 
+       if (in_buf_len < sizeof(struct validate_negotiate_info_req) +
+                       le16_to_cpu(neg_req->DialectCount) * sizeof(__le16))
+               return -EINVAL;
+
        dialect = ksmbd_lookup_dialect_by_id(neg_req->Dialects,
                                             neg_req->DialectCount);
        if (dialect == BAD_PROT_ID || dialect != conn->dialect) {
@@ -7295,7 +7349,7 @@ err_out:
 static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id,
                                        struct file_allocated_range_buffer *qar_req,
                                        struct file_allocated_range_buffer *qar_rsp,
-                                       int in_count, int *out_count)
+                                       unsigned int in_count, unsigned int *out_count)
 {
        struct ksmbd_file *fp;
        loff_t start, length;
@@ -7322,7 +7376,8 @@ static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id,
 }
 
 static int fsctl_pipe_transceive(struct ksmbd_work *work, u64 id,
-                                int out_buf_len, struct smb2_ioctl_req *req,
+                                unsigned int out_buf_len,
+                                struct smb2_ioctl_req *req,
                                 struct smb2_ioctl_rsp *rsp)
 {
        struct ksmbd_rpc_command *rpc_resp;
@@ -7436,8 +7491,7 @@ int smb2_ioctl(struct ksmbd_work *work)
 {
        struct smb2_ioctl_req *req;
        struct smb2_ioctl_rsp *rsp, *rsp_org;
-       int cnt_code, nbytes = 0;
-       int out_buf_len;
+       unsigned int cnt_code, nbytes = 0, out_buf_len, in_buf_len;
        u64 id = KSMBD_NO_FID;
        struct ksmbd_conn *conn = work->conn;
        int ret = 0;
@@ -7465,8 +7519,14 @@ int smb2_ioctl(struct ksmbd_work *work)
        }
 
        cnt_code = le32_to_cpu(req->CntCode);
-       out_buf_len = le32_to_cpu(req->MaxOutputResponse);
-       out_buf_len = min(KSMBD_IPC_MAX_PAYLOAD, out_buf_len);
+       ret = smb2_calc_max_out_buf_len(work, 48,
+                                       le32_to_cpu(req->MaxOutputResponse));
+       if (ret < 0) {
+               rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+               goto out;
+       }
+       out_buf_len = (unsigned int)ret;
+       in_buf_len = le32_to_cpu(req->InputCount);
 
        switch (cnt_code) {
        case FSCTL_DFS_GET_REFERRALS:
@@ -7494,6 +7554,7 @@ int smb2_ioctl(struct ksmbd_work *work)
                break;
        }
        case FSCTL_PIPE_TRANSCEIVE:
+               out_buf_len = min_t(u32, KSMBD_IPC_MAX_PAYLOAD, out_buf_len);
                nbytes = fsctl_pipe_transceive(work, id, out_buf_len, req, rsp);
                break;
        case FSCTL_VALIDATE_NEGOTIATE_INFO:
@@ -7502,9 +7563,16 @@ int smb2_ioctl(struct ksmbd_work *work)
                        goto out;
                }
 
+               if (in_buf_len < sizeof(struct validate_negotiate_info_req))
+                       return -EINVAL;
+
+               if (out_buf_len < sizeof(struct validate_negotiate_info_rsp))
+                       return -EINVAL;
+
                ret = fsctl_validate_negotiate_info(conn,
                        (struct validate_negotiate_info_req *)&req->Buffer[0],
-                       (struct validate_negotiate_info_rsp *)&rsp->Buffer[0]);
+                       (struct validate_negotiate_info_rsp *)&rsp->Buffer[0],
+                       in_buf_len);
                if (ret < 0)
                        goto out;
 
@@ -7513,9 +7581,10 @@ int smb2_ioctl(struct ksmbd_work *work)
                rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID);
                break;
        case FSCTL_QUERY_NETWORK_INTERFACE_INFO:
-               nbytes = fsctl_query_iface_info_ioctl(conn, req, rsp);
-               if (nbytes < 0)
+               ret = fsctl_query_iface_info_ioctl(conn, rsp, out_buf_len);
+               if (ret < 0)
                        goto out;
+               nbytes = ret;
                break;
        case FSCTL_REQUEST_RESUME_KEY:
                if (out_buf_len < sizeof(struct resume_key_ioctl_rsp)) {
@@ -7540,15 +7609,33 @@ int smb2_ioctl(struct ksmbd_work *work)
                        goto out;
                }
 
+               if (in_buf_len < sizeof(struct copychunk_ioctl_req)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                if (out_buf_len < sizeof(struct copychunk_ioctl_rsp)) {
                        ret = -EINVAL;
                        goto out;
                }
 
                nbytes = sizeof(struct copychunk_ioctl_rsp);
-               fsctl_copychunk(work, req, rsp);
+               rsp->VolatileFileId = req->VolatileFileId;
+               rsp->PersistentFileId = req->PersistentFileId;
+               fsctl_copychunk(work,
+                               (struct copychunk_ioctl_req *)&req->Buffer[0],
+                               le32_to_cpu(req->CntCode),
+                               le32_to_cpu(req->InputCount),
+                               le64_to_cpu(req->VolatileFileId),
+                               le64_to_cpu(req->PersistentFileId),
+                               rsp);
                break;
        case FSCTL_SET_SPARSE:
+               if (in_buf_len < sizeof(struct file_sparse)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                ret = fsctl_set_sparse(work, id,
                                       (struct file_sparse *)&req->Buffer[0]);
                if (ret < 0)
@@ -7567,6 +7654,11 @@ int smb2_ioctl(struct ksmbd_work *work)
                        goto out;
                }
 
+               if (in_buf_len < sizeof(struct file_zero_data_information)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                zero_data =
                        (struct file_zero_data_information *)&req->Buffer[0];
 
@@ -7586,6 +7678,11 @@ int smb2_ioctl(struct ksmbd_work *work)
                break;
        }
        case FSCTL_QUERY_ALLOCATED_RANGES:
+               if (in_buf_len < sizeof(struct file_allocated_range_buffer)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                ret = fsctl_query_allocated_ranges(work, id,
                        (struct file_allocated_range_buffer *)&req->Buffer[0],
                        (struct file_allocated_range_buffer *)&rsp->Buffer[0],
@@ -7626,6 +7723,11 @@ int smb2_ioctl(struct ksmbd_work *work)
                struct duplicate_extents_to_file *dup_ext;
                loff_t src_off, dst_off, length, cloned;
 
+               if (in_buf_len < sizeof(struct duplicate_extents_to_file)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                dup_ext = (struct duplicate_extents_to_file *)&req->Buffer[0];
 
                fp_in = ksmbd_lookup_fd_slow(work, dup_ext->VolatileFileHandle,
@@ -7696,6 +7798,8 @@ out:
                rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND;
        else if (ret == -EOPNOTSUPP)
                rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+       else if (ret == -ENOSPC)
+               rsp->hdr.Status = STATUS_BUFFER_TOO_SMALL;
        else if (ret < 0 || rsp->hdr.Status == 0)
                rsp->hdr.Status = STATUS_INVALID_PARAMETER;
        smb2_set_err_rsp(work);
index a6dec5e..ff5a2f0 100644 (file)
 #define SMB21_DEFAULT_IOSIZE   (1024 * 1024)
 #define SMB3_DEFAULT_IOSIZE    (4 * 1024 * 1024)
 #define SMB3_DEFAULT_TRANS_SIZE        (1024 * 1024)
+#define SMB3_MIN_IOSIZE        (64 * 1024)
+#define SMB3_MAX_IOSIZE        (8 * 1024 * 1024)
 
 /*
  * SMB2 Header Definition
index 44aea33..1acf189 100644 (file)
@@ -601,7 +601,7 @@ int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id,
        return ret;
 }
 
-int ksmbd_ipc_logout_request(const char *account)
+int ksmbd_ipc_logout_request(const char *account, int flags)
 {
        struct ksmbd_ipc_msg *msg;
        struct ksmbd_logout_request *req;
@@ -616,6 +616,7 @@ int ksmbd_ipc_logout_request(const char *account)
 
        msg->type = KSMBD_EVENT_LOGOUT_REQUEST;
        req = (struct ksmbd_logout_request *)msg->payload;
+       req->account_flags = flags;
        strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ);
 
        ret = ipc_msg_send(msg);
index 9eacc89..5e5b90a 100644 (file)
@@ -25,7 +25,7 @@ ksmbd_ipc_tree_connect_request(struct ksmbd_session *sess,
                               struct sockaddr *peer_addr);
 int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id,
                                      unsigned long long connect_id);
-int ksmbd_ipc_logout_request(const char *account);
+int ksmbd_ipc_logout_request(const char *account, int flags);
 struct ksmbd_share_config_response *
 ksmbd_ipc_share_config_request(const char *name);
 struct ksmbd_spnego_authen_response *
index 3a7fa23..a2fd5a4 100644 (file)
@@ -549,6 +549,10 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
 
        switch (recvmsg->type) {
        case SMB_DIRECT_MSG_NEGOTIATE_REQ:
+               if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
+                       put_empty_recvmsg(t, recvmsg);
+                       return;
+               }
                t->negotiation_requested = true;
                t->full_packet_received = true;
                wake_up_interruptible(&t->wait_status);
@@ -556,10 +560,23 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
        case SMB_DIRECT_MSG_DATA_TRANSFER: {
                struct smb_direct_data_transfer *data_transfer =
                        (struct smb_direct_data_transfer *)recvmsg->packet;
-               int data_length = le32_to_cpu(data_transfer->data_length);
+               unsigned int data_length;
                int avail_recvmsg_count, receive_credits;
 
+               if (wc->byte_len <
+                   offsetof(struct smb_direct_data_transfer, padding)) {
+                       put_empty_recvmsg(t, recvmsg);
+                       return;
+               }
+
+               data_length = le32_to_cpu(data_transfer->data_length);
                if (data_length) {
+                       if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
+                           (u64)data_length) {
+                               put_empty_recvmsg(t, recvmsg);
+                               return;
+                       }
+
                        if (t->full_packet_received)
                                recvmsg->first_segment = true;
 
@@ -568,7 +585,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
                        else
                                t->full_packet_received = true;
 
-                       enqueue_reassembly(t, recvmsg, data_length);
+                       enqueue_reassembly(t, recvmsg, (int)data_length);
                        wake_up_interruptible(&t->wait_reassembly_queue);
 
                        spin_lock(&t->receive_credit_lock);
index b419542..835b384 100644 (file)
@@ -1023,7 +1023,7 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
 
 int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
                         struct file_allocated_range_buffer *ranges,
-                        int in_count, int *out_count)
+                        unsigned int in_count, unsigned int *out_count)
 {
        struct file *f = fp->filp;
        struct inode *inode = file_inode(fp->filp);
index 7b1dcaa..b0d5b8f 100644 (file)
@@ -166,7 +166,7 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
 struct file_allocated_range_buffer;
 int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
                         struct file_allocated_range_buffer *ranges,
-                        int in_count, int *out_count);
+                        unsigned int in_count, unsigned int *out_count);
 int ksmbd_vfs_unlink(struct user_namespace *user_ns,
                     struct dentry *dir, struct dentry *dentry);
 void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat);
index fb172a0..20ecb00 100644 (file)
@@ -22,9 +22,14 @@ typedef __builtin_va_list va_list;
 #define va_arg(v, l)            __builtin_va_arg(v, l)
 #define va_copy(d, s)           __builtin_va_copy(d, s)
 #else
+#ifdef __KERNEL__
 #include <linux/stdarg.h>
-#endif
-#endif
+#else
+/* Used to build ACPI tools */
+#include <stdarg.h>
+#endif /* __KERNEL__ */
+#endif /* ACPI_USE_BUILTIN_STDARG */
+#endif /* ! va_arg */
 
 #define ACPI_INLINE             __inline__
 
index 37f36da..a241dcf 100644 (file)
@@ -123,6 +123,8 @@ struct device;
  */
 unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags);
 unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags);
+unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node);
+unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node);
 void bitmap_free(const unsigned long *bitmap);
 
 /* Managed variants of the above. */
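
A hedged kernel-style sketch of the new NUMA-aware variants declared above
(the helper and its node choice are hypothetical); the only difference from
bitmap_zalloc() is which node backs the allocation:

    #include <linux/bitmap.h>

    static unsigned long *alloc_queue_mask(unsigned int nr_queues, int node)
    {
            /* backing memory comes from @node rather than the local node */
            unsigned long *mask = bitmap_zalloc_node(nr_queues, GFP_KERNEL, node);

            if (!mask)
                    return NULL;
            bitmap_set(mask, 0, nr_queues / 2);  /* e.g. first half in use */
            return mask;
    }
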
index 2746fd8..3536ab4 100644 (file)
@@ -517,6 +517,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 
 #define cgroup_bpf_enabled(atype) (0)
 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
index 1c7fd7c..e6f5579 100644 (file)
@@ -931,8 +931,11 @@ struct bpf_array_aux {
         * stored in the map to make sure that all callers and callees have
         * the same prog type and JITed flag.
         */
-       enum bpf_prog_type type;
-       bool jited;
+       struct {
+               spinlock_t lock;
+               enum bpf_prog_type type;
+               bool jited;
+       } owner;
        /* Programs with direct jumps into programs part of this array. */
        struct list_head poke_progs;
        struct bpf_map *map;
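
The new owner.lock serializes the "first program claims the array" check. A
hypothetical pthread model of that claim-or-compare step:

    #include <pthread.h>
    #include <stdbool.h>

    struct owner {
            pthread_mutex_t lock;   /* init with PTHREAD_MUTEX_INITIALIZER */
            int type;               /* 0 means not yet claimed */
            bool jited;
    };

    static bool owner_compatible(struct owner *o, int type, bool jited)
    {
            bool ok;

            pthread_mutex_lock(&o->lock);
            if (!o->type) {         /* first user stamps its type/jited */
                    o->type = type;
                    o->jited = jited;
                    ok = true;
            } else {
                    ok = o->type == type && o->jited == jited;
            }
            pthread_mutex_unlock(&o->lock);
            return ok;
    }
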
index 9c81724..bbe1eef 100644 (file)
@@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
-#ifdef CONFIG_NET
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
+#ifdef CONFIG_NET
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
index 47f80ad..8231a6a 100644 (file)
@@ -1050,6 +1050,7 @@ extern int bpf_jit_enable;
 extern int bpf_jit_harden;
 extern int bpf_jit_kallsyms;
 extern long bpf_jit_limit;
+extern long bpf_jit_limit_max;
 
 typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
 
index f8a0bbb..9c25edf 100644 (file)
@@ -290,6 +290,7 @@ enum {
        MLX5_UMR_INLINE                 = (1 << 7),
 };
 
+#define MLX5_UMR_KLM_ALIGNMENT 4
 #define MLX5_UMR_MTT_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
 #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
@@ -799,10 +800,23 @@ struct mlx5_cqe64 {
        u8              tls_outer_l3_tunneled;
        u8              rsvd0;
        __be16          wqe_id;
-       u8              lro_tcppsh_abort_dupack;
-       u8              lro_min_ttl;
-       __be16          lro_tcp_win;
-       __be32          lro_ack_seq_num;
+       union {
+               struct {
+                       u8      tcppsh_abort_dupack;
+                       u8      min_ttl;
+                       __be16  tcp_win;
+                       __be32  ack_seq_num;
+               } lro;
+               struct {
+                       u8      reserved0:1;
+                       u8      match:1;
+                       u8      flush:1;
+                       u8      reserved3:5;
+                       u8      header_size;
+                       __be16  header_entry_index;
+                       __be32  data_offset;
+               } shampo;
+       };
        __be32          rss_hash_result;
        u8              rss_hash_type;
        u8              ml_path;
@@ -872,7 +886,7 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
 
 static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
 {
-       return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
+       return (cqe->lro.tcppsh_abort_dupack >> 6) & 1;
 }
 
 static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe)
@@ -1186,6 +1200,7 @@ enum mlx5_cap_type {
        MLX5_CAP_VDPA_EMULATION = 0x13,
        MLX5_CAP_DEV_EVENT = 0x14,
        MLX5_CAP_IPSEC,
+       MLX5_CAP_DEV_SHAMPO = 0x1d,
        MLX5_CAP_GENERAL_2 = 0x20,
        MLX5_CAP_PORT_SELECTION = 0x25,
        /* NUM OF CAP Types */
@@ -1431,6 +1446,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_IPSEC(mdev, cap)\
        MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap)
 
+#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\
+       MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap)
+
 enum {
        MLX5_CMD_STAT_OK                        = 0x0,
        MLX5_CMD_STAT_INT_ERR                   = 0x1,
@@ -1475,6 +1493,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
        return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
 }
 
+#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2
+#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1
 #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16
 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16
 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1
index f617dfb..a623ec6 100644 (file)
@@ -357,22 +357,6 @@ struct mlx5_core_sig_ctx {
        u32                     sigerr_count;
 };
 
-enum {
-       MLX5_MKEY_MR = 1,
-       MLX5_MKEY_MW,
-       MLX5_MKEY_INDIRECT_DEVX,
-};
-
-struct mlx5_core_mkey {
-       u64                     iova;
-       u64                     size;
-       u32                     key;
-       u32                     pd;
-       u32                     type;
-       struct wait_queue_head wait;
-       refcount_t usecount;
-};
-
 #define MLX5_24BIT_MASK                ((1 << 24) - 1)
 
 enum mlx5_res_type {
@@ -654,7 +638,7 @@ struct mlx5e_resources {
        struct mlx5e_hw_objs {
                u32                        pdn;
                struct mlx5_td             td;
-               struct mlx5_core_mkey      mkey;
+               u32                        mkey;
                struct mlx5_sq_bfreg       bfreg;
        } hw_objs;
        struct devlink_port dl_port;
@@ -1007,8 +991,6 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
 bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
 
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
-int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
-int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 void mlx5_health_flush(struct mlx5_core_dev *dev);
 void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
@@ -1026,13 +1008,11 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
                                                      gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
                                 struct mlx5_cmd_mailbox *head);
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
-                         struct mlx5_core_mkey *mkey,
-                         u32 *in, int inlen);
-int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
-                          struct mlx5_core_mkey *mkey);
-int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
-                        u32 *out, int outlen);
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in,
+                         int inlen);
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey);
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out,
+                        int outlen);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
 int mlx5_pagealloc_init(struct mlx5_core_dev *dev);
index 7a43fec..a7e1155 100644 (file)
@@ -84,6 +84,8 @@ enum mlx5_flow_namespace_type {
        MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
        MLX5_FLOW_NAMESPACE_RDMA_TX,
        MLX5_FLOW_NAMESPACE_PORT_SEL,
+       MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS,
+       MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS,
 };
 
 enum {
index 746381e..3636df9 100644 (file)
@@ -343,7 +343,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         outer_geneve_oam[0x1];
        u8         outer_geneve_protocol_type[0x1];
        u8         outer_geneve_opt_len[0x1];
-       u8         reserved_at_1e[0x1];
+       u8         source_vhca_port[0x1];
        u8         source_eswitch_port[0x1];
 
        u8         inner_dmac[0x1];
@@ -394,6 +394,14 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         metadata_reg_c_0[0x1];
 };
 
+struct mlx5_ifc_flow_table_fields_supported_2_bits {
+       u8         reserved_at_0[0xe];
+       u8         bth_opcode[0x1];
+       u8         reserved_at_f[0x11];
+
+       u8         reserved_at_20[0x60];
+};
+
 struct mlx5_ifc_flow_table_prop_layout_bits {
        u8         ft_support[0x1];
        u8         reserved_at_1[0x1];
@@ -540,7 +548,7 @@ struct mlx5_ifc_fte_match_set_misc_bits {
        union mlx5_ifc_gre_key_bits gre_key;
 
        u8         vxlan_vni[0x18];
-       u8         reserved_at_b8[0x8];
+       u8         bth_opcode[0x8];
 
        u8         geneve_vni[0x18];
        u8         reserved_at_d8[0x7];
@@ -757,7 +765,15 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
 
-       u8         reserved_at_e00[0x1200];
+       u8         reserved_at_e00[0x700];
+
+       struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_receive_rdma;
+
+       u8         reserved_at_1580[0x280];
+
+       struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_transmit_rdma;
+
+       u8         reserved_at_1880[0x780];
 
        u8         sw_steering_nic_rx_action_drop_icm_address[0x40];
 
@@ -1350,7 +1366,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_at_b0[0x1];
        u8         uplink_follow[0x1];
        u8         ts_cqe_to_dest_cqn[0x1];
-       u8         reserved_at_b3[0xd];
+       u8         reserved_at_b3[0x7];
+       u8         shampo[0x1];
+       u8         reserved_at_bb[0x5];
 
        u8         max_sgl_for_optimized_performance[0x8];
        u8         log_max_cq_sz[0x8];
@@ -1893,7 +1911,21 @@ struct mlx5_ifc_wq_bits {
        u8         reserved_at_139[0x4];
        u8         log_wqe_stride_size[0x3];
 
-       u8         reserved_at_140[0x4c0];
+       u8         reserved_at_140[0x80];
+
+       u8         headers_mkey[0x20];
+
+       u8         shampo_enable[0x1];
+       u8         reserved_at_1e1[0x4];
+       u8         log_reservation_size[0x3];
+       u8         reserved_at_1e8[0x5];
+       u8         log_max_num_of_packets_per_reservation[0x3];
+       u8         reserved_at_1f0[0x6];
+       u8         log_headers_entry_size[0x2];
+       u8         reserved_at_1f8[0x4];
+       u8         log_headers_buffer_entry_num[0x4];
+
+       u8         reserved_at_200[0x400];
 
        struct mlx5_ifc_cmd_pas_bits pas[];
 };
@@ -3169,6 +3201,20 @@ struct mlx5_ifc_roce_addr_layout_bits {
        u8         reserved_at_e0[0x20];
 };
 
+struct mlx5_ifc_shampo_cap_bits {
+       u8    reserved_at_0[0x3];
+       u8    shampo_log_max_reservation_size[0x5];
+       u8    reserved_at_8[0x3];
+       u8    shampo_log_min_reservation_size[0x5];
+       u8    shampo_min_mss_size[0x10];
+
+       u8    reserved_at_20[0x3];
+       u8    shampo_max_log_headers_entry_size[0x5];
+       u8    reserved_at_28[0x18];
+
+       u8    reserved_at_40[0x7c0];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
        struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
        struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2;
@@ -3187,6 +3233,7 @@ union mlx5_ifc_hca_cap_union_bits {
        struct mlx5_ifc_tls_cap_bits tls_cap;
        struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
        struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap;
+       struct mlx5_ifc_shampo_cap_bits shampo_cap;
        u8         reserved_at_0[0x8000];
 };
 
@@ -3361,8 +3408,9 @@ enum {
 };
 
 enum {
-       MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO  = 0x1,
-       MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO  = 0x2,
+       MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO  = BIT(0),
+       MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO  = BIT(1),
+       MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO    = BIT(2),
 };
 
 enum {
@@ -3387,7 +3435,7 @@ struct mlx5_ifc_tirc_bits {
 
        u8         reserved_at_80[0x4];
        u8         lro_timeout_period_usecs[0x10];
-       u8         lro_enable_mask[0x4];
+       u8         packet_merge_mask[0x4];
        u8         lro_max_ip_payload_size[0x8];
 
        u8         reserved_at_a0[0x40];
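
The lro_enable_mask field becomes a generic 4-bit packet_merge_mask: LRO keeps bits 0-1 and SHAMPO takes bit 2, so both merge schemes share one TIR field. A standalone illustration of composing and testing the mask (BIT() expanded locally):

#include <stdio.h>

#define BIT(n) (1U << (n))	/* local expansion of the kernel macro */

enum {
	MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0),
	MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1),
	MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO   = BIT(2),
};

int main(void)
{
	unsigned int mask = MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
			    MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO;

	if (mask & MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO)
		printf("SHAMPO merge enabled\n");
	else
		printf("classic LRO only (mask=0x%x)\n", mask);
	return 0;
}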
@@ -3569,6 +3617,18 @@ enum {
        MLX5_RQC_STATE_ERR  = 0x3,
 };
 
+enum {
+       MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_BYTE    = 0x0,
+       MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE  = 0x1,
+       MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_PAGE    = 0x2,
+};
+
+enum {
+       MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_NO_MATCH    = 0x0,
+       MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED    = 0x1,
+       MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_FIVE_TUPLE  = 0x2,
+};
+
 struct mlx5_ifc_rqc_bits {
        u8         rlky[0x1];
        u8         delay_drop_en[0x1];
@@ -3601,7 +3661,13 @@ struct mlx5_ifc_rqc_bits {
        u8         reserved_at_c0[0x10];
        u8         hairpin_peer_vhca[0x10];
 
-       u8         reserved_at_e0[0xa0];
+       u8         reserved_at_e0[0x46];
+       u8         shampo_no_match_alignment_granularity[0x2];
+       u8         reserved_at_128[0x6];
+       u8         shampo_match_criteria_type[0x2];
+       u8         reservation_timeout[0x10];
+
+       u8         reserved_at_140[0x40];
 
        struct mlx5_ifc_wq_bits wq;
 };
@@ -6657,7 +6723,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits {
        u8         reserved_at_3c[0x1];
        u8         hash[0x1];
        u8         reserved_at_3e[0x1];
-       u8         lro[0x1];
+       u8         packet_merge[0x1];
 };
 
 struct mlx5_ifc_modify_tir_out_bits {
@@ -7857,7 +7923,7 @@ struct mlx5_ifc_dealloc_uar_out_bits {
 
 struct mlx5_ifc_dealloc_uar_in_bits {
        u8         opcode[0x10];
-       u8         reserved_at_10[0x10];
+       u8         uid[0x10];
 
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
@@ -8714,7 +8780,7 @@ struct mlx5_ifc_alloc_uar_out_bits {
 
 struct mlx5_ifc_alloc_uar_in_bits {
        u8         opcode[0x10];
-       u8         reserved_at_10[0x10];
+       u8         uid[0x10];
 
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
index cb96f1e..0bd6520 100644 (file)
@@ -4244,6 +4244,9 @@ enum skb_ext_id {
 #if IS_ENABLED(CONFIG_MPTCP)
        SKB_EXT_MPTCP,
 #endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+       SKB_EXT_MCTP,
+#endif
        SKB_EXT_NUM, /* must be last */
 };
 
index 14ab0c0..1ce9a9e 100644 (file)
@@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
                             struct sk_msg *msg, u32 bytes);
 int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
                   int len, int flags);
+bool sk_msg_is_readable(struct sock *sk);
 
 static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
 {
index 7c9d5db..423f97b 100644 (file)
@@ -5442,7 +5442,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
 *     netdev and may otherwise be used by driver read-only, will be updated
  *     by cfg80211 on change_interface
  * @mgmt_registrations: list of registrations for management frames
- * @mgmt_registrations_lock: lock for the list
  * @mgmt_registrations_need_update: mgmt registrations were updated,
  *     need to propagate the update to the driver
  * @mtx: mutex used to lock data in this struct, may be used by drivers
@@ -5489,7 +5488,6 @@ struct wireless_dev {
        u32 identifier;
 
        struct list_head mgmt_registrations;
-       spinlock_t mgmt_registrations_lock;
        u8 mgmt_registrations_need_update:1;
 
        struct mutex mtx;
index 23bec70..7e35ec7 100644 (file)
@@ -152,6 +152,12 @@ struct mctp_sk_key {
 
        /* expiry timeout; valid (above) cleared on expiry */
        unsigned long   expiry;
+
+       /* free to use for device flow state tracking. Initialised to
+        * zero on initial key creation
+        */
+       unsigned long   dev_flow_state;
+       struct mctp_dev *dev;
 };
 
 struct mctp_skb_cb {
@@ -189,6 +195,13 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
        return (void *)(skb->cb);
 }
 
+/* If CONFIG_MCTP_FLOWS is enabled, we may add one of these as an SKB extension,
+ * indicating the flow to the device driver.
+ */
+struct mctp_flow {
+       struct mctp_sk_key *key;
+};
+
 /* Route definition.
  *
  * These are held in the pernet->mctp.routes list, with RCU protection for
index 3a43946..5c0d04b 100644 (file)
@@ -14,6 +14,8 @@
 #include <linux/types.h>
 #include <linux/refcount.h>
 
+struct mctp_sk_key;
+
 struct mctp_dev {
        struct net_device       *dev;
 
@@ -21,6 +23,8 @@ struct mctp_dev {
 
        unsigned int            net;
 
+       const struct mctp_netdev_ops *ops;
+
        /* Only modified under RTNL. Reads have addrs_lock held */
        u8                      *addrs;
        size_t                  num_addrs;
@@ -29,12 +33,24 @@ struct mctp_dev {
        struct rcu_head         rcu;
 };
 
+struct mctp_netdev_ops {
+       void                    (*release_flow)(struct mctp_dev *dev,
+                                               struct mctp_sk_key *key);
+};
+
 #define MCTP_INITIAL_DEFAULT_NET       1
 
 struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
 struct mctp_dev *__mctp_dev_get(const struct net_device *dev);
 
+int mctp_register_netdev(struct net_device *dev,
+                        const struct mctp_netdev_ops *ops);
+void mctp_unregister_netdev(struct net_device *dev);
+
 void mctp_dev_hold(struct mctp_dev *mdev);
 void mctp_dev_put(struct mctp_dev *mdev);
 
+void mctp_dev_set_key(struct mctp_dev *dev, struct mctp_sk_key *key);
+void mctp_dev_release_key(struct mctp_dev *dev, struct mctp_sk_key *key);
+
 #endif /* __NET_MCTPDEVICE_H */
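
With the new registration API, a flow-aware MCTP driver supplies release_flow so the core can tell it when a key, and the hardware state stashed in key->dev_flow_state, goes away. A hedged, kernel-style sketch (the my_* names are invented; the mctp_* API is from this hunk, and the fragment is not standalone):

static void my_mctp_release_flow(struct mctp_dev *dev,
				 struct mctp_sk_key *key)
{
	/* tear down hardware state tracked in key->dev_flow_state */
}

static const struct mctp_netdev_ops my_mctp_ops = {
	.release_flow = my_mctp_release_flow,
};

static int my_probe(struct net_device *ndev)
{
	/* flow-aware drivers register through the MCTP core */
	return mctp_register_netdev(ndev, &my_mctp_ops);
}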
index f83fa48..a925349 100644 (file)
@@ -71,6 +71,10 @@ struct mptcp_out_options {
                struct {
                        u64 sndr_key;
                        u64 rcvr_key;
+                       u64 data_seq;
+                       u32 subflow_seq;
+                       u16 data_len;
+                       __sum16 csum;
                };
                struct {
                        struct mptcp_addr_info addr;
index ada02c4..22179b2 100644 (file)
@@ -1302,7 +1302,7 @@ struct mini_Qdisc {
        struct tcf_block *block;
        struct gnet_stats_basic_sync __percpu *cpu_bstats;
        struct gnet_stats_queue __percpu *cpu_qstats;
-       struct rcu_head rcu;
+       unsigned long rcu_state;
 };
 
 static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
index ff4e62a..620de05 100644 (file)
@@ -1210,13 +1210,16 @@ struct proto {
        unsigned int            inuse_idx;
 #endif
 
+       int                     (*forward_alloc_get)(const struct sock *sk);
+
        bool                    (*stream_memory_free)(const struct sock *sk, int wake);
-       bool                    (*stream_memory_read)(const struct sock *sk);
+       bool                    (*sock_is_readable)(struct sock *sk);
        /* Memory pressure */
        void                    (*enter_memory_pressure)(struct sock *sk);
        void                    (*leave_memory_pressure)(struct sock *sk);
        atomic_long_t           *memory_allocated;      /* Current allocated memory. */
        struct percpu_counter   *sockets_allocated;     /* Current number of sockets. */
+
        /*
         * Pressure flag: try to collapse.
         * Technical note: it is used by multiple contexts non atomically.
@@ -1294,20 +1297,22 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 
 INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
 
+static inline int sk_forward_alloc_get(const struct sock *sk)
+{
+       if (!sk->sk_prot->forward_alloc_get)
+               return sk->sk_forward_alloc;
+
+       return sk->sk_prot->forward_alloc_get(sk);
+}
+
 static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
 {
        if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
                return false;
 
-#ifdef CONFIG_INET
        return sk->sk_prot->stream_memory_free ?
-               INDIRECT_CALL_1(sk->sk_prot->stream_memory_free,
-                               tcp_stream_memory_free,
-                               sk, wake) : true;
-#else
-       return sk->sk_prot->stream_memory_free ?
-               sk->sk_prot->stream_memory_free(sk, wake) : true;
-#endif
+               INDIRECT_CALL_INET_1(sk->sk_prot->stream_memory_free,
+                                    tcp_stream_memory_free, sk, wake) : true;
 }
 
 static inline bool sk_stream_memory_free(const struct sock *sk)
@@ -1573,6 +1578,11 @@ static inline void sk_mem_charge(struct sock *sk, int size)
        sk->sk_forward_alloc -= size;
 }
 
+/* the following macros control memory reclaiming in sk_mem_uncharge()
+ */
+#define SK_RECLAIM_THRESHOLD   (1 << 21)
+#define SK_RECLAIM_CHUNK       (1 << 20)
+
 static inline void sk_mem_uncharge(struct sock *sk, int size)
 {
        int reclaimable;
@@ -1589,8 +1599,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
         * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
         * no need to hold that much forward allocation anyway.
         */
-       if (unlikely(reclaimable >= 1 << 21))
-               __sk_mem_reclaim(sk, 1 << 20);
+       if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
+               __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
 }
 
 static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
@@ -2844,4 +2854,10 @@ int sock_get_timeout(long timeo, void *optval, bool old_timeval);
 int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
                           sockptr_t optval, int optlen, bool old_timeval);
 
+static inline bool sk_is_readable(struct sock *sk)
+{
+       if (sk->sk_prot->sock_is_readable)
+               return sk->sk_prot->sock_is_readable(sk);
+       return false;
+}
 #endif /* _SOCK_H */
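
sk_is_readable() defaults to false, so only protocols that hold readable data outside sk_receive_queue need to opt in; TLS does so via tls_sw_sock_is_readable elsewhere in this series. A minimal kernel-style sketch of the wiring (my_* names invented, fragment not standalone):

static bool my_sock_is_readable(struct sock *sk)
{
	/* report data parked outside sk_receive_queue; returning
	 * false falls back to normal receive-queue polling */
	return false;
}

static struct proto my_proto = {
	.sock_is_readable = my_sock_is_readable,
	/* ... remaining proto ops ... */
};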
index 60d806b..d353793 100644 (file)
@@ -299,28 +299,16 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
                                 struct net_device *group_dev,
                                 bool joining);
 
-int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
                const struct switchdev_notifier_fdb_info *fdb_info,
                bool (*check_cb)(const struct net_device *dev),
                bool (*foreign_dev_check_cb)(const struct net_device *dev,
                                             const struct net_device *foreign_dev),
-               int (*add_cb)(struct net_device *dev,
-                             const struct net_device *orig_dev, const void *ctx,
+               int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+                             unsigned long event, const void *ctx,
                              const struct switchdev_notifier_fdb_info *fdb_info),
-               int (*lag_add_cb)(struct net_device *dev,
-                                 const struct net_device *orig_dev, const void *ctx,
-                                 const struct switchdev_notifier_fdb_info *fdb_info));
-
-int switchdev_handle_fdb_del_to_device(struct net_device *dev,
-               const struct switchdev_notifier_fdb_info *fdb_info,
-               bool (*check_cb)(const struct net_device *dev),
-               bool (*foreign_dev_check_cb)(const struct net_device *dev,
-                                            const struct net_device *foreign_dev),
-               int (*del_cb)(struct net_device *dev,
-                             const struct net_device *orig_dev, const void *ctx,
-                             const struct switchdev_notifier_fdb_info *fdb_info),
-               int (*lag_del_cb)(struct net_device *dev,
-                                 const struct net_device *orig_dev, const void *ctx,
+               int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+                                 unsigned long event, const void *ctx,
                                  const struct switchdev_notifier_fdb_info *fdb_info));
 
 int switchdev_handle_port_obj_add(struct net_device *dev,
@@ -426,32 +414,16 @@ call_switchdev_blocking_notifiers(unsigned long val,
 }
 
 static inline int
-switchdev_handle_fdb_add_to_device(struct net_device *dev,
-               const struct switchdev_notifier_fdb_info *fdb_info,
-               bool (*check_cb)(const struct net_device *dev),
-               bool (*foreign_dev_check_cb)(const struct net_device *dev,
-                                            const struct net_device *foreign_dev),
-               int (*add_cb)(struct net_device *dev,
-                             const struct net_device *orig_dev, const void *ctx,
-                             const struct switchdev_notifier_fdb_info *fdb_info),
-               int (*lag_add_cb)(struct net_device *dev,
-                                 const struct net_device *orig_dev, const void *ctx,
-                                 const struct switchdev_notifier_fdb_info *fdb_info))
-{
-       return 0;
-}
-
-static inline int
-switchdev_handle_fdb_del_to_device(struct net_device *dev,
+switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
                const struct switchdev_notifier_fdb_info *fdb_info,
                bool (*check_cb)(const struct net_device *dev),
                bool (*foreign_dev_check_cb)(const struct net_device *dev,
                                             const struct net_device *foreign_dev),
-               int (*del_cb)(struct net_device *dev,
-                             const struct net_device *orig_dev, const void *ctx,
+               int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+                             unsigned long event, const void *ctx,
                              const struct switchdev_notifier_fdb_info *fdb_info),
-               int (*lag_del_cb)(struct net_device *dev,
-                                 const struct net_device *orig_dev, const void *ctx,
+               int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+                                 unsigned long event, const void *ctx,
                                  const struct switchdev_notifier_fdb_info *fdb_info))
 {
        return 0;
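
The consolidation folds the add/del callback pairs into one handler that receives the event, halving the stub surface. A hedged sketch of a driver-side callback under the new signature (my_fdb_add and my_fdb_del are hypothetical helpers; the event constants are the existing switchdev ones):

static int my_fdb_event(struct net_device *dev, struct net_device *orig_dev,
			unsigned long event, const void *ctx,
			const struct switchdev_notifier_fdb_info *fdb_info)
{
	switch (event) {
	case SWITCHDEV_FDB_ADD_TO_DEVICE:
		return my_fdb_add(dev, fdb_info);	/* hypothetical */
	case SWITCHDEV_FDB_DEL_TO_DEVICE:
		return my_fdb_del(dev, fdb_info);	/* hypothetical */
	default:
		return -EOPNOTSUPP;
	}
}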
index 701587a..8e8c592 100644 (file)
@@ -311,7 +311,7 @@ void tcp_shutdown(struct sock *sk, int how);
 int tcp_v4_early_demux(struct sk_buff *skb);
 int tcp_v4_rcv(struct sk_buff *skb);
 
-void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb);
+void tcp_remove_empty_skb(struct sock *sk);
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
 int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
index adab19a..526cb2c 100644 (file)
@@ -361,6 +361,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval,
                int __user *optlen);
 int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
                  unsigned int optlen);
+void tls_err_abort(struct sock *sk, int err);
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
@@ -378,7 +379,7 @@ void tls_sw_release_resources_rx(struct sock *sk);
 void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                   int nonblock, int flags, int *addr_len);
-bool tls_sw_stream_read(const struct sock *sk);
+bool tls_sw_sock_is_readable(struct sock *sk);
 ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
                           struct pipe_inode_info *pipe,
                           size_t len, unsigned int flags);
@@ -469,12 +470,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 #endif
 }
 
-static inline void tls_err_abort(struct sock *sk, int err)
-{
-       sk->sk_err = err;
-       sk_error_report(sk);
-}
-
 static inline bool tls_bigint_increment(unsigned char *seq, int len)
 {
        int i;
@@ -515,7 +510,7 @@ static inline void tls_advance_record_sn(struct sock *sk,
                                         struct cipher_context *ctx)
 {
        if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size))
-               tls_err_abort(sk, EBADMSG);
+               tls_err_abort(sk, -EBADMSG);
 
        if (prot->version != TLS_1_3_VERSION &&
            prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305)
index 360df45..909ecf4 100644 (file)
@@ -494,8 +494,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
         * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial
         * packets in udp_gro_complete_segment. As does UDP GSO, verified by
         * udp_send_skb. But when those packets are looped in dev_loopback_xmit
-        * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this
-        * specific case, where PARTIAL is both correct and required.
+        * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY.
+        * Reset in this specific case, where PARTIAL is both correct and
+        * required.
         */
        if (skb->pkt_type == PACKET_LOOPBACK)
                skb->ip_summed = CHECKSUM_PARTIAL;
index 5e1ccfa..c7a5be3 100644 (file)
@@ -1071,6 +1071,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
        INIT_WORK(&aux->work, prog_array_map_clear_deferred);
        INIT_LIST_HEAD(&aux->poke_progs);
        mutex_init(&aux->poke_mutex);
+       spin_lock_init(&aux->owner.lock);
 
        map = array_map_alloc(attr);
        if (IS_ERR(map)) {
index ea8a468..ded9163 100644 (file)
@@ -524,6 +524,7 @@ int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
 int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
 int bpf_jit_harden   __read_mostly;
 long bpf_jit_limit   __read_mostly;
+long bpf_jit_limit_max __read_mostly;
 
 static void
 bpf_prog_ksym_set_addr(struct bpf_prog *prog)
@@ -817,7 +818,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void)
 static int __init bpf_jit_charge_init(void)
 {
        /* Only used as heuristic here to derive limit. */
-       bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
+       bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
+       bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
                                            PAGE_SIZE), LONG_MAX);
        return 0;
 }
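
bpf_jit_limit_max now records the raw arch limit while the default charge limit stays at a quarter of it, rounded up to a page. The arithmetic, modelled standalone (PAGE_SIZE and the 1 GiB exec limit are illustrative values, not the kernel's):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL		/* illustrative */

static uint64_t round_up(uint64_t x, uint64_t align)
{
	return (x + align - 1) / align * align;
}

int main(void)
{
	/* stand-in for bpf_jit_alloc_exec_limit(): assume 1 GiB */
	uint64_t limit_max = 1ULL << 30;
	/* default charge limit: a quarter, page-aligned */
	uint64_t limit = round_up(limit_max >> 2, PAGE_SIZE);

	printf("max=%llu default=%llu\n",
	       (unsigned long long)limit_max,
	       (unsigned long long)limit);
	return 0;
}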
@@ -1821,20 +1823,26 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
 bool bpf_prog_array_compatible(struct bpf_array *array,
                               const struct bpf_prog *fp)
 {
+       bool ret;
+
        if (fp->kprobe_override)
                return false;
 
-       if (!array->aux->type) {
+       spin_lock(&array->aux->owner.lock);
+
+       if (!array->aux->owner.type) {
                /* There's no owner yet where we could check for
                 * compatibility.
                 */
-               array->aux->type  = fp->type;
-               array->aux->jited = fp->jited;
-               return true;
+               array->aux->owner.type  = fp->type;
+               array->aux->owner.jited = fp->jited;
+               ret = true;
+       } else {
+               ret = array->aux->owner.type  == fp->type &&
+                     array->aux->owner.jited == fp->jited;
        }
-
-       return array->aux->type  == fp->type &&
-              array->aux->jited == fp->jited;
+       spin_unlock(&array->aux->owner.lock);
+       return ret;
 }
 
 static int bpf_check_tail_call(const struct bpf_prog *fp)
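
The race being fixed is the unlocked check-then-set on the owner fields: two programs could both observe type == 0 and claim the array concurrently. A standalone model of the claim-or-compare pattern, using a pthread mutex in place of the kernel spinlock:

#include <pthread.h>
#include <stdbool.h>

struct owner {
	pthread_mutex_t lock;
	int type;		/* 0 == unclaimed */
	bool jited;
};

/* the first caller claims the owner identity; later callers must match
 * it, all under the lock so claim and compare cannot interleave */
static bool owner_compatible(struct owner *o, int type, bool jited)
{
	bool ret;

	pthread_mutex_lock(&o->lock);
	if (!o->type) {
		o->type = type;
		o->jited = jited;
		ret = true;
	} else {
		ret = o->type == type && o->jited == jited;
	}
	pthread_mutex_unlock(&o->lock);
	return ret;
}

int main(void)
{
	struct owner o = { PTHREAD_MUTEX_INITIALIZER, 0, false };

	owner_compatible(&o, 1, true);		/* claim: type 1, jited */
	return owner_compatible(&o, 2, true);	/* mismatch: returns false */
}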
index 4e50c0b..1cad697 100644 (file)
@@ -543,8 +543,10 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 
        if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
                array = container_of(map, struct bpf_array, map);
-               type  = array->aux->type;
-               jited = array->aux->jited;
+               spin_lock(&array->aux->owner.lock);
+               type  = array->aux->owner.type;
+               jited = array->aux->owner.jited;
+               spin_unlock(&array->aux->owner.lock);
        }
 
        seq_printf(m,
@@ -1337,12 +1339,11 @@ int generic_map_update_batch(struct bpf_map *map,
        void __user *values = u64_to_user_ptr(attr->batch.values);
        void __user *keys = u64_to_user_ptr(attr->batch.keys);
        u32 value_size, cp, max_count;
-       int ufd = attr->map_fd;
+       int ufd = attr->batch.map_fd;
        void *key, *value;
        struct fd f;
        int err = 0;
 
-       f = fdget(ufd);
        if (attr->batch.elem_flags & ~BPF_F_LOCK)
                return -EINVAL;
 
@@ -1367,6 +1368,7 @@ int generic_map_update_batch(struct bpf_map *map,
                return -ENOMEM;
        }
 
+       f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
        for (cp = 0; cp < max_count; cp++) {
                err = -EFAULT;
                if (copy_from_user(key, keys + cp * map->key_size,
@@ -1386,6 +1388,7 @@ int generic_map_update_batch(struct bpf_map *map,
 
        kvfree(value);
        kvfree(key);
+       fdput(f);
        return err;
 }
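
The bug was an fd reference taken before the early -EINVAL/-ENOMEM returns, which then leaked; the fix acquires the reference only after all argument checks pass and pairs it with fdput() on the single exit. The shape of the fix as a standalone sketch (all names hypothetical):

#include <errno.h>

#define VALID_FLAGS 0x1u		/* hypothetical */

struct handle { int refs; };

static struct handle *handle_get(int ufd)
{
	static struct handle h;

	(void)ufd;
	h.refs++;
	return &h;
}

static void handle_put(struct handle *h)
{
	h->refs--;
}

static int do_updates(struct handle *h)
{
	(void)h;
	return 0;
}

int batch_update(int ufd, unsigned int flags)
{
	struct handle *h;
	int err;

	if (flags & ~VALID_FLAGS)	/* early errors take no reference */
		return -EINVAL;

	h = handle_get(ufd);		/* acquire only when we will use it */
	err = do_updates(h);
	handle_put(h);			/* dropped on every exit */
	return err;
}

int main(void)
{
	int err = batch_update(3, 0x2u);	/* invalid flag: no ref taken */

	return err == -EINVAL ? 0 : 1;
}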
 
index 570b0c9..ea08f01 100644 (file)
@@ -2187,8 +2187,10 @@ static void cgroup_kill_sb(struct super_block *sb)
         * And don't kill the default root.
         */
        if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
-           !percpu_ref_is_dying(&root->cgrp.self.refcnt))
+           !percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
+               cgroup_bpf_offline(&root->cgrp);
                percpu_ref_kill(&root->cgrp.self.refcnt);
+       }
        cgroup_put(&root->cgrp);
        kernfs_kill_sb(sb);
 }
index 1bba412..f21714e 100644 (file)
@@ -8795,6 +8795,7 @@ void idle_task_exit(void)
                finish_arch_post_lock_switch();
        }
 
+       scs_task_reset(current);
        /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
 }
 
index c4a15ae..5c5f208 100644 (file)
@@ -904,8 +904,8 @@ static int __trace_eprobe_create(int argc, const char *argv[])
 
        if (IS_ERR(ep)) {
                ret = PTR_ERR(ep);
-               /* This must return -ENOMEM, else there is a bug */
-               WARN_ON_ONCE(ret != -ENOMEM);
+               /* This must return -ENOMEM or -ENODEV (missing event), else there is a bug */
+               WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV);
                ep = NULL;
                goto error;
        }
index 663dd81..9264088 100644 (file)
@@ -1398,6 +1398,19 @@ unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags)
 }
 EXPORT_SYMBOL(bitmap_zalloc);
 
+unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node)
+{
+       return kmalloc_array_node(BITS_TO_LONGS(nbits), sizeof(unsigned long),
+                                 flags, node);
+}
+EXPORT_SYMBOL(bitmap_alloc_node);
+
+unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node)
+{
+       return bitmap_alloc_node(nbits, flags | __GFP_ZERO, node);
+}
+EXPORT_SYMBOL(bitmap_zalloc_node);
+
 void bitmap_free(const unsigned long *bitmap)
 {
        kfree(bitmap);
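
The node-aware helpers only change where the array lands (kmalloc_array_node instead of kmalloc_array); the sizing is still BITS_TO_LONGS, the usual divide-round-up. A standalone check of that math:

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (CHAR_BIT * sizeof(long))
#define BITS_TO_LONGS(nbits) (((nbits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	/* with a 64-bit long: 1 -> 1, 64 -> 1, 65 -> 2 */
	printf("%zu %zu %zu\n",
	       BITS_TO_LONGS(1), BITS_TO_LONGS(64), BITS_TO_LONGS(65));
	return 0;
}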
index 1fea68b..c2dda40 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/secretmem.h>
 #include <linux/set_memory.h>
 #include <linux/sched/signal.h>
-#include <linux/refcount.h>
 
 #include <uapi/linux/magic.h>
 
@@ -41,11 +40,11 @@ module_param_named(enable, secretmem_enable, bool, 0400);
 MODULE_PARM_DESC(secretmem_enable,
                 "Enable secretmem and memfd_secret(2) system call");
 
-static refcount_t secretmem_users;
+static atomic_t secretmem_users;
 
 bool secretmem_active(void)
 {
-       return !!refcount_read(&secretmem_users);
+       return !!atomic_read(&secretmem_users);
 }
 
 static vm_fault_t secretmem_fault(struct vm_fault *vmf)
@@ -104,7 +103,7 @@ static const struct vm_operations_struct secretmem_vm_ops = {
 
 static int secretmem_release(struct inode *inode, struct file *file)
 {
-       refcount_dec(&secretmem_users);
+       atomic_dec(&secretmem_users);
        return 0;
 }
 
@@ -204,6 +203,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
 
        if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
                return -EINVAL;
+       if (atomic_read(&secretmem_users) < 0)
+               return -ENFILE;
 
        fd = get_unused_fd_flags(flags & O_CLOEXEC);
        if (fd < 0)
@@ -218,7 +219,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
        file->f_flags |= O_LARGEFILE;
 
        fd_install(fd, file);
-       refcount_inc(&secretmem_users);
+       atomic_inc(&secretmem_users);
        return fd;
 
 err_put_fd:
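
refcount_t is a poor fit here because zero is a legal, common state (no secretmem users) and incrementing from zero warns; the plain atomic_t counts from zero, with the new < 0 check refusing new users if the counter ever wraps. A standalone model using C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int users;

static bool users_active(void)
{
	return atomic_load(&users) != 0;
}

static int users_take(void)
{
	/* mirror of the new guard: refuse once the counter has wrapped */
	if (atomic_load(&users) < 0)
		return -1;
	atomic_fetch_add(&users, 1);
	return 0;
}

static void users_drop(void)
{
	atomic_fetch_sub(&users, 1);
}

int main(void)
{
	if (users_take() == 0 && users_active())
		users_drop();
	return users_active();	/* 0: takes and drops balanced */
}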
index 7242b32..2ed9496 100644 (file)
@@ -1560,10 +1560,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
                return 0;
 
        bat_priv->bla.claim_hash = batadv_hash_new(128);
-       bat_priv->bla.backbone_hash = batadv_hash_new(32);
+       if (!bat_priv->bla.claim_hash)
+               return -ENOMEM;
 
-       if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash)
+       bat_priv->bla.backbone_hash = batadv_hash_new(32);
+       if (!bat_priv->bla.backbone_hash) {
+               batadv_hash_destroy(bat_priv->bla.claim_hash);
                return -ENOMEM;
+       }
 
        batadv_hash_set_lock_class(bat_priv->bla.claim_hash,
                                   &batadv_claim_hash_lock_class_key);
index 3ddd66e..5207cd8 100644 (file)
@@ -190,29 +190,41 @@ int batadv_mesh_init(struct net_device *soft_iface)
 
        bat_priv->gw.generation = 0;
 
-       ret = batadv_v_mesh_init(bat_priv);
-       if (ret < 0)
-               goto err;
-
        ret = batadv_originator_init(bat_priv);
-       if (ret < 0)
-               goto err;
+       if (ret < 0) {
+               atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+               goto err_orig;
+       }
 
        ret = batadv_tt_init(bat_priv);
-       if (ret < 0)
-               goto err;
+       if (ret < 0) {
+               atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+               goto err_tt;
+       }
+
+       ret = batadv_v_mesh_init(bat_priv);
+       if (ret < 0) {
+               atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+               goto err_v;
+       }
 
        ret = batadv_bla_init(bat_priv);
-       if (ret < 0)
-               goto err;
+       if (ret < 0) {
+               atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+               goto err_bla;
+       }
 
        ret = batadv_dat_init(bat_priv);
-       if (ret < 0)
-               goto err;
+       if (ret < 0) {
+               atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+               goto err_dat;
+       }
 
        ret = batadv_nc_mesh_init(bat_priv);
-       if (ret < 0)
-               goto err;
+       if (ret < 0) {
+               atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+               goto err_nc;
+       }
 
        batadv_gw_init(bat_priv);
        batadv_mcast_init(bat_priv);
@@ -222,8 +234,20 @@ int batadv_mesh_init(struct net_device *soft_iface)
 
        return 0;
 
-err:
-       batadv_mesh_free(soft_iface);
+err_nc:
+       batadv_dat_free(bat_priv);
+err_dat:
+       batadv_bla_free(bat_priv);
+err_bla:
+       batadv_v_mesh_free(bat_priv);
+err_v:
+       batadv_tt_free(bat_priv);
+err_tt:
+       batadv_originator_free(bat_priv);
+err_orig:
+       batadv_purge_outstanding_packets(bat_priv, NULL);
+       atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
+
        return ret;
 }
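
The rewrite swaps a single catch-all batadv_mesh_free() for the canonical unwind ladder: each label undoes exactly the steps that succeeded, in reverse order, so a half-initialised mesh is never torn down as if it were complete. The shape of the ladder, standalone:

#include <stdlib.h>

static void *a, *b, *c;

/* each failure jumps to the label that unwinds only what already
 * succeeded; on success the resources are kept (an init function
 * hands them to its caller) */
int init_all(void)
{
	a = malloc(16);
	if (!a)
		goto err_a;
	b = malloc(16);
	if (!b)
		goto err_b;
	c = malloc(16);
	if (!c)
		goto err_c;
	return 0;

err_c:
	free(b);	/* undo step b */
err_b:
	free(a);	/* undo step a */
err_a:
	return -1;
}

int main(void)
{
	return init_all();
}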
 
index 9f06132..0a7f1d3 100644 (file)
@@ -152,8 +152,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
                                   &batadv_nc_coding_hash_lock_class_key);
 
        bat_priv->nc.decoding_hash = batadv_hash_new(128);
-       if (!bat_priv->nc.decoding_hash)
+       if (!bat_priv->nc.decoding_hash) {
+               batadv_hash_destroy(bat_priv->nc.coding_hash);
                goto err;
+       }
 
        batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
                                   &batadv_nc_decoding_hash_lock_class_key);
index e0b3dac..4b7ad66 100644 (file)
@@ -4162,8 +4162,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
                return ret;
 
        ret = batadv_tt_global_init(bat_priv);
-       if (ret < 0)
+       if (ret < 0) {
+               batadv_tt_local_table_free(bat_priv);
                return ret;
+       }
 
        batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
                                     batadv_tt_tvlv_unicast_handler_v1,
index a6a68e1..6ccda68 100644 (file)
@@ -32,10 +32,6 @@ static const struct rhashtable_params br_fdb_rht_params = {
 };
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
-static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-                     const unsigned char *addr, u16 vid);
-static void fdb_notify(struct net_bridge *br,
-                      const struct net_bridge_fdb_entry *, int, bool);
 
 int __init br_fdb_init(void)
 {
@@ -87,6 +83,128 @@ static void fdb_rcu_free(struct rcu_head *head)
        kmem_cache_free(br_fdb_cache, ent);
 }
 
+static int fdb_to_nud(const struct net_bridge *br,
+                     const struct net_bridge_fdb_entry *fdb)
+{
+       if (test_bit(BR_FDB_LOCAL, &fdb->flags))
+               return NUD_PERMANENT;
+       else if (test_bit(BR_FDB_STATIC, &fdb->flags))
+               return NUD_NOARP;
+       else if (has_expired(br, fdb))
+               return NUD_STALE;
+       else
+               return NUD_REACHABLE;
+}
+
+static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
+                        const struct net_bridge_fdb_entry *fdb,
+                        u32 portid, u32 seq, int type, unsigned int flags)
+{
+       const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+       unsigned long now = jiffies;
+       struct nda_cacheinfo ci;
+       struct nlmsghdr *nlh;
+       struct ndmsg *ndm;
+
+       nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+       if (nlh == NULL)
+               return -EMSGSIZE;
+
+       ndm = nlmsg_data(nlh);
+       ndm->ndm_family  = AF_BRIDGE;
+       ndm->ndm_pad1    = 0;
+       ndm->ndm_pad2    = 0;
+       ndm->ndm_flags   = 0;
+       ndm->ndm_type    = 0;
+       ndm->ndm_ifindex = dst ? dst->dev->ifindex : br->dev->ifindex;
+       ndm->ndm_state   = fdb_to_nud(br, fdb);
+
+       if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
+               ndm->ndm_flags |= NTF_OFFLOADED;
+       if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
+               ndm->ndm_flags |= NTF_EXT_LEARNED;
+       if (test_bit(BR_FDB_STICKY, &fdb->flags))
+               ndm->ndm_flags |= NTF_STICKY;
+
+       if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
+               goto nla_put_failure;
+       if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
+               goto nla_put_failure;
+       ci.ndm_used      = jiffies_to_clock_t(now - fdb->used);
+       ci.ndm_confirmed = 0;
+       ci.ndm_updated   = jiffies_to_clock_t(now - fdb->updated);
+       ci.ndm_refcnt    = 0;
+       if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
+               goto nla_put_failure;
+
+       if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
+                                       &fdb->key.vlan_id))
+               goto nla_put_failure;
+
+       if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+               struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
+               u8 notify_bits = FDB_NOTIFY_BIT;
+
+               if (!nest)
+                       goto nla_put_failure;
+               if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
+                       notify_bits |= FDB_NOTIFY_INACTIVE_BIT;
+
+               if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) {
+                       nla_nest_cancel(skb, nest);
+                       goto nla_put_failure;
+               }
+
+               nla_nest_end(skb, nest);
+       }
+
+       nlmsg_end(skb, nlh);
+       return 0;
+
+nla_put_failure:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+static inline size_t fdb_nlmsg_size(void)
+{
+       return NLMSG_ALIGN(sizeof(struct ndmsg))
+               + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+               + nla_total_size(sizeof(u32)) /* NDA_MASTER */
+               + nla_total_size(sizeof(u16)) /* NDA_VLAN */
+               + nla_total_size(sizeof(struct nda_cacheinfo))
+               + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
+               + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
+}
+
+static void fdb_notify(struct net_bridge *br,
+                      const struct net_bridge_fdb_entry *fdb, int type,
+                      bool swdev_notify)
+{
+       struct net *net = dev_net(br->dev);
+       struct sk_buff *skb;
+       int err = -ENOBUFS;
+
+       if (swdev_notify)
+               br_switchdev_fdb_notify(br, fdb, type);
+
+       skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
+       if (skb == NULL)
+               goto errout;
+
+       err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
+       if (err < 0) {
+               /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
+               WARN_ON(err == -EMSGSIZE);
+               kfree_skb(skb);
+               goto errout;
+       }
+       rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+       return;
+errout:
+       rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+}
+
 static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
                                                 const unsigned char *addr,
                                                 __u16 vid)
@@ -257,6 +375,66 @@ void br_fdb_find_delete_local(struct net_bridge *br,
        spin_unlock_bh(&br->hash_lock);
 }
 
+static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
+                                              struct net_bridge_port *source,
+                                              const unsigned char *addr,
+                                              __u16 vid,
+                                              unsigned long flags)
+{
+       struct net_bridge_fdb_entry *fdb;
+       int err;
+
+       fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
+       if (!fdb)
+               return NULL;
+
+       memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
+       WRITE_ONCE(fdb->dst, source);
+       fdb->key.vlan_id = vid;
+       fdb->flags = flags;
+       fdb->updated = fdb->used = jiffies;
+       err = rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, &fdb->rhnode,
+                                           br_fdb_rht_params);
+       if (err) {
+               kmem_cache_free(br_fdb_cache, fdb);
+               return NULL;
+       }
+
+       hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
+
+       return fdb;
+}
+
+static int fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
+                        const unsigned char *addr, u16 vid)
+{
+       struct net_bridge_fdb_entry *fdb;
+
+       if (!is_valid_ether_addr(addr))
+               return -EINVAL;
+
+       fdb = br_fdb_find(br, addr, vid);
+       if (fdb) {
+               /* it is okay to have multiple ports with same
+                * address, just use the first one.
+                */
+               if (test_bit(BR_FDB_LOCAL, &fdb->flags))
+                       return 0;
+               br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
+                       source ? source->dev->name : br->dev->name, addr, vid);
+               fdb_delete(br, fdb, true);
+       }
+
+       fdb = fdb_create(br, source, addr, vid,
+                        BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
+       if (!fdb)
+               return -ENOMEM;
+
+       fdb_add_hw_addr(br, addr);
+       fdb_notify(br, fdb, RTM_NEWNEIGH, true);
+       return 0;
+}
+
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 {
        struct net_bridge_vlan_group *vg;
@@ -283,7 +461,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 
 insert:
        /* insert new address,  may fail if invalid address or dup. */
-       fdb_insert(br, p, newaddr, 0);
+       fdb_add_local(br, p, newaddr, 0);
 
        if (!vg || !vg->num_vlans)
                goto done;
@@ -293,7 +471,7 @@ insert:
         * from under us.
         */
        list_for_each_entry(v, &vg->vlan_list, vlist)
-               fdb_insert(br, p, newaddr, v->vid);
+               fdb_add_local(br, p, newaddr, v->vid);
 
 done:
        spin_unlock_bh(&br->hash_lock);
@@ -313,7 +491,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
            !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
                fdb_delete_local(br, NULL, f);
 
-       fdb_insert(br, NULL, newaddr, 0);
+       fdb_add_local(br, NULL, newaddr, 0);
        vg = br_vlan_group(br);
        if (!vg || !vg->num_vlans)
                goto out;
@@ -328,7 +506,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
                if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
                    !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
                        fdb_delete_local(br, NULL, f);
-               fdb_insert(br, NULL, newaddr, v->vid);
+               fdb_add_local(br, NULL, newaddr, v->vid);
        }
 out:
        spin_unlock_bh(&br->hash_lock);
@@ -503,71 +681,14 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
        return num;
 }
 
-static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
-                                              struct net_bridge_port *source,
-                                              const unsigned char *addr,
-                                              __u16 vid,
-                                              unsigned long flags)
-{
-       struct net_bridge_fdb_entry *fdb;
-
-       fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
-       if (fdb) {
-               memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
-               WRITE_ONCE(fdb->dst, source);
-               fdb->key.vlan_id = vid;
-               fdb->flags = flags;
-               fdb->updated = fdb->used = jiffies;
-               if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
-                                                 &fdb->rhnode,
-                                                 br_fdb_rht_params)) {
-                       kmem_cache_free(br_fdb_cache, fdb);
-                       fdb = NULL;
-               } else {
-                       hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
-               }
-       }
-       return fdb;
-}
-
-static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-                 const unsigned char *addr, u16 vid)
-{
-       struct net_bridge_fdb_entry *fdb;
-
-       if (!is_valid_ether_addr(addr))
-               return -EINVAL;
-
-       fdb = br_fdb_find(br, addr, vid);
-       if (fdb) {
-               /* it is okay to have multiple ports with same
-                * address, just use the first one.
-                */
-               if (test_bit(BR_FDB_LOCAL, &fdb->flags))
-                       return 0;
-               br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
-                      source ? source->dev->name : br->dev->name, addr, vid);
-               fdb_delete(br, fdb, true);
-       }
-
-       fdb = fdb_create(br, source, addr, vid,
-                        BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
-       if (!fdb)
-               return -ENOMEM;
-
-       fdb_add_hw_addr(br, addr);
-       fdb_notify(br, fdb, RTM_NEWNEIGH, true);
-       return 0;
-}
-
 /* Add entry for local address of interface */
-int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-                 const unsigned char *addr, u16 vid)
+int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
+                    const unsigned char *addr, u16 vid)
 {
        int ret;
 
        spin_lock_bh(&br->hash_lock);
-       ret = fdb_insert(br, source, addr, vid);
+       ret = fdb_add_local(br, source, addr, vid);
        spin_unlock_bh(&br->hash_lock);
        return ret;
 }
@@ -638,182 +759,6 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
        }
 }
 
-static int fdb_to_nud(const struct net_bridge *br,
-                     const struct net_bridge_fdb_entry *fdb)
-{
-       if (test_bit(BR_FDB_LOCAL, &fdb->flags))
-               return NUD_PERMANENT;
-       else if (test_bit(BR_FDB_STATIC, &fdb->flags))
-               return NUD_NOARP;
-       else if (has_expired(br, fdb))
-               return NUD_STALE;
-       else
-               return NUD_REACHABLE;
-}
-
-static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
-                        const struct net_bridge_fdb_entry *fdb,
-                        u32 portid, u32 seq, int type, unsigned int flags)
-{
-       const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
-       unsigned long now = jiffies;
-       struct nda_cacheinfo ci;
-       struct nlmsghdr *nlh;
-       struct ndmsg *ndm;
-
-       nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
-       if (nlh == NULL)
-               return -EMSGSIZE;
-
-       ndm = nlmsg_data(nlh);
-       ndm->ndm_family  = AF_BRIDGE;
-       ndm->ndm_pad1    = 0;
-       ndm->ndm_pad2    = 0;
-       ndm->ndm_flags   = 0;
-       ndm->ndm_type    = 0;
-       ndm->ndm_ifindex = dst ? dst->dev->ifindex : br->dev->ifindex;
-       ndm->ndm_state   = fdb_to_nud(br, fdb);
-
-       if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
-               ndm->ndm_flags |= NTF_OFFLOADED;
-       if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
-               ndm->ndm_flags |= NTF_EXT_LEARNED;
-       if (test_bit(BR_FDB_STICKY, &fdb->flags))
-               ndm->ndm_flags |= NTF_STICKY;
-
-       if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
-               goto nla_put_failure;
-       if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
-               goto nla_put_failure;
-       ci.ndm_used      = jiffies_to_clock_t(now - fdb->used);
-       ci.ndm_confirmed = 0;
-       ci.ndm_updated   = jiffies_to_clock_t(now - fdb->updated);
-       ci.ndm_refcnt    = 0;
-       if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
-               goto nla_put_failure;
-
-       if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
-                                       &fdb->key.vlan_id))
-               goto nla_put_failure;
-
-       if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
-               struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
-               u8 notify_bits = FDB_NOTIFY_BIT;
-
-               if (!nest)
-                       goto nla_put_failure;
-               if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
-                       notify_bits |= FDB_NOTIFY_INACTIVE_BIT;
-
-               if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) {
-                       nla_nest_cancel(skb, nest);
-                       goto nla_put_failure;
-               }
-
-               nla_nest_end(skb, nest);
-       }
-
-       nlmsg_end(skb, nlh);
-       return 0;
-
-nla_put_failure:
-       nlmsg_cancel(skb, nlh);
-       return -EMSGSIZE;
-}
-
-static inline size_t fdb_nlmsg_size(void)
-{
-       return NLMSG_ALIGN(sizeof(struct ndmsg))
-               + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
-               + nla_total_size(sizeof(u32)) /* NDA_MASTER */
-               + nla_total_size(sizeof(u16)) /* NDA_VLAN */
-               + nla_total_size(sizeof(struct nda_cacheinfo))
-               + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
-               + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
-}
-
-static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
-                            const struct net_bridge_fdb_entry *fdb,
-                            unsigned long action, const void *ctx)
-{
-       const struct net_bridge_port *p = READ_ONCE(fdb->dst);
-       struct switchdev_notifier_fdb_info item;
-       int err;
-
-       item.addr = fdb->key.addr.addr;
-       item.vid = fdb->key.vlan_id;
-       item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
-       item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
-       item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
-       item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
-       item.info.ctx = ctx;
-
-       err = nb->notifier_call(nb, action, &item);
-       return notifier_to_errno(err);
-}
-
-int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
-                 struct notifier_block *nb)
-{
-       struct net_bridge_fdb_entry *fdb;
-       struct net_bridge *br;
-       unsigned long action;
-       int err = 0;
-
-       if (!nb)
-               return 0;
-
-       if (!netif_is_bridge_master(br_dev))
-               return -EINVAL;
-
-       br = netdev_priv(br_dev);
-
-       if (adding)
-               action = SWITCHDEV_FDB_ADD_TO_DEVICE;
-       else
-               action = SWITCHDEV_FDB_DEL_TO_DEVICE;
-
-       rcu_read_lock();
-
-       hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
-               err = br_fdb_replay_one(br, nb, fdb, action, ctx);
-               if (err)
-                       break;
-       }
-
-       rcu_read_unlock();
-
-       return err;
-}
-
-static void fdb_notify(struct net_bridge *br,
-                      const struct net_bridge_fdb_entry *fdb, int type,
-                      bool swdev_notify)
-{
-       struct net *net = dev_net(br->dev);
-       struct sk_buff *skb;
-       int err = -ENOBUFS;
-
-       if (swdev_notify)
-               br_switchdev_fdb_notify(br, fdb, type);
-
-       skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
-       if (skb == NULL)
-               goto errout;
-
-       err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
-       if (err < 0) {
-               /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
-               WARN_ON(err == -EMSGSIZE);
-               kfree_skb(skb);
-               goto errout;
-       }
-       rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
-       return;
-errout:
-       rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
-}
-
 /* Dump information about entries, in response to GETNEIGH */
 int br_fdb_dump(struct sk_buff *skb,
                struct netlink_callback *cb,
index c11bba3..c1183fe 100644 (file)
@@ -670,7 +670,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
        else
                netdev_set_rx_headroom(dev, br_hr);
 
-       if (br_fdb_insert(br, p, dev->dev_addr, 0))
+       if (br_fdb_add_local(br, p, dev->dev_addr, 0))
                netdev_err(dev, "failed to insert local address into bridge forwarding table\n");
 
        if (br->dev->addr_assign_type != NET_ADDR_SET) {
index 61ccf46..4556d91 100644 (file)
@@ -552,252 +552,16 @@ out:
        return nlmsg_size;
 }
 
-struct br_mdb_complete_info {
-       struct net_bridge_port *port;
-       struct br_ip ip;
-};
-
-static void br_mdb_complete(struct net_device *dev, int err, void *priv)
-{
-       struct br_mdb_complete_info *data = priv;
-       struct net_bridge_port_group __rcu **pp;
-       struct net_bridge_port_group *p;
-       struct net_bridge_mdb_entry *mp;
-       struct net_bridge_port *port = data->port;
-       struct net_bridge *br = port->br;
-
-       if (err)
-               goto err;
-
-       spin_lock_bh(&br->multicast_lock);
-       mp = br_mdb_ip_get(br, &data->ip);
-       if (!mp)
-               goto out;
-       for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
-            pp = &p->next) {
-               if (p->key.port != port)
-                       continue;
-               p->flags |= MDB_PG_FLAGS_OFFLOAD;
-       }
-out:
-       spin_unlock_bh(&br->multicast_lock);
-err:
-       kfree(priv);
-}
-
-static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
-                                     const struct net_bridge_mdb_entry *mp)
-{
-       if (mp->addr.proto == htons(ETH_P_IP))
-               ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
-#if IS_ENABLED(CONFIG_IPV6)
-       else if (mp->addr.proto == htons(ETH_P_IPV6))
-               ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
-#endif
-       else
-               ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
-
-       mdb->vid = mp->addr.vid;
-}
-
-static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
-                            const struct switchdev_obj_port_mdb *mdb,
-                            unsigned long action, const void *ctx,
-                            struct netlink_ext_ack *extack)
-{
-       struct switchdev_notifier_port_obj_info obj_info = {
-               .info = {
-                       .dev = dev,
-                       .extack = extack,
-                       .ctx = ctx,
-               },
-               .obj = &mdb->obj,
-       };
-       int err;
-
-       err = nb->notifier_call(nb, action, &obj_info);
-       return notifier_to_errno(err);
-}
-
-static int br_mdb_queue_one(struct list_head *mdb_list,
-                           enum switchdev_obj_id id,
-                           const struct net_bridge_mdb_entry *mp,
-                           struct net_device *orig_dev)
-{
-       struct switchdev_obj_port_mdb *mdb;
-
-       mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
-       if (!mdb)
-               return -ENOMEM;
-
-       mdb->obj.id = id;
-       mdb->obj.orig_dev = orig_dev;
-       br_switchdev_mdb_populate(mdb, mp);
-       list_add_tail(&mdb->obj.list, mdb_list);
-
-       return 0;
-}
-
-int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
-                 const void *ctx, bool adding, struct notifier_block *nb,
-                 struct netlink_ext_ack *extack)
-{
-       const struct net_bridge_mdb_entry *mp;
-       struct switchdev_obj *obj, *tmp;
-       struct net_bridge *br;
-       unsigned long action;
-       LIST_HEAD(mdb_list);
-       int err = 0;
-
-       ASSERT_RTNL();
-
-       if (!nb)
-               return 0;
-
-       if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
-               return -EINVAL;
-
-       br = netdev_priv(br_dev);
-
-       if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
-               return 0;
-
-       /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
-        * because the write-side protection is br->multicast_lock. But we
-        * need to emulate the [ blocking ] calling context of a regular
-        * switchdev event, so since both br->multicast_lock and RCU read side
-        * critical sections are atomic, we have no choice but to pick the RCU
-        * read side lock, queue up all our events, leave the critical section
-        * and notify switchdev from blocking context.
-        */
-       rcu_read_lock();
-
-       hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
-               struct net_bridge_port_group __rcu * const *pp;
-               const struct net_bridge_port_group *p;
-
-               if (mp->host_joined) {
-                       err = br_mdb_queue_one(&mdb_list,
-                                              SWITCHDEV_OBJ_ID_HOST_MDB,
-                                              mp, br_dev);
-                       if (err) {
-                               rcu_read_unlock();
-                               goto out_free_mdb;
-                       }
-               }
-
-               for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
-                    pp = &p->next) {
-                       if (p->key.port->dev != dev)
-                               continue;
-
-                       err = br_mdb_queue_one(&mdb_list,
-                                              SWITCHDEV_OBJ_ID_PORT_MDB,
-                                              mp, dev);
-                       if (err) {
-                               rcu_read_unlock();
-                               goto out_free_mdb;
-                       }
-               }
-       }
-
-       rcu_read_unlock();
-
-       if (adding)
-               action = SWITCHDEV_PORT_OBJ_ADD;
-       else
-               action = SWITCHDEV_PORT_OBJ_DEL;
-
-       list_for_each_entry(obj, &mdb_list, list) {
-               err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
-                                       action, ctx, extack);
-               if (err)
-                       goto out_free_mdb;
-       }
-
-out_free_mdb:
-       list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
-               list_del(&obj->list);
-               kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
-       }
-
-       return err;
-}
-
-static void br_mdb_switchdev_host_port(struct net_device *dev,
-                                      struct net_device *lower_dev,
-                                      struct net_bridge_mdb_entry *mp,
-                                      int type)
-{
-       struct switchdev_obj_port_mdb mdb = {
-               .obj = {
-                       .id = SWITCHDEV_OBJ_ID_HOST_MDB,
-                       .flags = SWITCHDEV_F_DEFER,
-                       .orig_dev = dev,
-               },
-       };
-
-       br_switchdev_mdb_populate(&mdb, mp);
-
-       switch (type) {
-       case RTM_NEWMDB:
-               switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
-               break;
-       case RTM_DELMDB:
-               switchdev_port_obj_del(lower_dev, &mdb.obj);
-               break;
-       }
-}
-
-static void br_mdb_switchdev_host(struct net_device *dev,
-                                 struct net_bridge_mdb_entry *mp, int type)
-{
-       struct net_device *lower_dev;
-       struct list_head *iter;
-
-       netdev_for_each_lower_dev(dev, lower_dev, iter)
-               br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
-}
-
 void br_mdb_notify(struct net_device *dev,
                   struct net_bridge_mdb_entry *mp,
                   struct net_bridge_port_group *pg,
                   int type)
 {
-       struct br_mdb_complete_info *complete_info;
-       struct switchdev_obj_port_mdb mdb = {
-               .obj = {
-                       .id = SWITCHDEV_OBJ_ID_PORT_MDB,
-                       .flags = SWITCHDEV_F_DEFER,
-               },
-       };
        struct net *net = dev_net(dev);
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       if (pg) {
-               br_switchdev_mdb_populate(&mdb, mp);
-
-               mdb.obj.orig_dev = pg->key.port->dev;
-               switch (type) {
-               case RTM_NEWMDB:
-                       complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
-                       if (!complete_info)
-                               break;
-                       complete_info->port = pg->key.port;
-                       complete_info->ip = mp->addr;
-                       mdb.obj.complete_priv = complete_info;
-                       mdb.obj.complete = br_mdb_complete;
-                       if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
-                               kfree(complete_info);
-                       break;
-               case RTM_DELMDB:
-                       switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
-                       break;
-               }
-       } else {
-               br_mdb_switchdev_host(dev, mp, type);
-       }
+       br_switchdev_mdb_notify(dev, mp, pg, type);
 
        skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC);
        if (!skb)
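
For reference, the mapping that br_switchdev_mdb_populate() performs (removed
here and re-added in br_switchdev.c below) is the standard IP-to-Ethernet
multicast address mapping done by ip_eth_mc_map() and ipv6_eth_mc_map(). A
minimal worked illustration for an IPv4 group:

        struct br_ip ip = { .proto = htons(ETH_P_IP) };
        char mac[ETH_ALEN];

        ip.dst.ip4 = htonl(0xef010101);         /* 239.1.1.1 */
        ip_eth_mc_map(ip.dst.ip4, mac);         /* 01:00:5e + low 23 bits of
                                                 * the group address:
                                                 * 01:00:5e:01:01:01 */

For IPv6, ipv6_eth_mc_map() yields 33:33 plus the low 32 bits of the group
(e.g. ff02::1:ff00:1 -> 33:33:ff:00:00:01); any other protocol falls back to
copying dst.mac_addr verbatim.
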
index 37ca764..5552c00 100644 (file)
@@ -767,8 +767,8 @@ struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
 int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
 int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
                   unsigned long off);
-int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-                 const unsigned char *addr, u16 vid);
+int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
+                    const unsigned char *addr, u16 vid);
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
                   const unsigned char *addr, u16 vid, unsigned long flags);
 
@@ -792,8 +792,6 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                              bool swdev_notify);
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
                          const unsigned char *addr, u16 vid, bool offloaded);
-int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
-                 struct notifier_block *nb);
 
 /* br_forward.c */
 enum br_pkt_type {
@@ -958,9 +956,11 @@ int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
                                      struct netlink_ext_ack *extack);
 bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on);
 
-int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
-                 const void *ctx, bool adding, struct notifier_block *nb,
-                 struct netlink_ext_ack *extack);
+void br_switchdev_mdb_notify(struct net_device *dev,
+                            struct net_bridge_mdb_entry *mp,
+                            struct net_bridge_port_group *pg,
+                            int type);
+
 int br_rports_fill_info(struct sk_buff *skb,
                        const struct net_bridge_mcast *brmctx);
 int br_multicast_dump_querier_state(struct sk_buff *skb,
@@ -1396,12 +1396,11 @@ static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan,
        return false;
 }
 
-static inline int br_mdb_replay(struct net_device *br_dev,
-                               struct net_device *dev, const void *ctx,
-                               bool adding, struct notifier_block *nb,
-                               struct netlink_ext_ack *extack)
+static inline void br_switchdev_mdb_notify(struct net_device *dev,
+                                          struct net_bridge_mdb_entry *mp,
+                                          struct net_bridge_port_group *pg,
+                                          int type)
 {
-       return -EOPNOTSUPP;
 }
 
 static inline bool
@@ -1461,9 +1460,6 @@ void br_vlan_notify(const struct net_bridge *br,
                    const struct net_bridge_port *p,
                    u16 vid, u16 vid_range,
                    int cmd);
-int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
-                  const void *ctx, bool adding, struct notifier_block *nb,
-                  struct netlink_ext_ack *extack);
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
                             const struct net_bridge_vlan *range_end);
 
@@ -1710,13 +1706,11 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
        return true;
 }
 
-static inline int br_vlan_replay(struct net_device *br_dev,
-                                struct net_device *dev, const void *ctx,
-                                bool adding, struct notifier_block *nb,
-                                struct netlink_ext_ack *extack)
+static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
 {
-       return -EOPNOTSUPP;
+       return 0;
 }
+
 #endif
 
 /* br_vlan_options.c */
index 6bf518d..f8fbaaa 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
+#include <net/ip.h>
 #include <net/switchdev.h>
 
 #include "br_private.h"
@@ -122,28 +123,38 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
        return 0;
 }
 
+static void br_switchdev_fdb_populate(struct net_bridge *br,
+                                     struct switchdev_notifier_fdb_info *item,
+                                     const struct net_bridge_fdb_entry *fdb,
+                                     const void *ctx)
+{
+       const struct net_bridge_port *p = READ_ONCE(fdb->dst);
+
+       item->addr = fdb->key.addr.addr;
+       item->vid = fdb->key.vlan_id;
+       item->added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+       item->offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
+       item->is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
+       item->info.dev = (!p || item->is_local) ? br->dev : p->dev;
+       item->info.ctx = ctx;
+}
+
 void
 br_switchdev_fdb_notify(struct net_bridge *br,
                        const struct net_bridge_fdb_entry *fdb, int type)
 {
-       const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
-       struct switchdev_notifier_fdb_info info = {
-               .addr = fdb->key.addr.addr,
-               .vid = fdb->key.vlan_id,
-               .added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags),
-               .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
-               .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
-       };
-       struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
+       struct switchdev_notifier_fdb_info item;
+
+       br_switchdev_fdb_populate(br, &item, fdb, NULL);
 
        switch (type) {
        case RTM_DELNEIGH:
                call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
-                                        dev, &info.info, NULL);
+                                        item.info.dev, &item.info, NULL);
                break;
        case RTM_NEWNEIGH:
                call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
-                                        dev, &info.info, NULL);
+                                        item.info.dev, &item.info, NULL);
                break;
        }
 }
@@ -270,6 +281,397 @@ static void nbp_switchdev_del(struct net_bridge_port *p)
        }
 }
 
+static int
+br_switchdev_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
+                           const struct net_bridge_fdb_entry *fdb,
+                           unsigned long action, const void *ctx)
+{
+       struct switchdev_notifier_fdb_info item;
+       int err;
+
+       br_switchdev_fdb_populate(br, &item, fdb, ctx);
+
+       err = nb->notifier_call(nb, action, &item);
+       return notifier_to_errno(err);
+}
+
+static int
+br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx,
+                       bool adding, struct notifier_block *nb)
+{
+       struct net_bridge_fdb_entry *fdb;
+       struct net_bridge *br;
+       unsigned long action;
+       int err = 0;
+
+       if (!nb)
+               return 0;
+
+       if (!netif_is_bridge_master(br_dev))
+               return -EINVAL;
+
+       br = netdev_priv(br_dev);
+
+       if (adding)
+               action = SWITCHDEV_FDB_ADD_TO_DEVICE;
+       else
+               action = SWITCHDEV_FDB_DEL_TO_DEVICE;
+
+       rcu_read_lock();
+
+       hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
+               err = br_switchdev_fdb_replay_one(br, nb, fdb, action, ctx);
+               if (err)
+                       break;
+       }
+
+       rcu_read_unlock();
+
+       return err;
+}
+
+static int
+br_switchdev_vlan_replay_one(struct notifier_block *nb,
+                            struct net_device *dev,
+                            struct switchdev_obj_port_vlan *vlan,
+                            const void *ctx, unsigned long action,
+                            struct netlink_ext_ack *extack)
+{
+       struct switchdev_notifier_port_obj_info obj_info = {
+               .info = {
+                       .dev = dev,
+                       .extack = extack,
+                       .ctx = ctx,
+               },
+               .obj = &vlan->obj,
+       };
+       int err;
+
+       err = nb->notifier_call(nb, action, &obj_info);
+       return notifier_to_errno(err);
+}
+
+static int br_switchdev_vlan_replay(struct net_device *br_dev,
+                                   struct net_device *dev,
+                                   const void *ctx, bool adding,
+                                   struct notifier_block *nb,
+                                   struct netlink_ext_ack *extack)
+{
+       struct net_bridge_vlan_group *vg;
+       struct net_bridge_vlan *v;
+       struct net_bridge_port *p;
+       struct net_bridge *br;
+       unsigned long action;
+       int err = 0;
+       u16 pvid;
+
+       ASSERT_RTNL();
+
+       if (!nb)
+               return 0;
+
+       if (!netif_is_bridge_master(br_dev))
+               return -EINVAL;
+
+       if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
+               return -EINVAL;
+
+       if (netif_is_bridge_master(dev)) {
+               br = netdev_priv(dev);
+               vg = br_vlan_group(br);
+               p = NULL;
+       } else {
+               p = br_port_get_rtnl(dev);
+               if (WARN_ON(!p))
+                       return -EINVAL;
+               vg = nbp_vlan_group(p);
+               br = p->br;
+       }
+
+       if (!vg)
+               return 0;
+
+       if (adding)
+               action = SWITCHDEV_PORT_OBJ_ADD;
+       else
+               action = SWITCHDEV_PORT_OBJ_DEL;
+
+       pvid = br_get_pvid(vg);
+
+       list_for_each_entry(v, &vg->vlan_list, vlist) {
+               struct switchdev_obj_port_vlan vlan = {
+                       .obj.orig_dev = dev,
+                       .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+                       .flags = br_vlan_flags(v, pvid),
+                       .vid = v->vid,
+               };
+
+               if (!br_vlan_should_use(v))
+                       continue;
+
+               err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx,
+                                                  action, extack);
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+struct br_switchdev_mdb_complete_info {
+       struct net_bridge_port *port;
+       struct br_ip ip;
+};
+
+static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *priv)
+{
+       struct br_switchdev_mdb_complete_info *data = priv;
+       struct net_bridge_port_group __rcu **pp;
+       struct net_bridge_port_group *p;
+       struct net_bridge_mdb_entry *mp;
+       struct net_bridge_port *port = data->port;
+       struct net_bridge *br = port->br;
+
+       if (err)
+               goto err;
+
+       spin_lock_bh(&br->multicast_lock);
+       mp = br_mdb_ip_get(br, &data->ip);
+       if (!mp)
+               goto out;
+       for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
+            pp = &p->next) {
+               if (p->key.port != port)
+                       continue;
+               p->flags |= MDB_PG_FLAGS_OFFLOAD;
+       }
+out:
+       spin_unlock_bh(&br->multicast_lock);
+err:
+       kfree(priv);
+}
+
+static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
+                                     const struct net_bridge_mdb_entry *mp)
+{
+       if (mp->addr.proto == htons(ETH_P_IP))
+               ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
+#if IS_ENABLED(CONFIG_IPV6)
+       else if (mp->addr.proto == htons(ETH_P_IPV6))
+               ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
+#endif
+       else
+               ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
+
+       mdb->vid = mp->addr.vid;
+}
+
+static void br_switchdev_host_mdb_one(struct net_device *dev,
+                                     struct net_device *lower_dev,
+                                     struct net_bridge_mdb_entry *mp,
+                                     int type)
+{
+       struct switchdev_obj_port_mdb mdb = {
+               .obj = {
+                       .id = SWITCHDEV_OBJ_ID_HOST_MDB,
+                       .flags = SWITCHDEV_F_DEFER,
+                       .orig_dev = dev,
+               },
+       };
+
+       br_switchdev_mdb_populate(&mdb, mp);
+
+       switch (type) {
+       case RTM_NEWMDB:
+               switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
+               break;
+       case RTM_DELMDB:
+               switchdev_port_obj_del(lower_dev, &mdb.obj);
+               break;
+       }
+}
+
+static void br_switchdev_host_mdb(struct net_device *dev,
+                                 struct net_bridge_mdb_entry *mp, int type)
+{
+       struct net_device *lower_dev;
+       struct list_head *iter;
+
+       netdev_for_each_lower_dev(dev, lower_dev, iter)
+               br_switchdev_host_mdb_one(dev, lower_dev, mp, type);
+}
+
+static int
+br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
+                           const struct switchdev_obj_port_mdb *mdb,
+                           unsigned long action, const void *ctx,
+                           struct netlink_ext_ack *extack)
+{
+       struct switchdev_notifier_port_obj_info obj_info = {
+               .info = {
+                       .dev = dev,
+                       .extack = extack,
+                       .ctx = ctx,
+               },
+               .obj = &mdb->obj,
+       };
+       int err;
+
+       err = nb->notifier_call(nb, action, &obj_info);
+       return notifier_to_errno(err);
+}
+
+static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+                                     enum switchdev_obj_id id,
+                                     const struct net_bridge_mdb_entry *mp,
+                                     struct net_device *orig_dev)
+{
+       struct switchdev_obj_port_mdb *mdb;
+
+       mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
+       if (!mdb)
+               return -ENOMEM;
+
+       mdb->obj.id = id;
+       mdb->obj.orig_dev = orig_dev;
+       br_switchdev_mdb_populate(mdb, mp);
+       list_add_tail(&mdb->obj.list, mdb_list);
+
+       return 0;
+}
+
+void br_switchdev_mdb_notify(struct net_device *dev,
+                            struct net_bridge_mdb_entry *mp,
+                            struct net_bridge_port_group *pg,
+                            int type)
+{
+       struct br_switchdev_mdb_complete_info *complete_info;
+       struct switchdev_obj_port_mdb mdb = {
+               .obj = {
+                       .id = SWITCHDEV_OBJ_ID_PORT_MDB,
+                       .flags = SWITCHDEV_F_DEFER,
+               },
+       };
+
+       if (!pg)
+               return br_switchdev_host_mdb(dev, mp, type);
+
+       br_switchdev_mdb_populate(&mdb, mp);
+
+       mdb.obj.orig_dev = pg->key.port->dev;
+       switch (type) {
+       case RTM_NEWMDB:
+               complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
+               if (!complete_info)
+                       break;
+               complete_info->port = pg->key.port;
+               complete_info->ip = mp->addr;
+               mdb.obj.complete_priv = complete_info;
+               mdb.obj.complete = br_switchdev_mdb_complete;
+               if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
+                       kfree(complete_info);
+               break;
+       case RTM_DELMDB:
+               switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
+               break;
+       }
+}
+#endif
+
+static int
+br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+                       const void *ctx, bool adding, struct notifier_block *nb,
+                       struct netlink_ext_ack *extack)
+{
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       const struct net_bridge_mdb_entry *mp;
+       struct switchdev_obj *obj, *tmp;
+       struct net_bridge *br;
+       unsigned long action;
+       LIST_HEAD(mdb_list);
+       int err = 0;
+
+       ASSERT_RTNL();
+
+       if (!nb)
+               return 0;
+
+       if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+               return -EINVAL;
+
+       br = netdev_priv(br_dev);
+
+       if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+               return 0;
+
+       /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
+        * because the write-side protection is br->multicast_lock. But we
+        * need to emulate the [ blocking ] calling context of a regular
+        * switchdev event, and since both br->multicast_lock and RCU read-side
+        * critical sections are atomic, we have no choice but to take the RCU
+        * read-side lock, queue up all our events, leave the critical section
+        * and notify switchdev from blocking context.
+        */
+       rcu_read_lock();
+
+       hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+               struct net_bridge_port_group __rcu * const *pp;
+               const struct net_bridge_port_group *p;
+
+               if (mp->host_joined) {
+                       err = br_switchdev_mdb_queue_one(&mdb_list,
+                                                        SWITCHDEV_OBJ_ID_HOST_MDB,
+                                                        mp, br_dev);
+                       if (err) {
+                               rcu_read_unlock();
+                               goto out_free_mdb;
+                       }
+               }
+
+               for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+                    pp = &p->next) {
+                       if (p->key.port->dev != dev)
+                               continue;
+
+                       err = br_switchdev_mdb_queue_one(&mdb_list,
+                                                        SWITCHDEV_OBJ_ID_PORT_MDB,
+                                                        mp, dev);
+                       if (err) {
+                               rcu_read_unlock();
+                               goto out_free_mdb;
+                       }
+               }
+       }
+
+       rcu_read_unlock();
+
+       if (adding)
+               action = SWITCHDEV_PORT_OBJ_ADD;
+       else
+               action = SWITCHDEV_PORT_OBJ_DEL;
+
+       list_for_each_entry(obj, &mdb_list, list) {
+               err = br_switchdev_mdb_replay_one(nb, dev,
+                                                 SWITCHDEV_OBJ_PORT_MDB(obj),
+                                                 action, ctx, extack);
+               if (err)
+                       goto out_free_mdb;
+       }
+
+out_free_mdb:
+       list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
+               list_del(&obj->list);
+               kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
+       }
+
+       if (err)
+               return err;
+#endif
+
+       return 0;
+}
+
 static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
                                   struct notifier_block *atomic_nb,
                                   struct notifier_block *blocking_nb,
@@ -279,15 +681,17 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
        struct net_device *dev = p->dev;
        int err;
 
-       err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+       err = br_switchdev_vlan_replay(br_dev, dev, ctx, true, blocking_nb,
+                                      extack);
        if (err && err != -EOPNOTSUPP)
                return err;
 
-       err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+       err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
+                                     extack);
        if (err && err != -EOPNOTSUPP)
                return err;
 
-       err = br_fdb_replay(br_dev, ctx, true, atomic_nb);
+       err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb);
        if (err && err != -EOPNOTSUPP)
                return err;
 
@@ -302,11 +706,11 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
        struct net_device *br_dev = p->br->dev;
        struct net_device *dev = p->dev;
 
-       br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+       br_switchdev_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
 
-       br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+       br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
 
-       br_fdb_replay(br_dev, ctx, false, atomic_nb);
+       br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
 }
 
 /* Let the bridge know that this port is offloaded, so that it can assign a
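
The locking comment in br_switchdev_mdb_replay() above describes a reusable
pattern: the list can only be walked in atomic context (RCU), while the
notifier must be called from blocking context, so entries are first
snapshotted into a private list under GFP_ATOMIC and replayed after the
critical section. Stripped of the switchdev specifics, the shape is roughly
the sketch below; 'struct snap', 'item', 'shared_list' and notify_one() are
hypothetical stand-ins:

        struct snap {                   /* hypothetical snapshot type */
                struct list_head list;
                /* fields copied out of the shared entry */
        };

        LIST_HEAD(queue);
        struct snap *s, *tmp;
        int err = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(item, &shared_list, node) {
                s = kzalloc(sizeof(*s), GFP_ATOMIC);    /* must not sleep */
                if (!s) {
                        err = -ENOMEM;
                        break;
                }
                /* copy out whatever the notifier will need from 'item' */
                list_add_tail(&s->list, &queue);
        }
        rcu_read_unlock();

        /* Back in blocking context: the notifier may sleep now. */
        list_for_each_entry(s, &queue, list) {
                if (!err)
                        err = notify_one(s);    /* hypothetical notifier */
        }

        list_for_each_entry_safe(s, tmp, &queue, list) {
                list_del(&s->list);
                kfree(s);
        }
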
index 19f65ab..49e105e 100644 (file)
@@ -293,7 +293,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
 
        /* Add the dev mac and count the vlan only if it's usable */
        if (br_vlan_should_use(v)) {
-               err = br_fdb_insert(br, p, dev->dev_addr, v->vid);
+               err = br_fdb_add_local(br, p, dev->dev_addr, v->vid);
                if (err) {
                        br_err(br, "failed insert local address into bridge forwarding table\n");
                        goto out_filt;
@@ -683,8 +683,7 @@ static int br_vlan_add_existing(struct net_bridge *br,
                        goto err_flags;
                }
                /* It was only kept for port vlans, now make it real */
-               err = br_fdb_insert(br, NULL, br->dev->dev_addr,
-                                   vlan->vid);
+               err = br_fdb_add_local(br, NULL, br->dev->dev_addr, vlan->vid);
                if (err) {
                        br_err(br, "failed to insert local address into bridge forwarding table\n");
                        goto err_fdb_insert;
@@ -1861,90 +1860,6 @@ out_kfree:
        kfree_skb(skb);
 }
 
-static int br_vlan_replay_one(struct notifier_block *nb,
-                             struct net_device *dev,
-                             struct switchdev_obj_port_vlan *vlan,
-                             const void *ctx, unsigned long action,
-                             struct netlink_ext_ack *extack)
-{
-       struct switchdev_notifier_port_obj_info obj_info = {
-               .info = {
-                       .dev = dev,
-                       .extack = extack,
-                       .ctx = ctx,
-               },
-               .obj = &vlan->obj,
-       };
-       int err;
-
-       err = nb->notifier_call(nb, action, &obj_info);
-       return notifier_to_errno(err);
-}
-
-int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
-                  const void *ctx, bool adding, struct notifier_block *nb,
-                  struct netlink_ext_ack *extack)
-{
-       struct net_bridge_vlan_group *vg;
-       struct net_bridge_vlan *v;
-       struct net_bridge_port *p;
-       struct net_bridge *br;
-       unsigned long action;
-       int err = 0;
-       u16 pvid;
-
-       ASSERT_RTNL();
-
-       if (!nb)
-               return 0;
-
-       if (!netif_is_bridge_master(br_dev))
-               return -EINVAL;
-
-       if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
-               return -EINVAL;
-
-       if (netif_is_bridge_master(dev)) {
-               br = netdev_priv(dev);
-               vg = br_vlan_group(br);
-               p = NULL;
-       } else {
-               p = br_port_get_rtnl(dev);
-               if (WARN_ON(!p))
-                       return -EINVAL;
-               vg = nbp_vlan_group(p);
-               br = p->br;
-       }
-
-       if (!vg)
-               return 0;
-
-       if (adding)
-               action = SWITCHDEV_PORT_OBJ_ADD;
-       else
-               action = SWITCHDEV_PORT_OBJ_DEL;
-
-       pvid = br_get_pvid(vg);
-
-       list_for_each_entry(v, &vg->vlan_list, vlist) {
-               struct switchdev_obj_port_vlan vlan = {
-                       .obj.orig_dev = dev,
-                       .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-                       .flags = br_vlan_flags(v, pvid),
-                       .vid = v->vid,
-               };
-
-               if (!br_vlan_should_use(v))
-                       continue;
-
-               err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack);
-               if (err)
-                       return err;
-       }
-
-       return err;
-}
-
 /* check if v_curr can enter a range ending in range_end */
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
                             const struct net_bridge_vlan *range_end)
index 4e3d19a..edeb811 100644 (file)
@@ -3171,6 +3171,12 @@ static u16 skb_tx_hash(const struct net_device *dev,
 
                qoffset = sb_dev->tc_to_txq[tc].offset;
                qcount = sb_dev->tc_to_txq[tc].count;
+               if (unlikely(!qcount)) {
+                       net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
+                                            sb_dev->name, qoffset, tc);
+                       qoffset = 0;
+                       qcount = dev->real_num_tx_queues;
+               }
        }
 
        if (skb_rx_queue_recorded(skb)) {
@@ -3914,7 +3920,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
        skb_reset_mac_header(skb);
        __skb_pull(skb, skb_network_offset(skb));
        skb->pkt_type = PACKET_LOOPBACK;
-       skb->ip_summed = CHECKSUM_UNNECESSARY;
+       if (skb->ip_summed == CHECKSUM_NONE)
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
        WARN_ON(!skb_dst(skb));
        skb_dst_force(skb);
        netif_rx_ni(skb);
@@ -9922,6 +9929,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
                }
        }
 
+       if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) {
+               netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n");
+               features &= ~NETIF_F_LRO;
+       }
+
        if (features & NETIF_F_HW_TLS_TX) {
                bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
                        (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
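
The skb_tx_hash() hunk above guards against a traffic class whose tc_to_txq
entry has a zero count, which would otherwise hand reciprocal_scale() an empty
range. Reduced to its essentials, queue selection with the new fallback looks
roughly like the helper below (an illustrative reduction, not the kernel
function itself):

        static u16 hash_to_queue(u32 hash, u16 qoffset, u16 qcount,
                                 u16 real_num_tx_queues)
        {
                if (unlikely(!qcount)) {
                        /* misconfigured tc: fall back to the full range */
                        qoffset = 0;
                        qcount = real_num_tx_queues;
                }
                /* reciprocal_scale() maps hash uniformly into [0, qcount) */
                return qoffset + reciprocal_scale(hash, qcount);
        }
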
index f38ef4b..2d8abe8 100644 (file)
@@ -102,7 +102,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
        .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
        .global = true,
 };
-EXPORT_SYMBOL(devlink_dpipe_header_ethernet);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
 
 static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
        {
@@ -119,7 +119,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
        .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
        .global = true,
 };
-EXPORT_SYMBOL(devlink_dpipe_header_ipv4);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
 
 static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
        {
@@ -136,7 +136,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
        .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
        .global = true,
 };
-EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
@@ -3365,7 +3365,7 @@ void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
                kfree(value[value_index].mask);
        }
 }
-EXPORT_SYMBOL(devlink_dpipe_entry_clear);
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
 
 static int devlink_dpipe_entries_fill(struct genl_info *info,
                                      enum devlink_command cmd, int flags,
@@ -4925,45 +4925,6 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
                                               info, DEVLINK_CMD_PARAM_NEW);
 }
 
-static int devlink_param_register_one(struct devlink *devlink,
-                                     unsigned int port_index,
-                                     struct list_head *param_list,
-                                     const struct devlink_param *param,
-                                     enum devlink_command cmd)
-{
-       struct devlink_param_item *param_item;
-
-       if (devlink_param_find_by_name(param_list, param->name))
-               return -EEXIST;
-
-       if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
-               WARN_ON(param->get || param->set);
-       else
-               WARN_ON(!param->get || !param->set);
-
-       param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
-       if (!param_item)
-               return -ENOMEM;
-       param_item->param = param;
-
-       list_add_tail(&param_item->list, param_list);
-       return 0;
-}
-
-static void devlink_param_unregister_one(struct devlink *devlink,
-                                        unsigned int port_index,
-                                        struct list_head *param_list,
-                                        const struct devlink_param *param,
-                                        enum devlink_command cmd)
-{
-       struct devlink_param_item *param_item;
-
-       param_item = devlink_param_find_by_name(param_list, param->name);
-       WARN_ON(!param_item);
-       list_del(&param_item->list);
-       kfree(param_item);
-}
-
 static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
                                                struct netlink_callback *cb)
 {
@@ -10092,73 +10053,6 @@ static int devlink_param_verify(const struct devlink_param *param)
                return devlink_param_driver_verify(param);
 }
 
-static int __devlink_param_register_one(struct devlink *devlink,
-                                       unsigned int port_index,
-                                       struct list_head *param_list,
-                                       const struct devlink_param *param,
-                                       enum devlink_command reg_cmd)
-{
-       int err;
-
-       err = devlink_param_verify(param);
-       if (err)
-               return err;
-
-       return devlink_param_register_one(devlink, port_index,
-                                         param_list, param, reg_cmd);
-}
-
-static int __devlink_params_register(struct devlink *devlink,
-                                    unsigned int port_index,
-                                    struct list_head *param_list,
-                                    const struct devlink_param *params,
-                                    size_t params_count,
-                                    enum devlink_command reg_cmd,
-                                    enum devlink_command unreg_cmd)
-{
-       const struct devlink_param *param = params;
-       int i;
-       int err;
-
-       mutex_lock(&devlink->lock);
-       for (i = 0; i < params_count; i++, param++) {
-               err = __devlink_param_register_one(devlink, port_index,
-                                                  param_list, param, reg_cmd);
-               if (err)
-                       goto rollback;
-       }
-
-       mutex_unlock(&devlink->lock);
-       return 0;
-
-rollback:
-       if (!i)
-               goto unlock;
-       for (param--; i > 0; i--, param--)
-               devlink_param_unregister_one(devlink, port_index, param_list,
-                                            param, unreg_cmd);
-unlock:
-       mutex_unlock(&devlink->lock);
-       return err;
-}
-
-static void __devlink_params_unregister(struct devlink *devlink,
-                                       unsigned int port_index,
-                                       struct list_head *param_list,
-                                       const struct devlink_param *params,
-                                       size_t params_count,
-                                       enum devlink_command cmd)
-{
-       const struct devlink_param *param = params;
-       int i;
-
-       mutex_lock(&devlink->lock);
-       for (i = 0; i < params_count; i++, param++)
-               devlink_param_unregister_one(devlink, 0, param_list, param,
-                                            cmd);
-       mutex_unlock(&devlink->lock);
-}
-
 /**
  *     devlink_params_register - register configuration parameters
  *
@@ -10172,12 +10066,25 @@ int devlink_params_register(struct devlink *devlink,
                            const struct devlink_param *params,
                            size_t params_count)
 {
+       const struct devlink_param *param = params;
+       int i, err;
+
        ASSERT_DEVLINK_NOT_REGISTERED(devlink);
 
-       return __devlink_params_register(devlink, 0, &devlink->param_list,
-                                        params, params_count,
-                                        DEVLINK_CMD_PARAM_NEW,
-                                        DEVLINK_CMD_PARAM_DEL);
+       for (i = 0; i < params_count; i++, param++) {
+               err = devlink_param_register(devlink, param);
+               if (err)
+                       goto rollback;
+       }
+       return 0;
+
+rollback:
+       if (!i)
+               return err;
+
+       for (param--; i > 0; i--, param--)
+               devlink_param_unregister(devlink, param);
+       return err;
 }
 EXPORT_SYMBOL_GPL(devlink_params_register);
 
@@ -10191,11 +10098,13 @@ void devlink_params_unregister(struct devlink *devlink,
                               const struct devlink_param *params,
                               size_t params_count)
 {
+       const struct devlink_param *param = params;
+       int i;
+
        ASSERT_DEVLINK_NOT_REGISTERED(devlink);
 
-       return __devlink_params_unregister(devlink, 0, &devlink->param_list,
-                                          params, params_count,
-                                          DEVLINK_CMD_PARAM_DEL);
+       for (i = 0; i < params_count; i++, param++)
+               devlink_param_unregister(devlink, param);
 }
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
@@ -10211,15 +10120,26 @@ EXPORT_SYMBOL_GPL(devlink_params_unregister);
 int devlink_param_register(struct devlink *devlink,
                           const struct devlink_param *param)
 {
-       int err;
+       struct devlink_param_item *param_item;
 
        ASSERT_DEVLINK_NOT_REGISTERED(devlink);
 
-       mutex_lock(&devlink->lock);
-       err = __devlink_param_register_one(devlink, 0, &devlink->param_list,
-                                          param, DEVLINK_CMD_PARAM_NEW);
-       mutex_unlock(&devlink->lock);
-       return err;
+       WARN_ON(devlink_param_verify(param));
+       WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name));
+
+       if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+               WARN_ON(param->get || param->set);
+       else
+               WARN_ON(!param->get || !param->set);
+
+       param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+       if (!param_item)
+               return -ENOMEM;
+
+       param_item->param = param;
+
+       list_add_tail(&param_item->list, &devlink->param_list);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(devlink_param_register);
 
@@ -10231,12 +10151,15 @@ EXPORT_SYMBOL_GPL(devlink_param_register);
 void devlink_param_unregister(struct devlink *devlink,
                              const struct devlink_param *param)
 {
+       struct devlink_param_item *param_item;
+
        ASSERT_DEVLINK_NOT_REGISTERED(devlink);
 
-       mutex_lock(&devlink->lock);
-       devlink_param_unregister_one(devlink, 0, &devlink->param_list, param,
-                                    DEVLINK_CMD_PARAM_DEL);
-       mutex_unlock(&devlink->lock);
+       param_item =
+               devlink_param_find_by_name(&devlink->param_list, param->name);
+       WARN_ON(!param_item);
+       list_del(&param_item->list);
+       kfree(param_item);
 }
 EXPORT_SYMBOL_GPL(devlink_param_unregister);
 
@@ -10754,7 +10677,8 @@ devlink_trap_group_notify(struct devlink *devlink,
 
        WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
                     cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
-       ASSERT_DEVLINK_REGISTERED(devlink);
+       if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+               return;
 
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
@@ -11081,6 +11005,9 @@ devlink_trap_group_register(struct devlink *devlink,
        }
 
        list_add_tail(&group_item->list, &devlink->trap_group_list);
+       devlink_trap_group_notify(devlink, group_item,
+                                 DEVLINK_CMD_TRAP_GROUP_NEW);
+
        return 0;
 
 err_group_init:
@@ -11101,6 +11028,8 @@ devlink_trap_group_unregister(struct devlink *devlink,
        if (WARN_ON_ONCE(!group_item))
                return;
 
+       devlink_trap_group_notify(devlink, group_item,
+                                 DEVLINK_CMD_TRAP_GROUP_DEL);
        list_del(&group_item->list);
        free_percpu(group_item->stats);
        kfree(group_item);
@@ -11120,8 +11049,6 @@ int devlink_trap_groups_register(struct devlink *devlink,
 {
        int i, err;
 
-       ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
        mutex_lock(&devlink->lock);
        for (i = 0; i < groups_count; i++) {
                const struct devlink_trap_group *group = &groups[i];
@@ -11159,8 +11086,6 @@ void devlink_trap_groups_unregister(struct devlink *devlink,
 {
        int i;
 
-       ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
        mutex_lock(&devlink->lock);
        for (i = groups_count - 1; i >= 0; i--)
                devlink_trap_group_unregister(devlink, &groups[i]);
@@ -11178,7 +11103,8 @@ devlink_trap_policer_notify(struct devlink *devlink,
 
        WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
                     cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
-       ASSERT_DEVLINK_REGISTERED(devlink);
+       if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+               return;
 
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
@@ -11220,6 +11146,9 @@ devlink_trap_policer_register(struct devlink *devlink,
        }
 
        list_add_tail(&policer_item->list, &devlink->trap_policer_list);
+       devlink_trap_policer_notify(devlink, policer_item,
+                                   DEVLINK_CMD_TRAP_POLICER_NEW);
+
        return 0;
 
 err_policer_init:
@@ -11237,6 +11166,8 @@ devlink_trap_policer_unregister(struct devlink *devlink,
        if (WARN_ON_ONCE(!policer_item))
                return;
 
+       devlink_trap_policer_notify(devlink, policer_item,
+                                   DEVLINK_CMD_TRAP_POLICER_DEL);
        list_del(&policer_item->list);
        if (devlink->ops->trap_policer_fini)
                devlink->ops->trap_policer_fini(devlink, policer);
@@ -11258,8 +11189,6 @@ devlink_trap_policers_register(struct devlink *devlink,
 {
        int i, err;
 
-       ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
        mutex_lock(&devlink->lock);
        for (i = 0; i < policers_count; i++) {
                const struct devlink_trap_policer *policer = &policers[i];
@@ -11301,8 +11230,6 @@ devlink_trap_policers_unregister(struct devlink *devlink,
 {
        int i;
 
-       ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
        mutex_lock(&devlink->lock);
        for (i = policers_count - 1; i >= 0; i--)
                devlink_trap_policer_unregister(devlink, &policers[i]);
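
The reworked devlink_params_register() above is a register-all-or-roll-back
loop: on failure at index i, entries [0, i) are unregistered in reverse order
before the error is propagated. The same shape in isolation, with hypothetical
reg_one()/unreg_one() helpers:

        static int register_all(struct item *items, size_t count)
        {
                size_t i;
                int err;

                for (i = 0; i < count; i++) {
                        err = reg_one(&items[i]);       /* hypothetical */
                        if (err)
                                goto rollback;
                }
                return 0;

        rollback:
                while (i--)                     /* undo [0, i) in reverse */
                        unreg_one(&items[i]);   /* hypothetical */
                return err;
        }
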
index d6e4e0b..9c01c64 100644 (file)
@@ -2028,9 +2028,9 @@ int netdev_register_kobject(struct net_device *ndev)
 int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
                        const struct net *net_new)
 {
+       kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
+       kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
        struct device *dev = &ndev->dev;
-       kuid_t old_uid, new_uid;
-       kgid_t old_gid, new_gid;
        int error;
 
        net_ns_get_ownership(net_old, &old_uid, &old_gid);
index 74601bb..67a9188 100644 (file)
@@ -70,6 +70,7 @@
 #include <net/xfrm.h>
 #include <net/mpls.h>
 #include <net/mptcp.h>
+#include <net/mctp.h>
 #include <net/page_pool.h>
 
 #include <linux/uaccess.h>
@@ -80,6 +81,7 @@
 #include <linux/indirect_call_wrapper.h>
 
 #include "datagram.h"
+#include "sock_destructor.h"
 
 struct kmem_cache *skbuff_head_cache __ro_after_init;
 static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
@@ -1803,30 +1805,39 @@ EXPORT_SYMBOL(skb_realloc_headroom);
 struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
 {
        int delta = headroom - skb_headroom(skb);
+       int osize = skb_end_offset(skb);
+       struct sock *sk = skb->sk;
 
        if (WARN_ONCE(delta <= 0,
                      "%s is expecting an increase in the headroom", __func__))
                return skb;
 
-       /* pskb_expand_head() might crash, if skb is shared */
-       if (skb_shared(skb)) {
+       delta = SKB_DATA_ALIGN(delta);
+       /* pskb_expand_head() might crash, if skb is shared. */
+       if (skb_shared(skb) || !is_skb_wmem(skb)) {
                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
 
-               if (likely(nskb)) {
-                       if (skb->sk)
-                               skb_set_owner_w(nskb, skb->sk);
-                       consume_skb(skb);
-               } else {
-                       kfree_skb(skb);
-               }
+               if (unlikely(!nskb))
+                       goto fail;
+
+               if (sk)
+                       skb_set_owner_w(nskb, sk);
+               consume_skb(skb);
                skb = nskb;
        }
-       if (skb &&
-           pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
-               kfree_skb(skb);
-               skb = NULL;
+       if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC))
+               goto fail;
+
+       if (sk && is_skb_wmem(skb)) {
+               delta = skb_end_offset(skb) - osize;
+               refcount_add(delta, &sk->sk_wmem_alloc);
+               skb->truesize += delta;
        }
        return skb;
+
+fail:
+       kfree_skb(skb);
+       return NULL;
 }
 EXPORT_SYMBOL(skb_expand_head);
 
@@ -4430,6 +4441,9 @@ static const u8 skb_ext_type_len[] = {
 #if IS_ENABLED(CONFIG_MPTCP)
        [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
 #endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+       [SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow),
+#endif
 };
 
 static __always_inline unsigned int skb_ext_total_length(void)
@@ -4447,6 +4461,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
 #if IS_ENABLED(CONFIG_MPTCP)
                skb_ext_type_len[SKB_EXT_MPTCP] +
 #endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+               skb_ext_type_len[SKB_EXT_MCTP] +
+#endif
                0;
 }
 
@@ -6519,6 +6536,14 @@ static void skb_ext_put_sp(struct sec_path *sp)
 }
 #endif
 
+#ifdef CONFIG_MCTP_FLOWS
+static void skb_ext_put_mctp(struct mctp_flow *flow)
+{
+       if (flow->key)
+               mctp_key_unref(flow->key);
+}
+#endif
+
 void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
 {
        struct skb_ext *ext = skb->extensions;
@@ -6554,6 +6579,10 @@ free_now:
        if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
                skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
 #endif
+#ifdef CONFIG_MCTP_FLOWS
+       if (__skb_ext_exist(ext, SKB_EXT_MCTP))
+               skb_ext_put_mctp(skb_ext_get_ptr(ext, SKB_EXT_MCTP));
+#endif
 
        kmem_cache_free(skbuff_ext_cache, ext);
 }
index 2d6249b..a86ef7e 100644 (file)
@@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 }
 EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
 
+bool sk_msg_is_readable(struct sock *sk)
+{
+       struct sk_psock *psock;
+       bool empty = true;
+
+       rcu_read_lock();
+       psock = sk_psock(sk);
+       if (likely(psock))
+               empty = list_empty(&psock->ingress_msg);
+       rcu_read_unlock();
+       return !empty;
+}
+EXPORT_SYMBOL_GPL(sk_msg_is_readable);
+
 static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
                                                  struct sk_buff *skb)
 {
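
sk_msg_is_readable() reports readability for data parked on the psock ingress
queue, which a plain receive-queue check cannot see. The tcp.c hunk further
down consumes it through a sk_is_readable() helper; judging from that call
site, the helper presumably dispatches through a protocol hook along these
lines (a sketch, not necessarily the exact definition):

        /* sketch; assumes the ->sock_is_readable hook this series adds */
        static inline bool sk_is_readable(struct sock *sk)
        {
                if (sk->sk_prot->sock_is_readable)
                        return sk->sk_prot->sock_is_readable(sk);
                return false;
        }
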
diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h
new file mode 100644 (file)
index 0000000..2f396e6
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_SOCK_DESTRUCTOR_H
+#define _NET_CORE_SOCK_DESTRUCTOR_H
+#include <net/tcp.h>
+
+static inline bool is_skb_wmem(const struct sk_buff *skb)
+{
+       return skb->destructor == sock_wfree ||
+              skb->destructor == __sock_wfree ||
+              (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
+}
+#endif
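
is_skb_wmem() identifies skbs whose destructor charges the owning socket's
write allocation; skb_expand_head() uses it both to decide when a private
clone is needed and when the truesize growth may be added to sk_wmem_alloc.
Note the consume-on-failure contract: on error the skb is freed and NULL is
returned, so the old pointer must not be reused. A typical (illustrative)
call site, with needed_headroom and hdr_len standing in for real values:

        /* illustrative: grow headroom before prepending a tunnel header */
        skb = skb_expand_head(skb, needed_headroom);
        if (!skb)
                return -ENOMEM;         /* original skb already freed */
        skb_push(skb, hdr_len);
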
index c8496c1..5f88526 100644 (file)
@@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = {
                .mode           = 0600,
                .proc_handler   = proc_dolongvec_minmax_bpf_restricted,
                .extra1         = &long_one,
-               .extra2         = &long_max,
+               .extra2         = &bpf_jit_limit_max,
        },
 #endif
        {
index cc92ccb..5ddc29f 100644 (file)
@@ -143,8 +143,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
        if (xdp_rxq->reg_state == REG_STATE_UNUSED)
                return;
 
-       WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
-
        xdp_rxq_info_unreg_mem_model(xdp_rxq);
 
        xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
index dbda0e0..db066f0 100644 (file)
@@ -2468,10 +2468,9 @@ static bool dsa_foreign_dev_check(const struct net_device *dev,
 }
 
 static int dsa_slave_fdb_event(struct net_device *dev,
-                              const struct net_device *orig_dev,
-                              const void *ctx,
-                              const struct switchdev_notifier_fdb_info *fdb_info,
-                              unsigned long event)
+                              struct net_device *orig_dev,
+                              unsigned long event, const void *ctx,
+                              const struct switchdev_notifier_fdb_info *fdb_info)
 {
        struct dsa_switchdev_event_work *switchdev_work;
        struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -2525,24 +2524,6 @@ static int dsa_slave_fdb_event(struct net_device *dev,
        return 0;
 }
 
-static int
-dsa_slave_fdb_add_to_device(struct net_device *dev,
-                           const struct net_device *orig_dev, const void *ctx,
-                           const struct switchdev_notifier_fdb_info *fdb_info)
-{
-       return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
-                                  SWITCHDEV_FDB_ADD_TO_DEVICE);
-}
-
-static int
-dsa_slave_fdb_del_to_device(struct net_device *dev,
-                           const struct net_device *orig_dev, const void *ctx,
-                           const struct switchdev_notifier_fdb_info *fdb_info)
-{
-       return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
-                                  SWITCHDEV_FDB_DEL_TO_DEVICE);
-}
-
 /* Called under rcu_read_lock() */
 static int dsa_slave_switchdev_event(struct notifier_block *unused,
                                     unsigned long event, void *ptr)
@@ -2557,18 +2538,12 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
                                                     dsa_slave_port_attr_set);
                return notifier_from_errno(err);
        case SWITCHDEV_FDB_ADD_TO_DEVICE:
-               err = switchdev_handle_fdb_add_to_device(dev, ptr,
-                                                        dsa_slave_dev_check,
-                                                        dsa_foreign_dev_check,
-                                                        dsa_slave_fdb_add_to_device,
-                                                        NULL);
-               return notifier_from_errno(err);
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
-               err = switchdev_handle_fdb_del_to_device(dev, ptr,
-                                                        dsa_slave_dev_check,
-                                                        dsa_foreign_dev_check,
-                                                        dsa_slave_fdb_del_to_device,
-                                                        NULL);
+               err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
+                                                          dsa_slave_dev_check,
+                                                          dsa_foreign_dev_check,
+                                                          dsa_slave_fdb_event,
+                                                          NULL);
                return notifier_from_errno(err);
        default:
                return NOTIFY_DONE;
index 8eb4283..0189e3c 100644 (file)
@@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk)
        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
        WARN_ON(sk->sk_wmem_queued);
-       WARN_ON(sk->sk_forward_alloc);
+       WARN_ON(sk_forward_alloc_get(sk));
 
        kfree(rcu_dereference_protected(inet->inet_opt, 1));
        dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
@@ -769,26 +769,28 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
        DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
 
        sin->sin_family = AF_INET;
+       lock_sock(sk);
        if (peer) {
                if (!inet->inet_dport ||
                    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
-                    peer == 1))
+                    peer == 1)) {
+                       release_sock(sk);
                        return -ENOTCONN;
+               }
                sin->sin_port = inet->inet_dport;
                sin->sin_addr.s_addr = inet->inet_daddr;
-               BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-                                           CGROUP_INET4_GETPEERNAME,
-                                           NULL);
+               BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+                                      CGROUP_INET4_GETPEERNAME);
        } else {
                __be32 addr = inet->inet_rcv_saddr;
                if (!addr)
                        addr = inet->inet_saddr;
                sin->sin_port = inet->inet_sport;
                sin->sin_addr.s_addr = addr;
-               BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-                                           CGROUP_INET4_GETSOCKNAME,
-                                           NULL);
+               BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+                                      CGROUP_INET4_GETSOCKNAME);
        }
+       release_sock(sk);
        memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
        return sizeof(*sin);
 }
index ef78972..c8fa6e7 100644 (file)
@@ -271,7 +271,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
                struct inet_diag_meminfo minfo = {
                        .idiag_rmem = sk_rmem_alloc_get(sk),
                        .idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
-                       .idiag_fmem = sk->sk_forward_alloc,
+                       .idiag_fmem = sk_forward_alloc_get(sk),
                        .idiag_tmem = sk_wmem_alloc_get(sk),
                };
 
index 816d8aa..9d41d5d 100644 (file)
@@ -262,6 +262,11 @@ static int __init ic_open_devs(void)
                                 dev->name, able, d->xid);
                }
        }
+       /* Devices with a complex topology like SFP ethernet interfaces need
+        * the rtnl_lock at init. The carrier wait-loop must therefore run
+        * without holding it.
+        */
+       rtnl_unlock();
 
        /* no point in waiting if we could not bring up at least one device */
        if (!ic_first_dev)
@@ -274,9 +279,13 @@ static int __init ic_open_devs(void)
                           msecs_to_jiffies(carrier_timeout * 1000))) {
                int wait, elapsed;
 
+               rtnl_lock();
                for_each_netdev(&init_net, dev)
-                       if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
+                       if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) {
+                               rtnl_unlock();
                                goto have_carrier;
+                       }
+               rtnl_unlock();
 
                msleep(1);
 
@@ -289,7 +298,6 @@ static int __init ic_open_devs(void)
                next_msg = jiffies + msecs_to_jiffies(20000);
        }
 have_carrier:
-       rtnl_unlock();
 
        *last = NULL;
 
index d0b848f..a7b1138 100644 (file)
@@ -481,10 +481,7 @@ static bool tcp_stream_is_readable(struct sock *sk, int target)
 {
        if (tcp_epollin_ready(sk, target))
                return true;
-
-       if (sk->sk_prot->stream_memory_read)
-               return sk->sk_prot->stream_memory_read(sk);
-       return false;
+       return sk_is_readable(sk);
 }
 
 /*
@@ -658,10 +655,8 @@ void tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
-       skb->csum    = 0;
        tcb->seq     = tcb->end_seq = tp->write_seq;
        tcb->tcp_flags = TCPHDR_ACK;
-       tcb->sacked  = 0;
        __skb_header_release(skb);
        tcp_add_write_queue_tail(sk, skb);
        sk_wmem_queued_add(sk, skb->truesize);
@@ -876,11 +871,7 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
                }
                if (likely(mem_scheduled)) {
                        skb_reserve(skb, MAX_TCP_HEADER);
-                       /*
-                        * Make sure that we have exactly size bytes
-                        * available to the caller, no more, no less.
-                        */
-                       skb->reserved_tailroom = skb->end - skb->tail - size;
+                       skb->ip_summed = CHECKSUM_PARTIAL;
                        INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
                        return skb;
                }
@@ -933,8 +924,10 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
  * importantly be able to generate EPOLLOUT for Edge Trigger epoll()
  * users.
  */
-void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
+void tcp_remove_empty_skb(struct sock *sk)
 {
+       struct sk_buff *skb = tcp_write_queue_tail(sk);
+
        if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
                tcp_unlink_write_queue(skb, sk);
                if (tcp_write_queue_empty(sk))
@@ -996,7 +989,6 @@ new_segment:
        skb->truesize += copy;
        sk_wmem_queued_add(sk, copy);
        sk_mem_charge(sk, copy);
-       skb->ip_summed = CHECKSUM_PARTIAL;
        WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
        TCP_SKB_CB(skb)->end_seq += copy;
        tcp_skb_pcount_set(skb, 0);
@@ -1087,7 +1079,7 @@ out:
        return copied;
 
 do_error:
-       tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
+       tcp_remove_empty_skb(sk);
        if (copied)
                goto out;
 out_err:
@@ -1292,7 +1284,6 @@ new_segment:
                                goto wait_for_space;
 
                        process_backlog++;
-                       skb->ip_summed = CHECKSUM_PARTIAL;
 
                        tcp_skb_entail(sk, skb);
                        copy = size_goal;
@@ -1309,14 +1300,7 @@ new_segment:
                if (copy > msg_data_left(msg))
                        copy = msg_data_left(msg);
 
-               /* Where to copy to? */
-               if (skb_availroom(skb) > 0 && !zc) {
-                       /* We have some space in skb head. Superb! */
-                       copy = min_t(int, copy, skb_availroom(skb));
-                       err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
-                       if (err)
-                               goto do_fault;
-               } else if (!zc) {
+               if (!zc) {
                        bool merge = true;
                        int i = skb_shinfo(skb)->nr_frags;
                        struct page_frag *pfrag = sk_page_frag(sk);
@@ -1415,9 +1399,7 @@ out_nopush:
        return copied + copied_syn;
 
 do_error:
-       skb = tcp_write_queue_tail(sk);
-do_fault:
-       tcp_remove_empty_skb(sk, skb);
+       tcp_remove_empty_skb(sk);
 
        if (copied + copied_syn)
                goto out;
index d3e9386..5f4d6f4 100644 (file)
@@ -150,19 +150,6 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
 EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
 
 #ifdef CONFIG_BPF_SYSCALL
-static bool tcp_bpf_stream_read(const struct sock *sk)
-{
-       struct sk_psock *psock;
-       bool empty = true;
-
-       rcu_read_lock();
-       psock = sk_psock(sk);
-       if (likely(psock))
-               empty = list_empty(&psock->ingress_msg);
-       rcu_read_unlock();
-       return !empty;
-}
-
 static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
                             long timeo)
 {
@@ -232,6 +219,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
        bool cork = false, enospc = sk_msg_full(msg);
        struct sock *sk_redir;
        u32 tosend, delta = 0;
+       u32 eval = __SK_NONE;
        int ret;
 
 more_data:
@@ -275,13 +263,24 @@ more_data:
        case __SK_REDIRECT:
                sk_redir = psock->sk_redir;
                sk_msg_apply_bytes(psock, tosend);
+               if (!psock->apply_bytes) {
+                       /* Clean up before releasing the sock lock. */
+                       eval = psock->eval;
+                       psock->eval = __SK_NONE;
+                       psock->sk_redir = NULL;
+               }
                if (psock->cork) {
                        cork = true;
                        psock->cork = NULL;
                }
                sk_msg_return(sk, msg, tosend);
                release_sock(sk);
+
                ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+
+               if (eval == __SK_REDIRECT)
+                       sock_put(sk_redir);
+
                lock_sock(sk);
                if (unlikely(ret < 0)) {
                        int free = sk_msg_free_nocharge(sk, msg);
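The eval snapshot above is an instance of a deferred-release pattern: decide, while the socket lock is still held, whether this call owns the final reference to the redirect socket, and drop that reference only after the unlocked transmit has finished. A generic sketch with hypothetical helper names:

        struct sock *redir;
        bool last_use = false;

        lock_sock(sk);
        redir = psock->sk_redir;
        if (all_bytes_applied(psock)) {         /* hypothetical: apply_bytes hit zero */
                last_use = true;                /* ownership decided under the lock */
                psock->sk_redir = NULL;
        }
        release_sock(sk);

        transmit(redir);                        /* hypothetical redirected send, unlocked */
        if (last_use)
                sock_put(redir);                /* reference dropped only afterwards */
        lock_sock(sk);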
@@ -479,7 +478,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
        prot[TCP_BPF_BASE].unhash               = sock_map_unhash;
        prot[TCP_BPF_BASE].close                = sock_map_close;
        prot[TCP_BPF_BASE].recvmsg              = tcp_bpf_recvmsg;
-       prot[TCP_BPF_BASE].stream_memory_read   = tcp_bpf_stream_read;
+       prot[TCP_BPF_BASE].sock_is_readable     = sk_msg_is_readable;
 
        prot[TCP_BPF_TX]                        = prot[TCP_BPF_BASE];
        prot[TCP_BPF_TX].sendmsg                = tcp_bpf_sendmsg;
index c0c55a8..6867e5d 100644 (file)
@@ -394,7 +394,6 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
        skb->ip_summed = CHECKSUM_PARTIAL;
 
        TCP_SKB_CB(skb)->tcp_flags = flags;
-       TCP_SKB_CB(skb)->sacked = 0;
 
        tcp_skb_pcount_set(skb, 1);
 
@@ -1590,8 +1589,6 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 
        skb_split(skb, buff, len);
 
-       buff->ip_summed = CHECKSUM_PARTIAL;
-
        buff->tstamp = skb->tstamp;
        tcp_fragment_tstamp(skb, buff);
 
@@ -1676,7 +1673,6 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
        delta_truesize = __pskb_trim_head(skb, len);
 
        TCP_SKB_CB(skb)->seq += len;
-       skb->ip_summed = CHECKSUM_PARTIAL;
 
        if (delta_truesize) {
                skb->truesize      -= delta_truesize;
@@ -2142,12 +2138,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
        TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
        TCP_SKB_CB(buff)->tcp_flags = flags;
 
-       /* This packet was never sent out yet, so no SACK bits. */
-       TCP_SKB_CB(buff)->sacked = 0;
-
        tcp_skb_fragment_eor(skb, buff);
 
-       buff->ip_summed = CHECKSUM_PARTIAL;
        skb_split(skb, buff, len);
        tcp_fragment_tstamp(skb, buff);
 
@@ -2401,9 +2393,6 @@ static int tcp_mtu_probe(struct sock *sk)
        TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
        TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
        TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
-       TCP_SKB_CB(nskb)->sacked = 0;
-       nskb->csum = 0;
-       nskb->ip_summed = CHECKSUM_PARTIAL;
 
        tcp_insert_write_queue_before(nskb, skb, sk);
        tcp_highest_sack_replace(sk, skb, nskb);
@@ -3045,13 +3034,9 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 
        BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
-       if (next_skb_size) {
-               if (next_skb_size <= skb_availroom(skb))
-                       skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
-                                     next_skb_size);
-               else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
-                       return false;
-       }
+       if (next_skb_size && !tcp_skb_shift(skb, next_skb, 1, next_skb_size))
+               return false;
+
        tcp_highest_sack_replace(sk, next_skb, skb);
 
        /* Update sequence range on original skb. */
@@ -3757,7 +3742,6 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
        syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false);
        if (!syn_data)
                goto fallback;
-       syn_data->ip_summed = CHECKSUM_PARTIAL;
        memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
        if (space) {
                int copied = copy_from_iter(skb_put(syn_data, space), space,
index 8536b2a..2fffcf2 100644 (file)
@@ -2867,6 +2867,9 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
            !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
                mask &= ~(EPOLLIN | EPOLLRDNORM);
 
+       /* psock ingress_msg queue should not contain any bad checksum frames */
+       if (sk_is_readable(sk))
+               mask |= EPOLLIN | EPOLLRDNORM;
        return mask;
 
 }
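sk_is_readable() is the generic hook introduced by this series; a sketch of its expected shape, given the sock_is_readable callback that the tcp_bpf and udp_bpf hunks install (the helper itself is defined outside this diff):

        static inline bool sk_is_readable(struct sock *sk)
        {
                if (sk->sk_prot->sock_is_readable)
                        return sk->sk_prot->sock_is_readable(sk);
                return false;
        }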
index 7a1d5f4..bbe6569 100644 (file)
@@ -114,6 +114,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
        *prot        = *base;
        prot->close  = sock_map_close;
        prot->recvmsg = udp_bpf_recvmsg;
+       prot->sock_is_readable = sk_msg_is_readable;
 }
 
 static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
index b5878bb..0c4da16 100644 (file)
@@ -521,31 +521,32 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
        sin->sin6_family = AF_INET6;
        sin->sin6_flowinfo = 0;
        sin->sin6_scope_id = 0;
+       lock_sock(sk);
        if (peer) {
-               if (!inet->inet_dport)
-                       return -ENOTCONN;
-               if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
-                   peer == 1)
+               if (!inet->inet_dport ||
+                   (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+                   peer == 1)) {
+                       release_sock(sk);
                        return -ENOTCONN;
+               }
                sin->sin6_port = inet->inet_dport;
                sin->sin6_addr = sk->sk_v6_daddr;
                if (np->sndflow)
                        sin->sin6_flowinfo = np->flow_label;
-               BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-                                           CGROUP_INET6_GETPEERNAME,
-                                           NULL);
+               BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+                                      CGROUP_INET6_GETPEERNAME);
        } else {
                if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                        sin->sin6_addr = np->saddr;
                else
                        sin->sin6_addr = sk->sk_v6_rcv_saddr;
                sin->sin6_port = inet->inet_sport;
-               BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-                                           CGROUP_INET6_GETSOCKNAME,
-                                           NULL);
+               BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+                                      CGROUP_INET6_GETSOCKNAME);
        }
        sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
                                                 sk->sk_bound_dev_if);
+       release_sock(sk);
        return sizeof(*sin);
 }
 EXPORT_SYMBOL(inet6_getname);
index 9b9ef09..3ae25b8 100644 (file)
@@ -6306,11 +6306,11 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
 
 static struct ctl_table ipv6_route_table_template[] = {
        {
-               .procname       =       "flush",
-               .data           =       &init_net.ipv6.sysctl.flush_delay,
+               .procname       =       "max_size",
+               .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
                .maxlen         =       sizeof(int),
-               .mode           =       0200,
-               .proc_handler   =       ipv6_sysctl_rtcache_flush
+               .mode           =       0644,
+               .proc_handler   =       proc_dointvec,
        },
        {
                .procname       =       "gc_thresh",
@@ -6320,11 +6320,11 @@ static struct ctl_table ipv6_route_table_template[] = {
                .proc_handler   =       proc_dointvec,
        },
        {
-               .procname       =       "max_size",
-               .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
+               .procname       =       "flush",
+               .data           =       &init_net.ipv6.sysctl.flush_delay,
                .maxlen         =       sizeof(int),
-               .mode           =       0644,
-               .proc_handler   =       proc_dointvec,
+               .mode           =       0200,
+               .proc_handler   =       ipv6_sysctl_rtcache_flush
        },
        {
                .procname       =       "gc_min_interval",
@@ -6396,10 +6396,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
                        GFP_KERNEL);
 
        if (table) {
-               table[0].data = &net->ipv6.sysctl.flush_delay;
-               table[0].extra1 = net;
+               table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
-               table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+               table[2].data = &net->ipv6.sysctl.flush_delay;
+               table[2].extra1 = net;
                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
@@ -6411,7 +6411,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
 
                /* Don't export sysctls to unprivileged users */
                if (net->user_ns != &init_user_ns)
-                       table[0].procname = NULL;
+                       table[1].procname = NULL;
        }
 
        return table;
index c678e77..2cc9b0e 100644 (file)
@@ -969,7 +969,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
        fl6.flowlabel = label;
 
        buff->ip_summed = CHECKSUM_PARTIAL;
-       buff->csum = 0;
 
        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
 
index a4212a3..15ac08d 100644 (file)
@@ -672,7 +672,7 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata,
                                 u8 *ie, u8 ie_len)
 {
        struct ieee80211_supported_band *sband;
-       const u8 *cap;
+       const struct element *cap;
        const struct ieee80211_he_operation *he_oper = NULL;
 
        sband = ieee80211_get_sband(sdata);
@@ -687,9 +687,10 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata,
 
        sdata->vif.bss_conf.he_support = true;
 
-       cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ie, ie_len);
-       if (cap && cap[1] >= ieee80211_he_oper_size(&cap[3]))
-               he_oper = (void *)(cap + 3);
+       cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ie_len);
+       if (cap && cap->datalen >= 1 + sizeof(*he_oper) &&
+           cap->datalen >= 1 + ieee80211_he_oper_size(cap->data + 1))
+               he_oper = (void *)(cap->data + 1);
 
        if (he_oper)
                sdata->vif.bss_conf.he_oper.params =
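The move from cfg80211_find_ext_ie() to cfg80211_find_ext_elem() trades raw byte offsets (cap[1], cap + 3) for a typed view of the element. The layout the new bounds checks rely on is the standard IE header; for extension elements data[0] is the extension ID, which is why the payload starts at cap->data + 1:

        struct element {
                u8 id;          /* WLAN_EID_EXTENSION for ext elements */
                u8 datalen;     /* length of data[] */
                u8 data[];      /* data[0] = ext ID, payload follows */
        } __packed;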
index 868c922..3a5c0e7 100644 (file)
@@ -1,7 +1,7 @@
 
 menuconfig MCTP
        depends on NET
-       tristate "MCTP core protocol support"
+       bool "MCTP core protocol support"
        help
          Management Component Transport Protocol (MCTP) is an in-system
          protocol for communicating between management controllers and
@@ -16,3 +16,8 @@ config MCTP_TEST
         bool "MCTP core tests" if !KUNIT_ALL_TESTS
         depends on MCTP=y && KUNIT=y
         default KUNIT_ALL_TESTS
+
+config MCTP_FLOWS
+       bool
+       depends on MCTP
+       select SKB_EXTENSIONS
index 3827d62..8799ee7 100644 (file)
@@ -260,6 +260,24 @@ void mctp_dev_put(struct mctp_dev *mdev)
        }
 }
 
+void mctp_dev_release_key(struct mctp_dev *dev, struct mctp_sk_key *key)
+       __must_hold(&key->lock)
+{
+       if (!dev)
+               return;
+       if (dev->ops && dev->ops->release_flow)
+               dev->ops->release_flow(dev, key);
+       key->dev = NULL;
+       mctp_dev_put(dev);
+}
+
+void mctp_dev_set_key(struct mctp_dev *dev, struct mctp_sk_key *key)
+       __must_hold(&key->lock)
+{
+       mctp_dev_hold(dev);
+       key->dev = dev;
+}
+
 static struct mctp_dev *mctp_add_dev(struct net_device *dev)
 {
        struct mctp_dev *mdev;
@@ -414,6 +432,39 @@ static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
        return NOTIFY_OK;
 }
 
+static int mctp_register_netdevice(struct net_device *dev,
+                                  const struct mctp_netdev_ops *ops)
+{
+       struct mctp_dev *mdev;
+
+       mdev = mctp_add_dev(dev);
+       if (IS_ERR(mdev))
+               return PTR_ERR(mdev);
+
+       mdev->ops = ops;
+
+       return register_netdevice(dev);
+}
+
+int mctp_register_netdev(struct net_device *dev,
+                        const struct mctp_netdev_ops *ops)
+{
+       int rc;
+
+       rtnl_lock();
+       rc = mctp_register_netdevice(dev, ops);
+       rtnl_unlock();
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(mctp_register_netdev);
+
+void mctp_unregister_netdev(struct net_device *dev)
+{
+       unregister_netdev(dev);
+}
+EXPORT_SYMBOL_GPL(mctp_unregister_netdev);
+
 static struct rtnl_af_ops mctp_af_ops = {
        .family = AF_MCTP,
        .fill_link_af = mctp_fill_link_af,
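From a driver's point of view, the new registration entry points and the flow-release callback fit together roughly as below; the example_* names are hypothetical:

        static void example_release_flow(struct mctp_dev *mdev,
                                         struct mctp_sk_key *key)
        {
                /* tear down any hardware flow state associated with this key */
        }

        static const struct mctp_netdev_ops example_mctp_ops = {
                .release_flow   = example_release_flow,
        };

        /* at probe time, in place of register_netdev(): */
        rc = mctp_register_netdev(ndev, &example_mctp_ops);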
index c23ab35..46c4482 100644 (file)
@@ -29,6 +29,8 @@
 static const unsigned int mctp_message_maxlen = 64 * 1024;
 static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;
 
+static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);
+
 /* route output callbacks */
 static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
 {
@@ -152,8 +154,19 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
 
 void mctp_key_unref(struct mctp_sk_key *key)
 {
-       if (refcount_dec_and_test(&key->refs))
-               kfree(key);
+       unsigned long flags;
+
+       if (!refcount_dec_and_test(&key->refs))
+               return;
+
+       /* even though no refs exist here, the lock allows us to stay
+        * consistent with the locking requirement of mctp_dev_release_key
+        */
+       spin_lock_irqsave(&key->lock, flags);
+       mctp_dev_release_key(key->dev, key);
+       spin_unlock_irqrestore(&key->lock, flags);
+
+       kfree(key);
 }
 
 static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
@@ -204,6 +217,7 @@ static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
        key->reasm_head = NULL;
        key->reasm_dead = true;
        key->valid = false;
+       mctp_dev_release_key(key->dev, key);
        spin_unlock_irqrestore(&key->lock, flags);
 
        spin_lock_irqsave(&net->mctp.keys_lock, flags);
@@ -222,6 +236,40 @@ static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
 
 }
 
+#ifdef CONFIG_MCTP_FLOWS
+static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
+{
+       struct mctp_flow *flow;
+
+       flow = skb_ext_add(skb, SKB_EXT_MCTP);
+       if (!flow)
+               return;
+
+       refcount_inc(&key->refs);
+       flow->key = key;
+}
+
+static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
+{
+       struct mctp_sk_key *key;
+       struct mctp_flow *flow;
+
+       flow = skb_ext_find(skb, SKB_EXT_MCTP);
+       if (!flow)
+               return;
+
+       key = flow->key;
+
+       if (WARN_ON(key->dev && key->dev != dev))
+               return;
+
+       mctp_dev_set_key(dev, key);
+}
+#else
+static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
+static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
+#endif
+
 static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
 {
        struct mctp_hdr *hdr = mctp_hdr(skb);
@@ -465,6 +513,8 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
                return -EHOSTUNREACH;
        }
 
+       mctp_flow_prepare_output(skb, route->dev);
+
        rc = dev_queue_xmit(skb);
        if (rc)
                rc = net_xmit_errno(rc);
@@ -532,14 +582,14 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
 /* Allocate a locally-owned tag value for (saddr, daddr), and reserve
  * it for the socket msk
  */
-static int mctp_alloc_local_tag(struct mctp_sock *msk,
-                               mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
+static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
+                                               mctp_eid_t saddr,
+                                               mctp_eid_t daddr, u8 *tagp)
 {
        struct net *net = sock_net(&msk->sk);
        struct netns_mctp *mns = &net->mctp;
        struct mctp_sk_key *key, *tmp;
        unsigned long flags;
-       int rc = -EAGAIN;
        u8 tagbits;
 
        /* for NULL destination EIDs, we may get a response from any peer */
@@ -549,7 +599,7 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk,
        /* be optimistic, alloc now */
        key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
        if (!key)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        /* 8 possible tag values */
        tagbits = 0xff;
@@ -591,18 +641,16 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk,
                trace_mctp_key_acquire(key);
 
                *tagp = key->tag;
-               /* done with the key in this scope */
-               mctp_key_unref(key);
-               key = NULL;
-               rc = 0;
        }
 
        spin_unlock_irqrestore(&mns->keys_lock, flags);
 
-       if (!tagbits)
+       if (!tagbits) {
                kfree(key);
+               return ERR_PTR(-EBUSY);
+       }
 
-       return rc;
+       return key;
 }
 
 /* routing lookups */
@@ -740,6 +788,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
        struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
        struct mctp_skb_cb *cb = mctp_cb(skb);
        struct mctp_route tmp_rt;
+       struct mctp_sk_key *key;
        struct net_device *dev;
        struct mctp_hdr *hdr;
        unsigned long flags;
@@ -799,11 +848,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
                goto out_release;
 
        if (req_tag & MCTP_HDR_FLAG_TO) {
-               rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
-               if (rc)
+               key = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+               if (IS_ERR(key)) {
+                       rc = PTR_ERR(key);
                        goto out_release;
+               }
+               mctp_skb_set_flow(skb, key);
+               /* done with the key in this scope */
+               mctp_key_unref(key);
                tag |= MCTP_HDR_FLAG_TO;
        } else {
+               key = NULL;
                tag = req_tag;
        }
 
index 422f4ac..7c3420a 100644 (file)
@@ -485,11 +485,11 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
                mpext = mptcp_get_ext(skb);
                data_len = mpext ? mpext->data_len : 0;
 
-               /* we will check ext_copy.data_len in mptcp_write_options() to
+               /* we will check ops->data_len in mptcp_write_options() to
                 * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
                 * TCPOLEN_MPTCP_MPC_ACK
                 */
-               opts->ext_copy.data_len = data_len;
+               opts->data_len = data_len;
                opts->suboptions = OPTION_MPTCP_MPC_ACK;
                opts->sndr_key = subflow->local_key;
                opts->rcvr_key = subflow->remote_key;
@@ -505,9 +505,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
                        len = TCPOLEN_MPTCP_MPC_ACK_DATA;
                        if (opts->csum_reqd) {
                                /* we need to propagate more info to csum the pseudo hdr */
-                               opts->ext_copy.data_seq = mpext->data_seq;
-                               opts->ext_copy.subflow_seq = mpext->subflow_seq;
-                               opts->ext_copy.csum = mpext->csum;
+                               opts->data_seq = mpext->data_seq;
+                               opts->subflow_seq = mpext->subflow_seq;
+                               opts->csum = mpext->csum;
                                len += TCPOLEN_MPTCP_DSS_CHECKSUM;
                        }
                        *size = ALIGN(len, 4);
@@ -1223,7 +1223,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
                WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
 }
 
-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum)
 {
        struct csum_pseudo_header header;
        __wsum csum;
@@ -1233,15 +1233,21 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
         * always the 64-bit value, irrespective of what length is used in the
         * DSS option itself.
         */
-       header.data_seq = cpu_to_be64(mpext->data_seq);
-       header.subflow_seq = htonl(mpext->subflow_seq);
-       header.data_len = htons(mpext->data_len);
+       header.data_seq = cpu_to_be64(data_seq);
+       header.subflow_seq = htonl(subflow_seq);
+       header.data_len = htons(data_len);
        header.csum = 0;
 
-       csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+       csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum));
        return (__force u16)csum_fold(csum);
 }
 
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+       return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
+                                mpext->csum);
+}
+
 void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                         struct mptcp_out_options *opts)
 {
@@ -1332,7 +1338,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                        len = TCPOLEN_MPTCP_MPC_SYN;
                } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
                        len = TCPOLEN_MPTCP_MPC_SYNACK;
-               } else if (opts->ext_copy.data_len) {
+               } else if (opts->data_len) {
                        len = TCPOLEN_MPTCP_MPC_ACK_DATA;
                        if (opts->csum_reqd)
                                len += TCPOLEN_MPTCP_DSS_CHECKSUM;
@@ -1361,14 +1367,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 
                put_unaligned_be64(opts->rcvr_key, ptr);
                ptr += 2;
-               if (!opts->ext_copy.data_len)
+               if (!opts->data_len)
                        goto mp_capable_done;
 
                if (opts->csum_reqd) {
-                       put_unaligned_be32(opts->ext_copy.data_len << 16 |
-                                          mptcp_make_csum(&opts->ext_copy), ptr);
+                       put_unaligned_be32(opts->data_len << 16 |
+                                          __mptcp_make_csum(opts->data_seq,
+                                                            opts->subflow_seq,
+                                                            opts->data_len,
+                                                            opts->csum), ptr);
                } else {
-                       put_unaligned_be32(opts->ext_copy.data_len << 16 |
+                       put_unaligned_be32(opts->data_len << 16 |
                                           TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
                }
                ptr += 1;
index cd6b11c..b7e32e3 100644 (file)
@@ -126,6 +126,11 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
        __kfree_skb(skb);
 }
 
+static void mptcp_rmem_charge(struct sock *sk, int size)
+{
+       mptcp_sk(sk)->rmem_fwd_alloc -= size;
+}
+
 static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
                               struct sk_buff *from)
 {
@@ -142,7 +147,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
        MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
        kfree_skb_partial(from, fragstolen);
        atomic_add(delta, &sk->sk_rmem_alloc);
-       sk_mem_charge(sk, delta);
+       mptcp_rmem_charge(sk, delta);
        return true;
 }
 
@@ -155,6 +160,44 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
        return mptcp_try_coalesce((struct sock *)msk, to, from);
 }
 
+static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
+{
+       amount >>= SK_MEM_QUANTUM_SHIFT;
+       mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+       __sk_mem_reduce_allocated(sk, amount);
+}
+
+static void mptcp_rmem_uncharge(struct sock *sk, int size)
+{
+       struct mptcp_sock *msk = mptcp_sk(sk);
+       int reclaimable;
+
+       msk->rmem_fwd_alloc += size;
+       reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
+
+       /* see sk_mem_uncharge() for the rationale behind the following scheme */
+       if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
+               __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK);
+}
+
+static void mptcp_rfree(struct sk_buff *skb)
+{
+       unsigned int len = skb->truesize;
+       struct sock *sk = skb->sk;
+
+       atomic_sub(len, &sk->sk_rmem_alloc);
+       mptcp_rmem_uncharge(sk, len);
+}
+
+static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+       skb_orphan(skb);
+       skb->sk = sk;
+       skb->destructor = mptcp_rfree;
+       atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+       mptcp_rmem_charge(sk, skb->truesize);
+}
+
 /* "inspired" by tcp_data_queue_ofo(), main differences:
  * - use mptcp seqs
  * - don't cope with sacks
@@ -267,7 +310,29 @@ merge_right:
 
 end:
        skb_condense(skb);
-       skb_set_owner_r(skb, sk);
+       mptcp_set_owner_r(skb, sk);
+}
+
+static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
+{
+       struct mptcp_sock *msk = mptcp_sk(sk);
+       int amt, amount;
+
+       if (size < msk->rmem_fwd_alloc)
+               return true;
+
+       amt = sk_mem_pages(size);
+       amount = amt << SK_MEM_QUANTUM_SHIFT;
+       msk->rmem_fwd_alloc += amount;
+       if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) {
+               if (ssk->sk_forward_alloc < amount) {
+                       msk->rmem_fwd_alloc -= amount;
+                       return false;
+               }
+
+               ssk->sk_forward_alloc -= amount;
+       }
+       return true;
 }
 
 static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
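A worked example of the new msk-level receive accounting, assuming SK_MEM_QUANTUM == PAGE_SIZE == 4096:

        /* incoming skb->truesize = 6000, msk->rmem_fwd_alloc = 1000
         *
         * mptcp_rmem_schedule(): 6000 >= 1000, so reserve more:
         *   amt    = sk_mem_pages(6000)          = 2
         *   amount = amt << SK_MEM_QUANTUM_SHIFT = 8192
         *   rmem_fwd_alloc += 8192               -> 9192
         *
         * mptcp_set_owner_r() -> mptcp_rmem_charge():
         *   rmem_fwd_alloc -= 6000               -> 3192
         *
         * mptcp_rfree() later gives the 6000 bytes back via
         * mptcp_rmem_uncharge(), which reclaims in SK_RECLAIM_CHUNK steps
         * once the surplus crosses SK_RECLAIM_THRESHOLD.
         */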
@@ -285,15 +350,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
        skb_orphan(skb);
 
        /* try to fetch required memory from subflow */
-       if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
-               int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
-
-               if (ssk->sk_forward_alloc < amount)
-                       goto drop;
-
-               ssk->sk_forward_alloc -= amount;
-               sk->sk_forward_alloc += amount;
-       }
+       if (!mptcp_rmem_schedule(sk, ssk, skb->truesize))
+               goto drop;
 
        has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
 
@@ -313,7 +371,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
                if (tail && mptcp_try_coalesce(sk, tail, skb))
                        return true;
 
-               skb_set_owner_r(skb, sk);
+               mptcp_set_owner_r(skb, sk);
                __skb_queue_tail(&sk->sk_receive_queue, skb);
                return true;
        } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
@@ -908,122 +966,20 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
                df->data_seq + df->data_len == msk->write_seq;
 }
 
-static int mptcp_wmem_with_overhead(int size)
-{
-       return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
-}
-
-static void __mptcp_wmem_reserve(struct sock *sk, int size)
-{
-       int amount = mptcp_wmem_with_overhead(size);
-       struct mptcp_sock *msk = mptcp_sk(sk);
-
-       WARN_ON_ONCE(msk->wmem_reserved);
-       if (WARN_ON_ONCE(amount < 0))
-               amount = 0;
-
-       if (amount <= sk->sk_forward_alloc)
-               goto reserve;
-
-       /* under memory pressure try to reserve at most a single page
-        * otherwise try to reserve the full estimate and fallback
-        * to a single page before entering the error path
-        */
-       if ((tcp_under_memory_pressure(sk) && amount > PAGE_SIZE) ||
-           !sk_wmem_schedule(sk, amount)) {
-               if (amount <= PAGE_SIZE)
-                       goto nomem;
-
-               amount = PAGE_SIZE;
-               if (!sk_wmem_schedule(sk, amount))
-                       goto nomem;
-       }
-
-reserve:
-       msk->wmem_reserved = amount;
-       sk->sk_forward_alloc -= amount;
-       return;
-
-nomem:
-       /* we will wait for memory on next allocation */
-       msk->wmem_reserved = -1;
-}
-
-static void __mptcp_update_wmem(struct sock *sk)
+static void __mptcp_mem_reclaim_partial(struct sock *sk)
 {
-       struct mptcp_sock *msk = mptcp_sk(sk);
+       int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
 
        lockdep_assert_held_once(&sk->sk_lock.slock);
 
-       if (!msk->wmem_reserved)
-               return;
-
-       if (msk->wmem_reserved < 0)
-               msk->wmem_reserved = 0;
-       if (msk->wmem_reserved > 0) {
-               sk->sk_forward_alloc += msk->wmem_reserved;
-               msk->wmem_reserved = 0;
-       }
-}
-
-static bool mptcp_wmem_alloc(struct sock *sk, int size)
-{
-       struct mptcp_sock *msk = mptcp_sk(sk);
-
-       /* check for pre-existing error condition */
-       if (msk->wmem_reserved < 0)
-               return false;
-
-       if (msk->wmem_reserved >= size)
-               goto account;
-
-       mptcp_data_lock(sk);
-       if (!sk_wmem_schedule(sk, size)) {
-               mptcp_data_unlock(sk);
-               return false;
-       }
-
-       sk->sk_forward_alloc -= size;
-       msk->wmem_reserved += size;
-       mptcp_data_unlock(sk);
-
-account:
-       msk->wmem_reserved -= size;
-       return true;
-}
-
-static void mptcp_wmem_uncharge(struct sock *sk, int size)
-{
-       struct mptcp_sock *msk = mptcp_sk(sk);
-
-       if (msk->wmem_reserved < 0)
-               msk->wmem_reserved = 0;
-       msk->wmem_reserved += size;
-}
-
-static void __mptcp_mem_reclaim_partial(struct sock *sk)
-{
-       lockdep_assert_held_once(&sk->sk_lock.slock);
-       __mptcp_update_wmem(sk);
+       __mptcp_rmem_reclaim(sk, reclaimable - 1);
        sk_mem_reclaim_partial(sk);
 }
 
 static void mptcp_mem_reclaim_partial(struct sock *sk)
 {
-       struct mptcp_sock *msk = mptcp_sk(sk);
-
-       /* if we are experiencing a transint allocation error,
-        * the forward allocation memory has been already
-        * released
-        */
-       if (msk->wmem_reserved < 0)
-               return;
-
        mptcp_data_lock(sk);
-       sk->sk_forward_alloc += msk->wmem_reserved;
-       sk_mem_reclaim_partial(sk);
-       msk->wmem_reserved = sk->sk_forward_alloc;
-       sk->sk_forward_alloc = 0;
+       __mptcp_mem_reclaim_partial(sk);
        mptcp_data_unlock(sk);
 }
 
@@ -1218,7 +1174,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
        if (likely(skb)) {
                if (likely(__mptcp_add_ext(skb, gfp))) {
                        skb_reserve(skb, MAX_TCP_HEADER);
-                       skb->reserved_tailroom = skb->end - skb->tail;
+                       skb->ip_summed = CHECKSUM_PARTIAL;
                        INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
                        return skb;
                }
@@ -1335,7 +1291,7 @@ alloc_skb:
                u64 snd_una = READ_ONCE(msk->snd_una);
 
                if (snd_una != msk->snd_nxt) {
-                       tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
+                       tcp_remove_empty_skb(ssk);
                        return 0;
                }
 
@@ -1351,7 +1307,7 @@ alloc_skb:
 
        copy = min_t(size_t, copy, info->limit - info->sent);
        if (!sk_wmem_schedule(ssk, copy)) {
-               tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
+               tcp_remove_empty_skb(ssk);
                return -ENOMEM;
        }
 
@@ -1367,7 +1323,6 @@ alloc_skb:
        skb->truesize += copy;
        sk_wmem_queued_add(ssk, copy);
        sk_mem_charge(ssk, copy);
-       skb->ip_summed = CHECKSUM_PARTIAL;
        WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
        TCP_SKB_CB(skb)->end_seq += copy;
        tcp_skb_pcount_set(skb, 0);
@@ -1513,8 +1468,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
        return NULL;
 }
 
-static void mptcp_push_release(struct sock *sk, struct sock *ssk,
-                              struct mptcp_sendmsg_info *info)
+static void mptcp_push_release(struct sock *ssk, struct mptcp_sendmsg_info *info)
 {
        tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
        release_sock(ssk);
@@ -1577,7 +1531,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
                         * the last round, release prev_ssk
                         */
                        if (ssk != prev_ssk && prev_ssk)
-                               mptcp_push_release(sk, prev_ssk, &info);
+                               mptcp_push_release(prev_ssk, &info);
                        if (!ssk)
                                goto out;
 
@@ -1590,7 +1544,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 
                        ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
                        if (ret <= 0) {
-                               mptcp_push_release(sk, ssk, &info);
+                               mptcp_push_release(ssk, &info);
                                goto out;
                        }
 
@@ -1605,7 +1559,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 
        /* at this point we held the socket lock for the last subflow we used */
        if (ssk)
-               mptcp_push_release(sk, ssk, &info);
+               mptcp_push_release(ssk, &info);
 
 out:
        /* ensure the rtx timer is running */
@@ -1664,7 +1618,6 @@ out:
        /* __mptcp_alloc_tx_skb could have released some wmem and we are
         * not going to flush it via release_sock()
         */
-       __mptcp_update_wmem(sk);
        if (copied) {
                tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
                         info.size_goal);
@@ -1701,7 +1654,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        /* silently ignore everything else */
        msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
 
-       mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len)));
+       lock_sock(sk);
 
        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 
@@ -1749,17 +1702,17 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                psize = min_t(size_t, psize, msg_data_left(msg));
                total_ts = psize + frag_truesize;
 
-               if (!mptcp_wmem_alloc(sk, total_ts))
+               if (!sk_wmem_schedule(sk, total_ts))
                        goto wait_for_memory;
 
                if (copy_page_from_iter(dfrag->page, offset, psize,
                                        &msg->msg_iter) != psize) {
-                       mptcp_wmem_uncharge(sk, psize + frag_truesize);
                        ret = -EFAULT;
                        goto out;
                }
 
                /* data successfully copied into the write queue */
+               sk->sk_forward_alloc -= total_ts;
                copied += psize;
                dfrag->data_len += psize;
                frag_truesize += psize;
@@ -1956,7 +1909,7 @@ static void __mptcp_update_rmem(struct sock *sk)
                return;
 
        atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
-       sk_mem_uncharge(sk, msk->rmem_released);
+       mptcp_rmem_uncharge(sk, msk->rmem_released);
        WRITE_ONCE(msk->rmem_released, 0);
 }
 
@@ -2024,7 +1977,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
        if (unlikely(flags & MSG_ERRQUEUE))
                return inet_recv_error(sk, msg, len, addr_len);
 
-       mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk));
+       lock_sock(sk);
        if (unlikely(sk->sk_state == TCP_LISTEN)) {
                copied = -ENOTCONN;
                goto out_err;
@@ -2504,7 +2457,7 @@ static int __mptcp_init_sock(struct sock *sk)
        __skb_queue_head_init(&msk->receive_queue);
        msk->out_of_order_queue = RB_ROOT;
        msk->first_pending = NULL;
-       msk->wmem_reserved = 0;
+       msk->rmem_fwd_alloc = 0;
        WRITE_ONCE(msk->rmem_released, 0);
        msk->timer_ival = TCP_RTO_MIN;
 
@@ -2715,7 +2668,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
        sk->sk_prot->destroy(sk);
 
-       WARN_ON_ONCE(msk->wmem_reserved);
+       WARN_ON_ONCE(msk->rmem_fwd_alloc);
        WARN_ON_ONCE(msk->rmem_released);
        sk_stream_kill_queues(sk);
        xfrm_sk_free_policy(sk);
@@ -2948,8 +2901,14 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
 
        /* move to sk_receive_queue, sk_stream_kill_queues will purge it */
        skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
-
+       __skb_queue_purge(&sk->sk_receive_queue);
        skb_rbtree_purge(&msk->out_of_order_queue);
+
+       /* move all the rx fwd alloc into sk_forward_alloc, so that
+        * inet_sock_destruct() will dispose of it
+        */
+       sk->sk_forward_alloc += msk->rmem_fwd_alloc;
+       msk->rmem_fwd_alloc = 0;
        mptcp_token_destroy(msk);
        mptcp_pm_free_anno_list(msk);
 }
@@ -3031,10 +2990,6 @@ static void mptcp_release_cb(struct sock *sk)
        if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
                __mptcp_error_report(sk);
 
-       /* push_pending may touch wmem_reserved, ensure we do the cleanup
-        * later
-        */
-       __mptcp_update_wmem(sk);
        __mptcp_update_rmem(sk);
 }
 
@@ -3184,6 +3139,11 @@ static void mptcp_shutdown(struct sock *sk, int how)
                __mptcp_wr_shutdown(sk);
 }
 
+static int mptcp_forward_alloc_get(const struct sock *sk)
+{
+       return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+}
+
 static struct proto mptcp_prot = {
        .name           = "MPTCP",
        .owner          = THIS_MODULE,
@@ -3201,6 +3161,7 @@ static struct proto mptcp_prot = {
        .hash           = mptcp_hash,
        .unhash         = mptcp_unhash,
        .get_port       = mptcp_get_port,
+       .forward_alloc_get      = mptcp_forward_alloc_get,
        .sockets_allocated      = &mptcp_sockets_allocated,
        .memory_allocated       = &tcp_memory_allocated,
        .memory_pressure        = &tcp_memory_pressure,
index 284fdce..67a61ac 100644 (file)
@@ -227,7 +227,7 @@ struct mptcp_sock {
        u64             ack_seq;
        u64             rcv_wnd_sent;
        u64             rcv_data_fin_seq;
-       int             wmem_reserved;
+       int             rmem_fwd_alloc;
        struct sock     *last_snd;
        int             snd_burst;
        int             old_wspace;
@@ -272,19 +272,6 @@ struct mptcp_sock {
        char            ca_name[TCP_CA_NAME_MAX];
 };
 
-#define mptcp_lock_sock(___sk, cb) do {                                        \
-       struct sock *__sk = (___sk); /* silence macro reuse warning */  \
-       might_sleep();                                                  \
-       spin_lock_bh(&__sk->sk_lock.slock);                             \
-       if (__sk->sk_lock.owned)                                        \
-               __lock_sock(__sk);                                      \
-       cb;                                                             \
-       __sk->sk_lock.owned = 1;                                        \
-       spin_unlock(&__sk->sk_lock.slock);                              \
-       mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_);          \
-       local_bh_enable();                                              \
-} while (0)
-
 #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
 #define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock)
 
index 4625496..0a04468 100644 (file)
@@ -457,7 +457,7 @@ META_COLLECTOR(int_sk_fwd_alloc)
                *err = -1;
                return;
        }
-       dst->value = sk->sk_forward_alloc;
+       dst->value = sk_forward_alloc_get(sk);
 }
 
 META_COLLECTOR(int_sk_sndbuf)
index b0ff0df..3b0f620 100644 (file)
@@ -1487,10 +1487,6 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
 }
 EXPORT_SYMBOL(psched_ppscfg_precompute);
 
-static void mini_qdisc_rcu_func(struct rcu_head *head)
-{
-}
-
 void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
                          struct tcf_proto *tp_head)
 {
@@ -1503,28 +1499,30 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 
        if (!tp_head) {
                RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
-               /* Wait for flying RCU callback before it is freed. */
-               rcu_barrier();
-               return;
-       }
+       } else {
+               miniq = miniq_old != &miniqp->miniq1 ?
+                       &miniqp->miniq1 : &miniqp->miniq2;
 
-       miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
-               &miniqp->miniq1 : &miniqp->miniq2;
+               /* We need to make sure that readers won't see the miniq
+                * we are about to modify. So ensure that at least one RCU
+                * grace period has elapsed since the miniq was made
+                * inactive.
+                */
+               if (IS_ENABLED(CONFIG_PREEMPT_RT))
+                       cond_synchronize_rcu(miniq->rcu_state);
+               else if (!poll_state_synchronize_rcu(miniq->rcu_state))
+                       synchronize_rcu_expedited();
 
-       /* We need to make sure that readers won't see the miniq
-        * we are about to modify. So wait until previous call_rcu callback
-        * is done.
-        */
-       rcu_barrier();
-       miniq->filter_list = tp_head;
-       rcu_assign_pointer(*miniqp->p_miniq, miniq);
+               miniq->filter_list = tp_head;
+               rcu_assign_pointer(*miniqp->p_miniq, miniq);
+       }
 
        if (miniq_old)
-               /* This is counterpart of the rcu barriers above. We need to
+               /* This is the counterpart of the rcu sync above. We need to
                 * block potential new users of miniq_old until all readers
                 * have stopped seeing it.
                 */
-               call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
+               miniq_old->rcu_state = start_poll_synchronize_rcu();
 }
 EXPORT_SYMBOL(mini_qdisc_pair_swap);
 
@@ -1543,6 +1541,8 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
        miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
        miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
        miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
+       miniqp->miniq1.rcu_state = get_state_synchronize_rcu();
+       miniqp->miniq2.rcu_state = miniqp->miniq1.rcu_state;
        miniqp->p_miniq = p_miniq;
 }
 EXPORT_SYMBOL(mini_qdisc_pair_init);
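The rcu_barrier()/call_rcu() pair is replaced with the polled grace-period API: retiring a miniq records a cookie, and reuse only needs to ensure one full grace period has elapsed since that point. The pattern reduces to roughly:

        unsigned long cookie;

        /* when the object is retired: */
        cookie = start_poll_synchronize_rcu();

        /* before reusing it: */
        if (IS_ENABLED(CONFIG_PREEMPT_RT))
                cond_synchronize_rcu(cookie);           /* may sleep until the GP ends */
        else if (!poll_state_synchronize_rcu(cookie))
                synchronize_rcu_expedited();            /* GP still running: force one */

In the common case the grace period has long expired by the time the pair swaps again, so the poll succeeds and no blocking synchronization happens at all.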
index 72de08e..1073c76 100644 (file)
@@ -56,6 +56,7 @@ struct gred_sched {
        u32             DPs;
        u32             def;
        struct red_vars wred_set;
+       struct tc_gred_qopt_offload *opt;
 };
 
 static inline int gred_wred_mode(struct gred_sched *table)
@@ -311,42 +312,43 @@ static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
 {
        struct gred_sched *table = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
-       struct tc_gred_qopt_offload opt = {
-               .command        = command,
-               .handle         = sch->handle,
-               .parent         = sch->parent,
-       };
+       struct tc_gred_qopt_offload *opt = table->opt;
 
        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return;
 
+       memset(opt, 0, sizeof(*opt));
+       opt->command = command;
+       opt->handle = sch->handle;
+       opt->parent = sch->parent;
+
        if (command == TC_GRED_REPLACE) {
                unsigned int i;
 
-               opt.set.grio_on = gred_rio_mode(table);
-               opt.set.wred_on = gred_wred_mode(table);
-               opt.set.dp_cnt = table->DPs;
-               opt.set.dp_def = table->def;
+               opt->set.grio_on = gred_rio_mode(table);
+               opt->set.wred_on = gred_wred_mode(table);
+               opt->set.dp_cnt = table->DPs;
+               opt->set.dp_def = table->def;
 
                for (i = 0; i < table->DPs; i++) {
                        struct gred_sched_data *q = table->tab[i];
 
                        if (!q)
                                continue;
-                       opt.set.tab[i].present = true;
-                       opt.set.tab[i].limit = q->limit;
-                       opt.set.tab[i].prio = q->prio;
-                       opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
-                       opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
-                       opt.set.tab[i].is_ecn = gred_use_ecn(q);
-                       opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
-                       opt.set.tab[i].probability = q->parms.max_P;
-                       opt.set.tab[i].backlog = &q->backlog;
+                       opt->set.tab[i].present = true;
+                       opt->set.tab[i].limit = q->limit;
+                       opt->set.tab[i].prio = q->prio;
+                       opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+                       opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+                       opt->set.tab[i].is_ecn = gred_use_ecn(q);
+                       opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
+                       opt->set.tab[i].probability = q->parms.max_P;
+                       opt->set.tab[i].backlog = &q->backlog;
                }
-               opt.set.qstats = &sch->qstats;
+               opt->set.qstats = &sch->qstats;
        }
 
-       dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
+       dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
 }
 
 static int gred_offload_dump_stats(struct Qdisc *sch)
@@ -731,6 +733,7 @@ err_unlock_free:
 static int gred_init(struct Qdisc *sch, struct nlattr *opt,
                     struct netlink_ext_ack *extack)
 {
+       struct gred_sched *table = qdisc_priv(sch);
        struct nlattr *tb[TCA_GRED_MAX + 1];
        int err;
 
@@ -754,6 +757,12 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
                sch->limit = qdisc_dev(sch)->tx_queue_len
                             * psched_mtu(qdisc_dev(sch));
 
+       if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
+               table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
+               if (!table->opt)
+                       return -ENOMEM;
+       }
+
        return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
 }
 
@@ -910,6 +919,7 @@ static void gred_destroy(struct Qdisc *sch)
                        gred_destroy_vq(table->tab[i]);
        }
        gred_offload(sch, TC_GRED_DESTROY);
+       kfree(table->opt);
 }
 
 static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
index cf1d45d..9267922 100644 (file)
@@ -1084,11 +1084,15 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
        offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
 
        if (offload) {
-               if (sch->parent != TC_H_ROOT)
+               if (sch->parent != TC_H_ROOT) {
+                       NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
                        return -EOPNOTSUPP;
+               }
 
-               if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+               if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
+                       NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
                        return -EOPNOTSUPP;
+               }
 
                q->num_direct_qdiscs = dev->real_num_tx_queues;
                q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
index 32df65f..fb3da4d 100644 (file)
@@ -156,6 +156,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort(
                                        void *arg,
                                        struct sctp_cmd_seq *commands);
 
+static enum sctp_disposition
+__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+                          const struct sctp_association *asoc,
+                          const union sctp_subtype type, void *arg,
+                          struct sctp_cmd_seq *commands);
+
 /* Small helper function that checks if the chunk length
  * is of the appropriate length.  The 'required_length' argument
  * is set to be the size of a specific chunk we are testing.
@@ -337,6 +343,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
        if (!chunk->singleton)
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
+       /* Make sure that the INIT chunk has a valid length.
+        * Normally, this would cause an ABORT with a Protocol Violation
+        * error, but since we don't have an association, we'll
+        * just discard the packet.
+        */
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
        /* If the packet is an OOTB packet which is temporarily on the
         * control endpoint, respond with an ABORT.
         */
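The sctp hunks in this file reorder validation the same way: the chunk-length check moves ahead of anything that reacts to the chunk (vtag checks, ABORT replies), so a malformed chunk is discarded before any of its fields are acted on. In shape:

        /* 1. length first - nothing in the chunk may be trusted before this */
        if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);

        /* 2. only now do the vtag check and any replies make sense */
        if (chunk->sctp_hdr->vtag != 0)
                return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);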
@@ -351,14 +365,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
        if (chunk->sctp_hdr->vtag != 0)
                return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 
-       /* Make sure that the INIT chunk has a valid length.
-        * Normally, this would cause an ABORT with a Protocol Violation
-        * error, but since we don't have an association, we'll
-        * just discard the packet.
-        */
-       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
-               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
        /* If the INIT is coming toward a closing socket, we'll send back
         * an ABORT.  Essentially, this catches the race of INIT being
         * backlogged to the socket at the same time as the user issues close().
@@ -704,6 +710,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
        struct sock *sk;
        int error = 0;
 
+       if (asoc && !sctp_vtag_verify(chunk, asoc))
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
        /* If the packet is an OOTB packet which is temporarily on the
         * control endpoint, respond with an ABORT.
         */
@@ -718,7 +727,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
         * in sctp_unpack_cookie().
         */
        if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
-               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+               return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+                                                 commands);
 
        /* If the endpoint is not listening or if the number of associations
         * on the TCP-style socket exceed the max backlog, respond with an
@@ -1524,20 +1534,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
        if (!chunk->singleton)
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
+       /* Make sure that the INIT chunk has a valid length. */
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
        /* 3.1 A packet containing an INIT chunk MUST have a zero Verification
         * Tag.
         */
        if (chunk->sctp_hdr->vtag != 0)
                return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 
-       /* Make sure that the INIT chunk has a valid length.
-        * In this case, we generate a protocol violation since we have
-        * an association established.
-        */
-       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
-               return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-                                                 commands);
-
        if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port)
                return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands);
 
@@ -1882,9 +1888,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
         * its peer.
        */
        if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) {
-               disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc,
-                               SCTP_ST_CHUNK(chunk->chunk_hdr->type),
-                               chunk, commands);
+               disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc,
+                                                        SCTP_ST_CHUNK(chunk->chunk_hdr->type),
+                                                        chunk, commands);
                if (SCTP_DISPOSITION_NOMEM == disposition)
                        goto nomem;
 
@@ -2202,9 +2208,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
         * enough for the chunk header.  Cookie length verification is
         * done later.
         */
-       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
-               return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-                                                 commands);
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) {
+               if (!sctp_vtag_verify(chunk, asoc))
+                       asoc = NULL;
+               return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands);
+       }
 
        /* "Decode" the chunk.  We have no optional parameters so we
         * are in good shape.
@@ -2341,7 +2349,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort(
         */
        if (SCTP_ADDR_DEL ==
                    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
-               return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
        if (!sctp_err_chunk_valid(chunk))
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2387,7 +2395,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort(
         */
        if (SCTP_ADDR_DEL ==
                    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
-               return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
        if (!sctp_err_chunk_valid(chunk))
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2657,7 +2665,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort(
         */
        if (SCTP_ADDR_DEL ==
                    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
-               return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
        if (!sctp_err_chunk_valid(chunk))
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2970,13 +2978,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn(
  * that belong to this association, it should discard the INIT chunk and
  * retransmit the SHUTDOWN ACK chunk.
  */
-enum sctp_disposition sctp_sf_do_9_2_reshutack(
-                                       struct net *net,
-                                       const struct sctp_endpoint *ep,
-                                       const struct sctp_association *asoc,
-                                       const union sctp_subtype type,
-                                       void *arg,
-                                       struct sctp_cmd_seq *commands)
+static enum sctp_disposition
+__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+                          const struct sctp_association *asoc,
+                          const union sctp_subtype type, void *arg,
+                          struct sctp_cmd_seq *commands)
 {
        struct sctp_chunk *chunk = arg;
        struct sctp_chunk *reply;
@@ -3010,6 +3016,26 @@ nomem:
        return SCTP_DISPOSITION_NOMEM;
 }
 
+enum sctp_disposition
+sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+                        const struct sctp_association *asoc,
+                        const union sctp_subtype type, void *arg,
+                        struct sctp_cmd_seq *commands)
+{
+       struct sctp_chunk *chunk = arg;
+
+       if (!chunk->singleton)
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
+       if (chunk->sctp_hdr->vtag != 0)
+               return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
+
+       return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands);
+}
+
 /*
  * sctp_sf_do_ecn_cwr
  *
@@ -3662,6 +3688,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net,
 
        SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 
+       if (asoc && !sctp_vtag_verify(chunk, asoc))
+               asoc = NULL;
+
        ch = (struct sctp_chunkhdr *)chunk->chunk_hdr;
        do {
                /* Report violation if the chunk is less than minimal */
@@ -3777,12 +3806,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5(
 
        SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 
-       /* If the chunk length is invalid, we don't want to process
-        * the reset of the packet.
-        */
-       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
-               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
        /* We need to discard the rest of the packet to prevent
         * potential bombing attacks from additional bundled chunks.
         * This is documented in SCTP Threats ID.
@@ -3810,6 +3833,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net,
 {
        struct sctp_chunk *chunk = arg;
 
+       if (!sctp_vtag_verify(chunk, asoc))
+               asoc = NULL;
+
        /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
        if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -3845,6 +3871,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
        }
 
+       /* Make sure that the ASCONF ADDIP chunk has a valid length.  */
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
+               return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+                                                 commands);
+
        /* ADD-IP: Section 4.1.1
         * This chunk MUST be sent in an authenticated way by using
         * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
@@ -3853,13 +3884,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
         */
        if (!asoc->peer.asconf_capable ||
            (!net->sctp.addip_noauth && !chunk->auth))
-               return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
-                                            commands);
-
-       /* Make sure that the ASCONF ADDIP chunk has a valid length.  */
-       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
-               return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-                                                 commands);
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
        hdr = (struct sctp_addiphdr *)chunk->skb->data;
        serial = ntohl(hdr->serial);
@@ -3988,6 +4013,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
        }
 
+       /* Make sure that the ADDIP chunk has a valid length.  */
+       if (!sctp_chunk_length_valid(asconf_ack,
+                                    sizeof(struct sctp_addip_chunk)))
+               return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+                                                 commands);
+
        /* ADD-IP, Section 4.1.2:
         * This chunk MUST be sent in an authenticated way by using
         * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
@@ -3996,14 +4027,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
         */
        if (!asoc->peer.asconf_capable ||
            (!net->sctp.addip_noauth && !asconf_ack->auth))
-               return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
-                                            commands);
-
-       /* Make sure that the ADDIP chunk has a valid length.  */
-       if (!sctp_chunk_length_valid(asconf_ack,
-                                    sizeof(struct sctp_addip_chunk)))
-               return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-                                                 commands);
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
        addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data;
        rcvd_serial = ntohl(addip_hdr->serial);
@@ -4575,6 +4599,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net,
 {
        struct sctp_chunk *chunk = arg;
 
+       if (asoc && !sctp_vtag_verify(chunk, asoc))
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
        /* Make sure that the chunk has a valid length.
         * Since we don't know the chunk type, we use a general
         * chunkhdr structure to make a comparison.
@@ -4642,6 +4669,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net,
 {
        struct sctp_chunk *chunk = arg;
 
+       if (!sctp_vtag_verify(chunk, asoc))
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
        /* Make sure that the chunk has a valid length. */
        if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -6348,6 +6378,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(
                 * yet.
                 */
                switch (chunk->chunk_hdr->type) {
+               case SCTP_CID_INIT:
                case SCTP_CID_INIT_ACK:
                {
                        struct sctp_initack_chunk *initack;
index 5e50e00..8dc3438 100644 (file)
@@ -1185,7 +1185,7 @@ static void smc_connect_work(struct work_struct *work)
        if (smc->clcsock->sk->sk_err) {
                smc->sk.sk_err = smc->clcsock->sk->sk_err;
        } else if ((1 << smc->clcsock->sk->sk_state) &
-                                       (TCPF_SYN_SENT | TCP_SYN_RECV)) {
+                                       (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
                rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
                if ((rc == -EPIPE) &&
                    ((1 << smc->clcsock->sk->sk_state) &
index a9623c9..b102680 100644 (file)
@@ -2154,7 +2154,7 @@ void smc_llc_link_active(struct smc_link *link)
                            link->smcibdev->ibdev->name, link->ibport);
        link->state = SMC_LNK_ACTIVE;
        if (link->lgr->llc_testlink_time) {
-               link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
+               link->llc_testlink_time = link->lgr->llc_testlink_time;
                schedule_delayed_work(&link->llc_testlink_wrk,
                                      link->llc_testlink_time);
        }
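
The dropped multiplication fixes a unit bug: the group's
llc_testlink_time is evidently already stored in jiffies, so scaling it
by HZ a second time inflated the TESTLINK keepalive interval. The usual
convention is to convert exactly once, where the value enters the
kernel, e.g.:

    #include <linux/jiffies.h>

    /* Illustrative: convert a human-facing interval to jiffies once,
     * then pass jiffies-typed values around internally.
     */
    static unsigned long testlink_interval(unsigned int msecs)
    {
            return msecs_to_jiffies(msecs);
    }
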
index 0b2c18e..8346047 100644 (file)
@@ -428,17 +428,17 @@ switchdev_lower_dev_find(struct net_device *dev,
        return switchdev_priv.lower_dev;
 }
 
-static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
-               const struct net_device *orig_dev,
+static int __switchdev_handle_fdb_event_to_device(struct net_device *dev,
+               struct net_device *orig_dev, unsigned long event,
                const struct switchdev_notifier_fdb_info *fdb_info,
                bool (*check_cb)(const struct net_device *dev),
                bool (*foreign_dev_check_cb)(const struct net_device *dev,
                                             const struct net_device *foreign_dev),
-               int (*add_cb)(struct net_device *dev,
-                             const struct net_device *orig_dev, const void *ctx,
+               int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+                             unsigned long event, const void *ctx,
                              const struct switchdev_notifier_fdb_info *fdb_info),
-               int (*lag_add_cb)(struct net_device *dev,
-                                 const struct net_device *orig_dev, const void *ctx,
+               int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+                                 unsigned long event, const void *ctx,
                                  const struct switchdev_notifier_fdb_info *fdb_info))
 {
        const struct switchdev_notifier_info *info = &fdb_info->info;
@@ -447,17 +447,17 @@ static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
        int err = -EOPNOTSUPP;
 
        if (check_cb(dev))
-               return add_cb(dev, orig_dev, info->ctx, fdb_info);
+               return mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
 
        if (netif_is_lag_master(dev)) {
                if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
                        goto maybe_bridged_with_us;
 
                /* This is a LAG interface that we offload */
-               if (!lag_add_cb)
+               if (!lag_mod_cb)
                        return -EOPNOTSUPP;
 
-               return lag_add_cb(dev, orig_dev, info->ctx, fdb_info);
+               return lag_mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
        }
 
        /* Recurse through lower interfaces in case the FDB entry is pointing
@@ -481,10 +481,10 @@ static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
                                                      foreign_dev_check_cb))
                                continue;
 
-                       err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev,
-                                                                  fdb_info, check_cb,
-                                                                  foreign_dev_check_cb,
-                                                                  add_cb, lag_add_cb);
+                       err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev,
+                                                                    event, fdb_info, check_cb,
+                                                                    foreign_dev_check_cb,
+                                                                    mod_cb, lag_mod_cb);
                        if (err && err != -EOPNOTSUPP)
                                return err;
                }
@@ -503,140 +503,34 @@ maybe_bridged_with_us:
        if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
                return 0;
 
-       return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info,
-                                                   check_cb, foreign_dev_check_cb,
-                                                   add_cb, lag_add_cb);
+       return __switchdev_handle_fdb_event_to_device(br, orig_dev, event, fdb_info,
+                                                     check_cb, foreign_dev_check_cb,
+                                                     mod_cb, lag_mod_cb);
 }
 
-int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
                const struct switchdev_notifier_fdb_info *fdb_info,
                bool (*check_cb)(const struct net_device *dev),
                bool (*foreign_dev_check_cb)(const struct net_device *dev,
                                             const struct net_device *foreign_dev),
-               int (*add_cb)(struct net_device *dev,
-                             const struct net_device *orig_dev, const void *ctx,
+               int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+                             unsigned long event, const void *ctx,
                              const struct switchdev_notifier_fdb_info *fdb_info),
-               int (*lag_add_cb)(struct net_device *dev,
-                                 const struct net_device *orig_dev, const void *ctx,
+               int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+                                 unsigned long event, const void *ctx,
                                  const struct switchdev_notifier_fdb_info *fdb_info))
 {
        int err;
 
-       err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info,
-                                                  check_cb,
-                                                  foreign_dev_check_cb,
-                                                  add_cb, lag_add_cb);
+       err = __switchdev_handle_fdb_event_to_device(dev, dev, event, fdb_info,
+                                                    check_cb, foreign_dev_check_cb,
+                                                    mod_cb, lag_mod_cb);
        if (err == -EOPNOTSUPP)
                err = 0;
 
        return err;
 }
-EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device);
-
-static int __switchdev_handle_fdb_del_to_device(struct net_device *dev,
-               const struct net_device *orig_dev,
-               const struct switchdev_notifier_fdb_info *fdb_info,
-               bool (*check_cb)(const struct net_device *dev),
-               bool (*foreign_dev_check_cb)(const struct net_device *dev,
-                                            const struct net_device *foreign_dev),
-               int (*del_cb)(struct net_device *dev,
-                             const struct net_device *orig_dev, const void *ctx,
-                             const struct switchdev_notifier_fdb_info *fdb_info),
-               int (*lag_del_cb)(struct net_device *dev,
-                                 const struct net_device *orig_dev, const void *ctx,
-                                 const struct switchdev_notifier_fdb_info *fdb_info))
-{
-       const struct switchdev_notifier_info *info = &fdb_info->info;
-       struct net_device *br, *lower_dev;
-       struct list_head *iter;
-       int err = -EOPNOTSUPP;
-
-       if (check_cb(dev))
-               return del_cb(dev, orig_dev, info->ctx, fdb_info);
-
-       if (netif_is_lag_master(dev)) {
-               if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
-                       goto maybe_bridged_with_us;
-
-               /* This is a LAG interface that we offload */
-               if (!lag_del_cb)
-                       return -EOPNOTSUPP;
-
-               return lag_del_cb(dev, orig_dev, info->ctx, fdb_info);
-       }
-
-       /* Recurse through lower interfaces in case the FDB entry is pointing
-        * towards a bridge device.
-        */
-       if (netif_is_bridge_master(dev)) {
-               if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
-                       return 0;
-
-               /* This is a bridge interface that we offload */
-               netdev_for_each_lower_dev(dev, lower_dev, iter) {
-                       /* Do not propagate FDB entries across bridges */
-                       if (netif_is_bridge_master(lower_dev))
-                               continue;
-
-                       /* Bridge ports might be either us, or LAG interfaces
-                        * that we offload.
-                        */
-                       if (!check_cb(lower_dev) &&
-                           !switchdev_lower_dev_find(lower_dev, check_cb,
-                                                     foreign_dev_check_cb))
-                               continue;
-
-                       err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev,
-                                                                  fdb_info, check_cb,
-                                                                  foreign_dev_check_cb,
-                                                                  del_cb, lag_del_cb);
-                       if (err && err != -EOPNOTSUPP)
-                               return err;
-               }
-
-               return 0;
-       }
-
-maybe_bridged_with_us:
-       /* Event is neither on a bridge nor a LAG. Check whether it is on an
-        * interface that is in a bridge with us.
-        */
-       br = netdev_master_upper_dev_get_rcu(dev);
-       if (!br || !netif_is_bridge_master(br))
-               return 0;
-
-       if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
-               return 0;
-
-       return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info,
-                                                   check_cb, foreign_dev_check_cb,
-                                                   del_cb, lag_del_cb);
-}
-
-int switchdev_handle_fdb_del_to_device(struct net_device *dev,
-               const struct switchdev_notifier_fdb_info *fdb_info,
-               bool (*check_cb)(const struct net_device *dev),
-               bool (*foreign_dev_check_cb)(const struct net_device *dev,
-                                            const struct net_device *foreign_dev),
-               int (*del_cb)(struct net_device *dev,
-                             const struct net_device *orig_dev, const void *ctx,
-                             const struct switchdev_notifier_fdb_info *fdb_info),
-               int (*lag_del_cb)(struct net_device *dev,
-                                 const struct net_device *orig_dev, const void *ctx,
-                                 const struct switchdev_notifier_fdb_info *fdb_info))
-{
-       int err;
-
-       err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info,
-                                                  check_cb,
-                                                  foreign_dev_check_cb,
-                                                  del_cb, lag_del_cb);
-       if (err == -EOPNOTSUPP)
-               err = 0;
-
-       return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device);
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_event_to_device);
 
 static int __switchdev_handle_port_obj_add(struct net_device *dev,
                        struct switchdev_notifier_port_obj_info *port_obj_info,
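
Folding the add and del paths into one handler removes ~130 lines of
duplicated traversal logic; the FDB event is now threaded through as a
parameter and a single callback decides what to do. A caller-side
sketch, assuming a driver that handles both events in one callback
(my_fdb_add/my_fdb_del and the check callbacks are hypothetical names):

    static int my_fdb_mod_cb(struct net_device *dev,
                             struct net_device *orig_dev,
                             unsigned long event, const void *ctx,
                             const struct switchdev_notifier_fdb_info *fdb_info)
    {
            if (event == SWITCHDEV_FDB_ADD_TO_DEVICE)
                    return my_fdb_add(dev, fdb_info);       /* hypothetical */
            return my_fdb_del(dev, fdb_info);               /* hypothetical */
    }

    /* ...from the driver's switchdev notifier: */
    err = switchdev_handle_fdb_event_to_device(dev, event, fdb_info,
                                               my_dev_check_cb,
                                               my_foreign_dev_check_cb,
                                               my_fdb_mod_cb, NULL);
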
index c9391d3..dc60c32 100644 (file)
@@ -2285,43 +2285,53 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
        u16 key_gen = msg_key_gen(hdr);
        u16 size = msg_data_sz(hdr);
        u8 *data = msg_data(hdr);
+       unsigned int keylen;
+
+       /* Verify that the message is large enough to hold a minimum-size key */
+       if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) {
+               pr_debug("%s: message data size is too small\n", rx->name);
+               goto exit;
+       }
+
+       keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+
+       /* Verify the supplied size values */
+       if (unlikely(size != keylen + sizeof(struct tipc_aead_key) ||
+                    keylen > TIPC_AEAD_KEY_SIZE_MAX)) {
+               pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name);
+               goto exit;
+       }
 
        spin_lock(&rx->lock);
        if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) {
                pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name,
                       rx->skey, key_gen, rx->key_gen);
-               goto exit;
+               goto exit_unlock;
        }
 
        /* Allocate memory for the key */
        skey = kmalloc(size, GFP_ATOMIC);
        if (unlikely(!skey)) {
                pr_err("%s: unable to allocate memory for skey\n", rx->name);
-               goto exit;
+               goto exit_unlock;
        }
 
        /* Copy key from msg data */
-       skey->keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+       skey->keylen = keylen;
        memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME);
        memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32),
               skey->keylen);
 
-       /* Sanity check */
-       if (unlikely(size != tipc_aead_key_size(skey))) {
-               kfree(skey);
-               skey = NULL;
-               goto exit;
-       }
-
        rx->key_gen = key_gen;
        rx->skey_mode = msg_key_mode(hdr);
        rx->skey = skey;
        rx->nokey = 0;
        mb(); /* for nokey flag */
 
-exit:
+exit_unlock:
        spin_unlock(&rx->lock);
 
+exit:
        /* Schedule the key attaching on this crypto */
        if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0)))
                return true;
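
The checks added above follow the standard pattern for attacker-
controlled length fields: validate the declared length against both the
data actually carried and a hard upper bound before it drives kmalloc()
and memcpy(). In condensed form (same constants as the code above):

    /* Condensed sketch of the validation order used above. */
    static bool key_msg_size_ok(unsigned int size, unsigned int keylen)
    {
            /* message must hold at least the key header plus a minimum key */
            if (size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)
                    return false;
            /* declared key length must account for the message exactly and
             * respect the hard maximum
             */
            return size == keylen + sizeof(struct tipc_aead_key) &&
                   keylen <= TIPC_AEAD_KEY_SIZE_MAX;
    }
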
index 278192e..acfba9f 100644 (file)
@@ -769,12 +769,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 
        prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
        prot[TLS_BASE][TLS_SW].recvmsg            = tls_sw_recvmsg;
-       prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read;
+       prot[TLS_BASE][TLS_SW].sock_is_readable   = tls_sw_sock_is_readable;
        prot[TLS_BASE][TLS_SW].close              = tls_sk_proto_close;
 
        prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
        prot[TLS_SW][TLS_SW].recvmsg            = tls_sw_recvmsg;
-       prot[TLS_SW][TLS_SW].stream_memory_read = tls_sw_stream_read;
+       prot[TLS_SW][TLS_SW].sock_is_readable   = tls_sw_sock_is_readable;
        prot[TLS_SW][TLS_SW].close              = tls_sk_proto_close;
 
 #ifdef CONFIG_TLS_DEVICE
index 4147bb2..d815640 100644 (file)
@@ -35,6 +35,7 @@
  * SOFTWARE.
  */
 
+#include <linux/bug.h>
 #include <linux/sched/signal.h>
 #include <linux/module.h>
 #include <linux/splice.h>
 #include <net/strparser.h>
 #include <net/tls.h>
 
+noinline void tls_err_abort(struct sock *sk, int err)
+{
+       WARN_ON_ONCE(err >= 0);
+       /* sk->sk_err should contain a positive error code. */
+       sk->sk_err = -err;
+       sk_error_report(sk);
+}
+
 static int __skb_nsg(struct sk_buff *skb, int offset, int len,
                      unsigned int recursion_level)
 {
@@ -419,7 +428,7 @@ int tls_tx_records(struct sock *sk, int flags)
 
 tx_err:
        if (rc < 0 && rc != -EAGAIN)
-               tls_err_abort(sk, EBADMSG);
+               tls_err_abort(sk, -EBADMSG);
 
        return rc;
 }
@@ -450,7 +459,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 
                /* If err is already set on socket, return the same code */
                if (sk->sk_err) {
-                       ctx->async_wait.err = sk->sk_err;
+                       ctx->async_wait.err = -sk->sk_err;
                } else {
                        ctx->async_wait.err = err;
                        tls_err_abort(sk, err);
@@ -769,7 +778,7 @@ static int tls_push_record(struct sock *sk, int flags,
                               msg_pl->sg.size + prot->tail_size, i);
        if (rc < 0) {
                if (rc != -EINPROGRESS) {
-                       tls_err_abort(sk, EBADMSG);
+                       tls_err_abort(sk, -EBADMSG);
                        if (split) {
                                tls_ctx->pending_open_record_frags = true;
                                tls_merge_open_record(sk, rec, tmp, orig_end);
@@ -1839,7 +1848,7 @@ int tls_sw_recvmsg(struct sock *sk,
                err = decrypt_skb_update(sk, skb, &msg->msg_iter,
                                         &chunk, &zc, async_capable);
                if (err < 0 && err != -EINPROGRESS) {
-                       tls_err_abort(sk, EBADMSG);
+                       tls_err_abort(sk, -EBADMSG);
                        goto recv_end;
                }
 
@@ -2019,7 +2028,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
                }
 
                if (err < 0) {
-                       tls_err_abort(sk, EBADMSG);
+                       tls_err_abort(sk, -EBADMSG);
                        goto splice_read_end;
                }
                ctx->decrypted = 1;
@@ -2038,7 +2047,7 @@ splice_read_end:
        return copied ? : err;
 }
 
-bool tls_sw_stream_read(const struct sock *sk)
+bool tls_sw_sock_is_readable(struct sock *sk)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
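
The sign convention enforced across these hunks: kernel-internal paths
pass negative errno values (hence the switch to -EBADMSG), while
sk->sk_err stores the positive form that userspace reads back. From an
application's point of view (illustrative):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/socket.h>

    static void report_tls_error(int fd)
    {
            int soerr = 0;
            socklen_t optlen = sizeof(soerr);

            /* SO_ERROR yields the positive errno stored in sk->sk_err */
            if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &soerr, &optlen) == 0 &&
                soerr == EBADMSG)
                    fprintf(stderr, "TLS record failed authentication\n");
    }
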
index 89f9e85..78e08e8 100644 (file)
@@ -3052,6 +3052,8 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
        /* readable? */
        if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
+       if (sk_is_readable(sk))
+               mask |= EPOLLIN | EPOLLRDNORM;
 
        /* Connection-based need to check for termination and startup */
        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
@@ -3091,6 +3093,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
        /* readable? */
        if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
+       if (sk_is_readable(sk))
+               mask |= EPOLLIN | EPOLLRDNORM;
 
        /* Connection-based need to check for termination and startup */
        if (sk->sk_type == SOCK_SEQPACKET) {
index b927e2b..452376c 100644 (file)
@@ -102,6 +102,7 @@ static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto
        *prot        = *base;
        prot->close  = sock_map_close;
        prot->recvmsg = unix_bpf_recvmsg;
+       prot->sock_is_readable = sk_msg_is_readable;
 }
 
 static void unix_stream_bpf_rebuild_protos(struct proto *prot,
@@ -110,6 +111,7 @@ static void unix_stream_bpf_rebuild_protos(struct proto *prot,
        *prot        = *base;
        prot->close  = sock_map_close;
        prot->recvmsg = unix_bpf_recvmsg;
+       prot->sock_is_readable = sk_msg_is_readable;
        prot->unhash  = sock_map_unhash;
 }
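
Wiring prot->sock_is_readable up matters because data redirected into a
psock does not sit in sk_receive_queue, so poll() would otherwise miss
it; the sk_is_readable() calls added to unix_poll()/unix_dgram_poll()
above consult this hook. The dispatch helper is essentially (hedged
sketch of the inline in include/net/sock.h):

    static inline bool sk_is_readable(struct sock *sk)
    {
            if (sk->sk_prot->sock_is_readable)
                    return sk->sk_prot->sock_is_readable(sk);
            return false;
    }
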
 
index 45be124..eb297e1 100644 (file)
@@ -524,6 +524,7 @@ use_default_name:
        INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk);
        INIT_WORK(&rdev->mgmt_registrations_update_wk,
                  cfg80211_mgmt_registrations_update_wk);
+       spin_lock_init(&rdev->mgmt_registrations_lock);
 
 #ifdef CONFIG_CFG80211_DEFAULT_PS
        rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -1289,7 +1290,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
        INIT_LIST_HEAD(&wdev->event_list);
        spin_lock_init(&wdev->event_lock);
        INIT_LIST_HEAD(&wdev->mgmt_registrations);
-       spin_lock_init(&wdev->mgmt_registrations_lock);
        INIT_LIST_HEAD(&wdev->pmsr_list);
        spin_lock_init(&wdev->pmsr_lock);
        INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
index b35d0db..1720abf 100644 (file)
@@ -100,6 +100,8 @@ struct cfg80211_registered_device {
        struct work_struct propagate_cac_done_wk;
 
        struct work_struct mgmt_registrations_update_wk;
+       /* lock for all wdev lists */
+       spinlock_t mgmt_registrations_lock;
 
        /* must be last because of the way we do wiphy_priv(),
         * and it should at least be aligned to NETDEV_ALIGN */
index 3aa69b3..783acd2 100644 (file)
@@ -452,9 +452,9 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
 
        lockdep_assert_held(&rdev->wiphy.mtx);
 
-       spin_lock_bh(&wdev->mgmt_registrations_lock);
+       spin_lock_bh(&rdev->mgmt_registrations_lock);
        if (!wdev->mgmt_registrations_need_update) {
-               spin_unlock_bh(&wdev->mgmt_registrations_lock);
+               spin_unlock_bh(&rdev->mgmt_registrations_lock);
                return;
        }
 
@@ -479,7 +479,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
        rcu_read_unlock();
 
        wdev->mgmt_registrations_need_update = 0;
-       spin_unlock_bh(&wdev->mgmt_registrations_lock);
+       spin_unlock_bh(&rdev->mgmt_registrations_lock);
 
        rdev_update_mgmt_frame_registrations(rdev, wdev, &upd);
 }
@@ -503,6 +503,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
                                int match_len, bool multicast_rx,
                                struct netlink_ext_ack *extack)
 {
+       struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
        struct cfg80211_mgmt_registration *reg, *nreg;
        int err = 0;
        u16 mgmt_type;
@@ -548,7 +549,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
        if (!nreg)
                return -ENOMEM;
 
-       spin_lock_bh(&wdev->mgmt_registrations_lock);
+       spin_lock_bh(&rdev->mgmt_registrations_lock);
 
        list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
                int mlen = min(match_len, reg->match_len);
@@ -583,7 +584,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
                list_add(&nreg->list, &wdev->mgmt_registrations);
        }
        wdev->mgmt_registrations_need_update = 1;
-       spin_unlock_bh(&wdev->mgmt_registrations_lock);
+       spin_unlock_bh(&rdev->mgmt_registrations_lock);
 
        cfg80211_mgmt_registrations_update(wdev);
 
@@ -591,7 +592,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
 
  out:
        kfree(nreg);
-       spin_unlock_bh(&wdev->mgmt_registrations_lock);
+       spin_unlock_bh(&rdev->mgmt_registrations_lock);
 
        return err;
 }
@@ -602,7 +603,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
        struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
        struct cfg80211_mgmt_registration *reg, *tmp;
 
-       spin_lock_bh(&wdev->mgmt_registrations_lock);
+       spin_lock_bh(&rdev->mgmt_registrations_lock);
 
        list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
                if (reg->nlportid != nlportid)
@@ -615,7 +616,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
                schedule_work(&rdev->mgmt_registrations_update_wk);
        }
 
-       spin_unlock_bh(&wdev->mgmt_registrations_lock);
+       spin_unlock_bh(&rdev->mgmt_registrations_lock);
 
        if (nlportid && rdev->crit_proto_nlportid == nlportid) {
                rdev->crit_proto_nlportid = 0;
@@ -628,15 +629,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
 
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
 {
+       struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
        struct cfg80211_mgmt_registration *reg, *tmp;
 
-       spin_lock_bh(&wdev->mgmt_registrations_lock);
+       spin_lock_bh(&rdev->mgmt_registrations_lock);
        list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
                list_del(&reg->list);
                kfree(reg);
        }
        wdev->mgmt_registrations_need_update = 1;
-       spin_unlock_bh(&wdev->mgmt_registrations_lock);
+       spin_unlock_bh(&rdev->mgmt_registrations_lock);
 
        cfg80211_mgmt_registrations_update(wdev);
 }
@@ -784,7 +786,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
        data = buf + ieee80211_hdrlen(mgmt->frame_control);
        data_len = len - ieee80211_hdrlen(mgmt->frame_control);
 
-       spin_lock_bh(&wdev->mgmt_registrations_lock);
+       spin_lock_bh(&rdev->mgmt_registrations_lock);
 
        list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
                if (reg->frame_type != ftype)
@@ -808,7 +810,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
                break;
        }
 
-       spin_unlock_bh(&wdev->mgmt_registrations_lock);
+       spin_unlock_bh(&rdev->mgmt_registrations_lock);
 
        trace_cfg80211_return_bool(result);
        return result;
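
Moving the spinlock from the wdev to the rdev gives one lock that covers
the mgmt_registrations lists of every wdev under a wiphy, which is what
cfg80211_mgmt_registrations_update() needs when it aggregates across
wdevs. The resulting locking rule, as an illustrative sketch:

    /* Illustrative: any access to any wdev->mgmt_registrations list
     * must now hold the rdev-wide lock.
     */
    static void walk_registrations(struct wireless_dev *wdev)
    {
            struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
            struct cfg80211_mgmt_registration *reg;

            spin_lock_bh(&rdev->mgmt_registrations_lock);
            list_for_each_entry(reg, &wdev->mgmt_registrations, list)
                    ;       /* inspect reg under the rdev-wide lock */
            spin_unlock_bh(&rdev->mgmt_registrations_lock);
    }
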
index e4f79b2..22e92be 100644 (file)
@@ -418,14 +418,17 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss,
        }
        ssid_len = ssid[1];
        ssid = ssid + 2;
-       rcu_read_unlock();
 
        /* check if nontrans_bss is in the list */
        list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) {
-               if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len))
+               if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) {
+                       rcu_read_unlock();
                        return 0;
+               }
        }
 
+       rcu_read_unlock();
+
        /* add to the list */
        list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list);
        return 0;
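
The bug fixed above: ssid points into RCU-protected IE data, so dropping
the read lock before walking the list left a window where that memory
could be freed while is_bss() was still comparing against it. The rule,
in minimal form (obj and use() are placeholders):

    rcu_read_lock();
    p = rcu_dereference(obj->ptr);
    if (p)
            use(p);         /* every dereference of p stays inside the section */
    rcu_read_unlock();
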
index 2991f71..5ff1f87 100644 (file)
@@ -1030,14 +1030,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
            !(rdev->wiphy.interface_modes & (1 << ntype)))
                return -EOPNOTSUPP;
 
-       /* if it's part of a bridge, reject changing type to station/ibss */
-       if (netif_is_bridge_port(dev) &&
-           (ntype == NL80211_IFTYPE_ADHOC ||
-            ntype == NL80211_IFTYPE_STATION ||
-            ntype == NL80211_IFTYPE_P2P_CLIENT))
-               return -EBUSY;
-
        if (ntype != otype) {
+               /* if it's part of a bridge, reject changing type to station/ibss */
+               if (netif_is_bridge_port(dev) &&
+                   (ntype == NL80211_IFTYPE_ADHOC ||
+                    ntype == NL80211_IFTYPE_STATION ||
+                    ntype == NL80211_IFTYPE_P2P_CLIENT))
+                       return -EBUSY;
+
                dev->ieee80211_ptr->use_4addr = false;
                dev->ieee80211_ptr->mesh_id_up_len = 0;
                wdev_lock(dev->ieee80211_ptr);
index 5c59790..d88bb65 100644 (file)
@@ -949,7 +949,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
        int err, n;
        u32 key;
        char b;
-       int retries = 100;
 
        zero_verdict_count(verd_mapfd);
 
@@ -1002,17 +1001,11 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
                goto close_peer1;
        if (pass != 1)
                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-again:
-       n = read(c0, &b, 1);
-       if (n < 0) {
-               if (errno == EAGAIN && retries--) {
-                       usleep(1000);
-                       goto again;
-               }
-               FAIL_ERRNO("%s: read", log_prefix);
-       }
+       n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
+       if (n < 0)
+               FAIL_ERRNO("%s: recv_timeout", log_prefix);
        if (n == 0)
-               FAIL("%s: incomplete read", log_prefix);
+               FAIL("%s: incomplete recv", log_prefix);
 
 close_peer1:
        xclose(p1);
@@ -1571,7 +1564,6 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd,
        const char *log_prefix = redir_mode_str(mode);
        int c0, c1, p0, p1;
        unsigned int pass;
-       int retries = 100;
        int err, n;
        int sfd[2];
        u32 key;
@@ -1606,17 +1598,11 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd,
        if (pass != 1)
                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 
-again:
-       n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
-       if (n < 0) {
-               if (errno == EAGAIN && retries--) {
-                       usleep(1000);
-                       goto again;
-               }
-               FAIL_ERRNO("%s: read", log_prefix);
-       }
+       n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+       if (n < 0)
+               FAIL_ERRNO("%s: recv_timeout", log_prefix);
        if (n == 0)
-               FAIL("%s: incomplete read", log_prefix);
+               FAIL("%s: incomplete recv", log_prefix);
 
 close:
        xclose(c1);
@@ -1748,7 +1734,6 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
        const char *log_prefix = redir_mode_str(mode);
        int c0, c1, p0, p1;
        unsigned int pass;
-       int retries = 100;
        int err, n;
        u32 key;
        char b;
@@ -1781,17 +1766,11 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
        if (pass != 1)
                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 
-again:
-       n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
-       if (n < 0) {
-               if (errno == EAGAIN && retries--) {
-                       usleep(1000);
-                       goto again;
-               }
-               FAIL_ERRNO("%s: read", log_prefix);
-       }
+       n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+       if (n < 0)
+               FAIL_ERRNO("%s: recv_timeout", log_prefix);
        if (n == 0)
-               FAIL("%s: incomplete read", log_prefix);
+               FAIL("%s: incomplete recv", log_prefix);
 
 close_cli1:
        xclose(c1);
@@ -1841,7 +1820,6 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
        const char *log_prefix = redir_mode_str(mode);
        int c0, c1, p0, p1;
        unsigned int pass;
-       int retries = 100;
        int err, n;
        int sfd[2];
        u32 key;
@@ -1876,17 +1854,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
        if (pass != 1)
                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 
-again:
-       n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
-       if (n < 0) {
-               if (errno == EAGAIN && retries--) {
-                       usleep(1000);
-                       goto again;
-               }
-               FAIL_ERRNO("%s: read", log_prefix);
-       }
+       n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+       if (n < 0)
+               FAIL_ERRNO("%s: recv_timeout", log_prefix);
        if (n == 0)
-               FAIL("%s: incomplete read", log_prefix);
+               FAIL("%s: incomplete recv", log_prefix);
 
 close_cli1:
        xclose(c1);
@@ -1932,7 +1904,6 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
        int sfd[2];
        u32 key;
        char b;
-       int retries = 100;
 
        zero_verdict_count(verd_mapfd);
 
@@ -1963,17 +1934,11 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
        if (pass != 1)
                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 
-again:
-       n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
-       if (n < 0) {
-               if (errno == EAGAIN && retries--) {
-                       usleep(1000);
-                       goto again;
-               }
-               FAIL_ERRNO("%s: read", log_prefix);
-       }
+       n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+       if (n < 0)
+               FAIL_ERRNO("%s: recv_timeout", log_prefix);
        if (n == 0)
-               FAIL("%s: incomplete read", log_prefix);
+               FAIL("%s: incomplete recv", log_prefix);
 
 close:
        xclose(c1);
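
Replacing the open-coded read()/EAGAIN/usleep() loops with
recv_timeout() shortens the tests and makes the wait event-driven rather
than polled. A helper of that shape could look roughly like this
(hedged; the BPF selftests ship their own implementation):

    #include <poll.h>
    #include <sys/socket.h>

    /* Wait up to timeout_sec for readability, then receive once. */
    static ssize_t recv_timeout(int fd, void *buf, size_t len, int flags,
                                unsigned int timeout_sec)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLIN };

            if (poll(&pfd, 1, timeout_sec * 1000) <= 0)
                    return -1;
            return recv(fd, buf, len, flags);
    }
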
index ade79ef..071a33d 100755 (executable)
@@ -6,7 +6,9 @@
 
 ALL_TESTS="
        test_root
+       test_port_tbf
        test_etsprio
+       test_etsprio_port_tbf
 "
 NUM_NETIFS=1
 lib_dir=$(dirname $0)/../../../net/forwarding
@@ -221,6 +223,12 @@ test_root()
        do_test_combinations 1 0
 }
 
+test_port_tbf()
+{
+       with_tbf 1: root \
+               do_test_combinations 8 1
+}
+
 do_test_etsprio()
 {
        local parent=$1; shift
@@ -264,6 +272,12 @@ test_etsprio()
        do_test_etsprio root ""
 }
 
+test_etsprio_port_tbf()
+{
+       with_tbf 1: root \
+               do_test_etsprio "parent 1:1" "-TBF"
+}
+
 cleanup()
 {
        tc qdisc del dev $h1 root &>/dev/null
index 8e67a25..3313566 100755 (executable)
@@ -445,10 +445,13 @@ cleanup()
                ip -netns ${NSA} link set dev ${NSA_DEV} down
                ip -netns ${NSA} link del dev ${NSA_DEV}
 
+               ip netns pids ${NSA} | xargs kill 2>/dev/null
                ip netns del ${NSA}
        fi
 
+       ip netns pids ${NSB} | xargs kill 2>/dev/null
        ip netns del ${NSB}
+       ip netns pids ${NSC} | xargs kill 2>/dev/null
        ip netns del ${NSC} >/dev/null 2>&1
 }
 
index 8bd85da..75a37c1 100644 (file)
@@ -4,9 +4,12 @@
 ALL_TESTS="
        ping_ipv4
        tbf_test
+       tbf_root_test
 "
 source $lib_dir/sch_tbf_core.sh
 
+QDISC_TYPE=${QDISC% *}
+
 tbf_test_one()
 {
        local bs=$1; shift
@@ -22,6 +25,8 @@ tbf_test_one()
 
 tbf_test()
 {
+       log_info "Testing root-$QDISC_TYPE-tbf"
+
        # This test is used for both ETS and PRIO. Even though we only need two
        # bands, PRIO demands a minimum of three.
        tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
@@ -29,6 +34,29 @@ tbf_test()
        tc qdisc del dev $swp2 root
 }
 
+tbf_root_test()
+{
+       local bs=128K
+
+       log_info "Testing root-tbf-$QDISC_TYPE"
+
+       tc qdisc replace dev $swp2 root handle 1: \
+               tbf rate 400Mbit burst $bs limit 1M
+       tc qdisc replace dev $swp2 parent 1:1 handle 10: \
+               $QDISC 3 priomap 2 1 0
+       tc qdisc replace dev $swp2 parent 10:3 handle 103: \
+               bfifo limit 1M
+       tc qdisc replace dev $swp2 parent 10:2 handle 102: \
+               bfifo limit 1M
+       tc qdisc replace dev $swp2 parent 10:1 handle 101: \
+               bfifo limit 1M
+
+       do_tbf_test 10 400 $bs
+       do_tbf_test 11 400 $bs
+
+       tc qdisc del dev $swp2 root
+}
+
 trap cleanup EXIT
 
 setup_prepare